Blame view
security/commoncap.c
43 KB
2874c5fd2
|
1 |
// SPDX-License-Identifier: GPL-2.0-or-later |
3e1c2515a
|
2 |
/* Common capabilities, needed by capability.o. |
1da177e4c
|
3 |
*/ |
c59ede7b7
|
4 |
#include <linux/capability.h> |
3fc689e96
|
5 |
#include <linux/audit.h> |
1da177e4c
|
6 7 |
#include <linux/init.h> #include <linux/kernel.h> |
b1d9e6b06
|
8 |
#include <linux/lsm_hooks.h> |
1da177e4c
|
9 10 11 12 13 |
#include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/swap.h> |
1da177e4c
|
14 15 16 17 18 |
#include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/ptrace.h> #include <linux/xattr.h> #include <linux/hugetlb.h> |
b53767719
|
19 |
#include <linux/mount.h> |
b460cbc58
|
20 |
#include <linux/sched.h> |
3898b1b4e
|
21 22 |
#include <linux/prctl.h> #include <linux/securebits.h> |
3486740a4
|
23 |
#include <linux/user_namespace.h> |
404015308
|
24 |
#include <linux/binfmts.h> |
51b79bee6
|
25 |
#include <linux/personality.h> |
7bc23abcb
|
26 |
#include <linux/mnt_idmapping.h> |
72c2d5823
|
27 |
|
b5f22a59c
|
28 29 30 31 32 33 34 35 36 37 38 |
/* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. * However if fE is also set, then the intent is for only * the file capabilities to be applied, and the setuid-root * bit is left on either to change the uid (plausible) or * to get full privilege on a kernel without file capabilities * support. So in that case we do not raise capabilities. * * Warn if that happens, once per boot. */ |
d7627467b
|
39 |
static void warn_setuid_and_fcaps_mixed(const char *fname) |
b5f22a59c
|
40 41 42 43 44 45 46 47 48 49 |
{ static int warned; if (!warned) { printk(KERN_INFO "warning: `%s' has both setuid-root and" " effective capabilities. Therefore not raising all" " capabilities. ", fname); warned = 1; } } |
1d045980e
|
50 51 |
/** * cap_capable - Determine whether a task has a particular effective capability |
3699c53c4
|
52 |
* @cred: The credentials to use |
049ae601f
|
53 |
* @targ_ns: The user namespace in which we need the capability |
1d045980e
|
54 |
* @cap: The capability to check for |
e88ed488a
|
55 |
* @opts: Bitmask of options defined in include/linux/security.h |
1d045980e
|
56 57 58 59 |
* * Determine whether the nominated task has the specified capability amongst * its effective set, returning 0 if it does, -ve if it does not. * |
3699c53c4
|
60 61 62 63 |
* NOTE WELL: cap_has_capability() cannot be used like the kernel's capable() * and has_capability() functions. That is, it has the reverse semantics: * cap_has_capability() returns 0 when a task has a capability, but the * kernel's capable() and has_capability() returns 1 for this case. |
a6dbb1ef2
|
64 |
*/ |
6a9de4911
|
65 |
int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, |
c1a85a00e
|
66 |
int cap, unsigned int opts) |
1da177e4c
|
67 |
{ |
520d9eabc
|
68 |
struct user_namespace *ns = targ_ns; |
3486740a4
|
69 |
|
520d9eabc
|
70 71 72 73 74 |
/* See if cred has the capability in the target user namespace * by examining the target user namespace and all of the target * user namespace's parents. */ for (;;) { |
3486740a4
|
75 |
/* Do we have the necessary capabilities? */ |
520d9eabc
|
76 |
if (ns == cred->user_ns) |
3486740a4
|
77 |
return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; |
64db4c7f4
|
78 79 80 81 82 |
/* * If we're already at a lower level than we're looking for, * we're done searching. */ if (ns->level <= cred->user_ns->level) |
3486740a4
|
83 |
return -EPERM; |
520d9eabc
|
84 85 86 87 88 89 |
/* * The owner of the user namespace in the parent of the * user namespace has all caps. */ if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid)) return 0; |
3486740a4
|
90 |
/* |
520d9eabc
|
91 |
* If you have a capability in a parent user ns, then you have |
3486740a4
|
92 93 |
* it over all children user namespaces as well. */ |
520d9eabc
|
94 |
ns = ns->parent; |
3486740a4
|
95 96 97 |
} /* We never get here */ |
1da177e4c
|
98 |
} |
1d045980e
|
99 100 101 102 103 104 105 106 |
/** * cap_settime - Determine whether the current process may set the system clock * @ts: The time to set * @tz: The timezone to set * * Determine whether the current process may set the system clock and timezone * information, returning 0 if permission granted, -ve if denied. */ |
457db29bf
|
107 |
int cap_settime(const struct timespec64 *ts, const struct timezone *tz) |
1da177e4c
|
108 109 110 111 112 |
{ if (!capable(CAP_SYS_TIME)) return -EPERM; return 0; } |
1d045980e
|
113 |
/** |
9e48858f7
|
114 |
* cap_ptrace_access_check - Determine whether the current process may access |
1d045980e
|
115 116 117 118 |
* another * @child: The process to be accessed * @mode: The mode of attachment. * |
8409cca70
|
119 120 121 122 123 124 |
* If we are in the same or an ancestor user_ns and have all the target * task's capabilities, then ptrace access is allowed. * If we have the ptrace capability to the target user_ns, then ptrace * access is allowed. * Else denied. * |
1d045980e
|
125 126 127 |
* Determine whether a process may access another, returning 0 if permission * granted, -ve if denied. */ |
9e48858f7
|
128 |
int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) |
1da177e4c
|
129 |
{ |
c69e8d9c0
|
130 |
int ret = 0; |
8409cca70
|
131 |
const struct cred *cred, *child_cred; |
caaee6234
|
132 |
const kernel_cap_t *caller_caps; |
c69e8d9c0
|
133 134 |
rcu_read_lock(); |
8409cca70
|
135 136 |
cred = current_cred(); child_cred = __task_cred(child); |
caaee6234
|
137 138 139 140 |
if (mode & PTRACE_MODE_FSCREDS) caller_caps = &cred->cap_effective; else caller_caps = &cred->cap_permitted; |
c4a4d6037
|
141 |
if (cred->user_ns == child_cred->user_ns && |
caaee6234
|
142 |
cap_issubset(child_cred->cap_permitted, *caller_caps)) |
8409cca70
|
143 |
goto out; |
c4a4d6037
|
144 |
if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE)) |
8409cca70
|
145 146 147 |
goto out; ret = -EPERM; out: |
c69e8d9c0
|
148 149 |
rcu_read_unlock(); return ret; |
5cd9c58fb
|
150 |
} |
1d045980e
|
151 152 153 154 |
/** * cap_ptrace_traceme - Determine whether another process may trace the current * @parent: The task proposed to be the tracer * |
8409cca70
|
155 156 157 158 159 160 |
* If parent is in the same or an ancestor user_ns and has all current's * capabilities, then ptrace access is allowed. * If parent has the ptrace capability to current's user_ns, then ptrace * access is allowed. * Else denied. * |
1d045980e
|
161 162 163 |
* Determine whether the nominated task is permitted to trace the current * process, returning 0 if permission is granted, -ve if denied. */ |
5cd9c58fb
|
164 165 |
int cap_ptrace_traceme(struct task_struct *parent) { |
c69e8d9c0
|
166 |
int ret = 0; |
8409cca70
|
167 |
const struct cred *cred, *child_cred; |
c69e8d9c0
|
168 169 |
rcu_read_lock(); |
8409cca70
|
170 171 |
cred = __task_cred(parent); child_cred = current_cred(); |
c4a4d6037
|
172 |
if (cred->user_ns == child_cred->user_ns && |
8409cca70
|
173 174 |
cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) goto out; |
c4a4d6037
|
175 |
if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE)) |
8409cca70
|
176 177 178 |
goto out; ret = -EPERM; out: |
c69e8d9c0
|
179 180 |
rcu_read_unlock(); return ret; |
1da177e4c
|
181 |
} |
1d045980e
|
182 183 184 185 186 187 188 189 190 191 192 193 |
/**
 * cap_capget - Retrieve a task's capability sets
 * @target: The task from which to retrieve the capability sets
 * @effective: The place to record the effective set
 * @inheritable: The place to record the inheritable set
 * @permitted: The place to record the permitted set
 *
 * Copy the three capability sets out of @target's credentials while holding
 * the RCU read lock.
 *
 * Return: always 0.
 */
int cap_capget(struct task_struct *target, kernel_cap_t *effective,
	       kernel_cap_t *inheritable, kernel_cap_t *permitted)
{
	const struct cred *tcred;

	/* Derived from kernel/capability.c:sys_capget. */
	rcu_read_lock();
	tcred = __task_cred(target);
	*permitted = tcred->cap_permitted;
	*inheritable = tcred->cap_inheritable;
	*effective = tcred->cap_effective;
	rcu_read_unlock();

	return 0;
}
1d045980e
|
206 207 208 209 |
/* * Determine whether the inheritable capabilities are limited to the old * permitted set. Returns 1 if they are limited, 0 if they are not. */ |
72c2d5823
|
210 211 |
static inline int cap_inh_is_capped(void) { |
1d045980e
|
212 213 214 |
/* they are so limited unless the current task has the CAP_SETPCAP * capability */ |
c4a4d6037
|
215 |
if (cap_capable(current_cred(), current_cred()->user_ns, |
c1a85a00e
|
216 |
CAP_SETPCAP, CAP_OPT_NONE) == 0) |
1d045980e
|
217 |
return 0; |
1d045980e
|
218 |
return 1; |
1209726ce
|
219 |
} |
72c2d5823
|
220 |
|
1d045980e
|
221 222 223 224 225 226 227 228 229 230 231 232 |
/** * cap_capset - Validate and apply proposed changes to current's capabilities * @new: The proposed new credentials; alterations should be made here * @old: The current task's current credentials * @effective: A pointer to the proposed new effective capabilities set * @inheritable: A pointer to the proposed new inheritable capabilities set * @permitted: A pointer to the proposed new permitted capabilities set * * This function validates and applies a proposed mass change to the current * process's capability sets. The changes are made to the proposed new * credentials, and assuming no error, will be committed by the caller of LSM. */ |
d84f4f992
|
233 234 235 236 237 |
int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) |
1da177e4c
|
238 |
{ |
d84f4f992
|
239 240 241 242 |
if (cap_inh_is_capped() && !cap_issubset(*inheritable, cap_combine(old->cap_inheritable, old->cap_permitted))) |
72c2d5823
|
243 |
/* incapable of using this inheritable set */ |
1da177e4c
|
244 |
return -EPERM; |
d84f4f992
|
245 |
|
3b7391de6
|
246 |
if (!cap_issubset(*inheritable, |
d84f4f992
|
247 248 |
cap_combine(old->cap_inheritable, old->cap_bset))) |
3b7391de6
|
249 250 |
/* no new pI capabilities outside bounding set */ return -EPERM; |
1da177e4c
|
251 252 |
/* verify restrictions on target's new Permitted set */ |
d84f4f992
|
253 |
if (!cap_issubset(*permitted, old->cap_permitted)) |
1da177e4c
|
254 |
return -EPERM; |
1da177e4c
|
255 256 |
/* verify the _new_Effective_ is a subset of the _new_Permitted_ */ |
d84f4f992
|
257 |
if (!cap_issubset(*effective, *permitted)) |
1da177e4c
|
258 |
return -EPERM; |
1da177e4c
|
259 |
|
d84f4f992
|
260 261 262 |
new->cap_effective = *effective; new->cap_inheritable = *inheritable; new->cap_permitted = *permitted; |
58319057b
|
263 264 265 266 267 268 269 270 271 272 |
/* * Mask off ambient bits that are no longer both permitted and * inheritable. */ new->cap_ambient = cap_intersect(new->cap_ambient, cap_intersect(*permitted, *inheritable)); if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EINVAL; |
1da177e4c
|
273 274 |
return 0; } |
1d045980e
|
275 276 277 278 279 280 |
/** * cap_inode_need_killpriv - Determine if inode change affects privileges * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV * * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV * affects the security markings on that inode, and if it is, should |
ab5348c9c
|
281 |
* inode_killpriv() be invoked or the change rejected. |
1d045980e
|
282 |
* |
049ae601f
|
283 |
* Return: 1 if security.capability has a value, meaning inode_killpriv() |
ab5348c9c
|
284 |
* is required, 0 otherwise, meaning inode_killpriv() is not required. |
1d045980e
|
285 |
*/ |
b53767719
|
286 287 |
int cap_inode_need_killpriv(struct dentry *dentry) { |
c6f493d63
|
288 |
struct inode *inode = d_backing_inode(dentry); |
b53767719
|
289 |
int error; |
5d6c31910
|
290 291 |
error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0); return error > 0; |
b53767719
|
292 |
} |
1d045980e
|
293 294 |
/** * cap_inode_killpriv - Erase the security markings on an inode |
71bc356f9
|
295 296 297 |
* * @mnt_userns: user namespace of the mount the inode was found from * @dentry: The inode/dentry to alter |
1d045980e
|
298 299 300 |
* * Erase the privilege-enhancing security markings on an inode. * |
71bc356f9
|
301 302 303 304 305 306 |
* If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then * take care to map the inode according to @mnt_userns before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply passs init_user_ns. * |
049ae601f
|
307 |
* Return: 0 if successful, -ve on error. |
1d045980e
|
308 |
*/ |
71bc356f9
|
309 |
int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry) |
b53767719
|
310 |
{ |
5d6c31910
|
311 |
int error; |
b53767719
|
312 |
|
71bc356f9
|
313 |
error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS); |
5d6c31910
|
314 315 316 |
if (error == -EOPNOTSUPP) error = 0; return error; |
b53767719
|
317 |
} |
8db6c34f1
|
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
/*
 * Return true if @kroot maps to uid 0 in the current user namespace or in
 * any of its ancestors up to and including init_user_ns; false otherwise,
 * or if @kroot is not a valid kuid.
 */
static bool rootid_owns_currentns(kuid_t kroot)
{
	struct user_namespace *ns;

	if (!uid_valid(kroot))
		return false;

	ns = current_user_ns();
	while (from_kuid(ns, kroot) != 0) {
		/* init_user_ns is the top of the walk: no match anywhere. */
		if (ns == &init_user_ns)
			return false;
		ns = ns->parent;
	}

	return true;
}

/* Strip the VFS_CAP_FLAGS_EFFECTIVE bit from a magic_etc word. */
static __u32 sansflags(__u32 m)
{
	m &= ~VFS_CAP_FLAGS_EFFECTIVE;
	return m;
}
dc32b5c3e
|
339 |
/*
 * True when @size is exactly XATTR_CAPS_SZ_2 and the magic word of @cap,
 * with the effective flag masked off, equals VFS_CAP_REVISION_2.
 * @cap is not dereferenced when the size does not match.
 */
static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
{
	return size == XATTR_CAPS_SZ_2 &&
	       sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
}
dc32b5c3e
|
345 |
/*
 * True when @size is exactly XATTR_CAPS_SZ_3 and the magic word of @cap,
 * with the effective flag masked off, equals VFS_CAP_REVISION_3.
 * @cap is not dereferenced when the size does not match.
 */
static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
{
	return size == XATTR_CAPS_SZ_3 &&
	       sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
}

/*
 * getsecurity: We are called for security.* before any attempt to read the
 * xattr from the inode itself.
 *
 * This gives us a chance to read the on-disk value and convert it.  If we
 * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
 *
 * Note we are not called by vfs_getxattr_alloc(), but that is only called
 * by the integrity subsystem, which really wants the unconverted values -
 * so that's good.
 */
71bc356f9
|
363 364 |
int cap_inode_getsecurity(struct user_namespace *mnt_userns, struct inode *inode, const char *name, void **buffer, |
8db6c34f1
|
365 366 367 368 |
bool alloc) { int size, ret; kuid_t kroot; |
f2b00be48
|
369 |
u32 nsmagic, magic; |
8db6c34f1
|
370 371 372 |
uid_t root, mappedroot; char *tmpbuf = NULL; struct vfs_cap_data *cap; |
f2b00be48
|
373 |
struct vfs_ns_cap_data *nscap = NULL; |
8db6c34f1
|
374 375 376 377 378 |
struct dentry *dentry; struct user_namespace *fs_ns; if (strcmp(name, "capability") != 0) return -EOPNOTSUPP; |
355139a8d
|
379 |
dentry = d_find_any_alias(inode); |
8db6c34f1
|
380 381 382 383 |
if (!dentry) return -EINVAL; size = sizeof(struct vfs_ns_cap_data); |
71bc356f9
|
384 |
ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS, |
c7c7a1a18
|
385 |
&tmpbuf, size, GFP_NOFS); |
8db6c34f1
|
386 |
dput(dentry); |
82e5d8cc7
|
387 |
if (ret < 0 || !tmpbuf) |
8db6c34f1
|
388 389 390 391 |
return ret; fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; |
dc32b5c3e
|
392 |
if (is_v2header((size_t) ret, cap)) { |
f2b00be48
|
393 394 395 396 397 398 399 |
root = 0; } else if (is_v3header((size_t) ret, cap)) { nscap = (struct vfs_ns_cap_data *) tmpbuf; root = le32_to_cpu(nscap->rootid); } else { size = -EINVAL; goto out_free; |
8db6c34f1
|
400 |
} |
8db6c34f1
|
401 |
kroot = make_kuid(fs_ns, root); |
71bc356f9
|
402 |
/* If this is an idmapped mount shift the kuid. */ |
38753e917
|
403 |
kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot); |
71bc356f9
|
404 |
|
8db6c34f1
|
405 406 407 408 |
/* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ mappedroot = from_kuid(current_user_ns(), kroot); if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { |
f2b00be48
|
409 |
size = sizeof(struct vfs_ns_cap_data); |
8db6c34f1
|
410 |
if (alloc) { |
f2b00be48
|
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 |
if (!nscap) { /* v2 -> v3 conversion */ nscap = kzalloc(size, GFP_ATOMIC); if (!nscap) { size = -ENOMEM; goto out_free; } nsmagic = VFS_CAP_REVISION_3; magic = le32_to_cpu(cap->magic_etc); if (magic & VFS_CAP_FLAGS_EFFECTIVE) nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); nscap->magic_etc = cpu_to_le32(nsmagic); } else { /* use allocated v3 buffer */ tmpbuf = NULL; } |
8db6c34f1
|
428 |
nscap->rootid = cpu_to_le32(mappedroot); |
f2b00be48
|
429 430 431 |
*buffer = nscap; } goto out_free; |
8db6c34f1
|
432 433 434 |
} if (!rootid_owns_currentns(kroot)) { |
f2b00be48
|
435 436 |
size = -EOVERFLOW; goto out_free; |
8db6c34f1
|
437 438 439 440 441 |
} /* This comes from a parent namespace. Return as a v2 capability */ size = sizeof(struct vfs_cap_data); if (alloc) { |
f2b00be48
|
442 443 444 445 446 447 448 |
if (nscap) { /* v3 -> v2 conversion */ cap = kzalloc(size, GFP_ATOMIC); if (!cap) { size = -ENOMEM; goto out_free; } |
8db6c34f1
|
449 450 451 452 453 454 |
magic = VFS_CAP_REVISION_2; nsmagic = le32_to_cpu(nscap->magic_etc); if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) magic |= VFS_CAP_FLAGS_EFFECTIVE; memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); |
1f5781725
|
455 |
} else { |
f2b00be48
|
456 457 |
/* use unconverted v2 */ tmpbuf = NULL; |
8db6c34f1
|
458 |
} |
f2b00be48
|
459 |
*buffer = cap; |
8db6c34f1
|
460 |
} |
f2b00be48
|
461 |
out_free: |
8db6c34f1
|
462 463 464 |
kfree(tmpbuf); return size; } |
e65ce2a50
|
465 466 467 468 469 470 471 |
/** * rootid_from_xattr - translate root uid of vfs caps * * @value: vfs caps value which may be modified by this function * @size: size of @ivalue * @task_ns: user namespace of the caller * @mnt_userns: user namespace of the mount the inode was found from |
f895d0ff4
|
472 |
* @fs_userns: user namespace of the filesystem |
e65ce2a50
|
473 474 475 476 477 478 479 |
* * If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then * take care to map the inode according to @mnt_userns before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply passs init_user_ns. */ |
8db6c34f1
|
480 |
static kuid_t rootid_from_xattr(const void *value, size_t size, |
e65ce2a50
|
481 |
struct user_namespace *task_ns, |
f895d0ff4
|
482 483 |
struct user_namespace *mnt_userns, struct user_namespace *fs_userns) |
8db6c34f1
|
484 485 |
{ const struct vfs_ns_cap_data *nscap = value; |
e65ce2a50
|
486 |
kuid_t rootkid; |
8db6c34f1
|
487 488 489 490 |
uid_t rootid = 0; if (size == XATTR_CAPS_SZ_3) rootid = le32_to_cpu(nscap->rootid); |
e65ce2a50
|
491 |
rootkid = make_kuid(task_ns, rootid); |
f895d0ff4
|
492 |
return mapped_kuid_user(mnt_userns, fs_userns, rootkid); |
8db6c34f1
|
493 |
} |
dc32b5c3e
|
494 |
/* True when @cap/@size form either a v2 or a v3 capability xattr header. */
static bool validheader(size_t size, const struct vfs_cap_data *cap)
{
	if (is_v2header(size, cap))
		return true;

	return is_v3header(size, cap);
}
e65ce2a50
|
498 499 500 501 502 503 504 505 |
/** * cap_convert_nscap - check vfs caps * * @mnt_userns: user namespace of the mount the inode was found from * @dentry: used to retrieve inode to check permissions on * @ivalue: vfs caps value which may be modified by this function * @size: size of @ivalue * |
8db6c34f1
|
506 507 508 |
* User requested a write of security.capability. If needed, update the * xattr to change from v2 to v3, or to fixup the v3 rootid. * |
e65ce2a50
|
509 510 511 512 513 514 |
* If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then * take care to map the inode according to @mnt_userns before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply passs init_user_ns. * |
049ae601f
|
515 |
* Return: On success, return the new size; on error, return < 0. |
8db6c34f1
|
516 |
*/ |
e65ce2a50
|
517 518 |
int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, const void **ivalue, size_t size) |
8db6c34f1
|
519 520 521 522 523 524 525 |
{ struct vfs_ns_cap_data *nscap; uid_t nsrootid; const struct vfs_cap_data *cap = *ivalue; __u32 magic, nsmagic; struct inode *inode = d_backing_inode(dentry); struct user_namespace *task_ns = current_user_ns(), |
3b0c2d3ea
|
526 |
*fs_ns = inode->i_sb->s_user_ns; |
8db6c34f1
|
527 528 529 530 531 |
kuid_t rootid; size_t newsize; if (!*ivalue) return -EINVAL; |
dc32b5c3e
|
532 |
if (!validheader(size, cap)) |
8db6c34f1
|
533 |
return -EINVAL; |
e65ce2a50
|
534 |
if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) |
8db6c34f1
|
535 |
return -EPERM; |
38753e917
|
536 |
if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns)) |
8db6c34f1
|
537 538 539 |
if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size; |
38753e917
|
540 |
rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns); |
8db6c34f1
|
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 |
if (!uid_valid(rootid)) return -EINVAL; nsrootid = from_kuid(fs_ns, rootid); if (nsrootid == -1) return -EINVAL; newsize = sizeof(struct vfs_ns_cap_data); nscap = kmalloc(newsize, GFP_ATOMIC); if (!nscap) return -ENOMEM; nscap->rootid = cpu_to_le32(nsrootid); nsmagic = VFS_CAP_REVISION_3; magic = le32_to_cpu(cap->magic_etc); if (magic & VFS_CAP_FLAGS_EFFECTIVE) nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; nscap->magic_etc = cpu_to_le32(nsmagic); memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); |
8db6c34f1
|
559 560 561 |
*ivalue = nscap; return newsize; } |
1d045980e
|
562 563 564 565 |
/* * Calculate the new process capability sets from the capability sets attached * to a file. */ |
c0b004413
|
566 |
static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, |
a6f76f23d
|
567 |
struct linux_binprm *bprm, |
4d49f6710
|
568 |
bool *effective, |
fc7eadf76
|
569 |
bool *has_fcap) |
b53767719
|
570 |
{ |
a6f76f23d
|
571 |
struct cred *new = bprm->cred; |
c0b004413
|
572 573 574 575 |
unsigned i; int ret = 0; if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE) |
a6f76f23d
|
576 |
*effective = true; |
c0b004413
|
577 |
|
4d49f6710
|
578 |
if (caps->magic_etc & VFS_CAP_REVISION_MASK) |
fc7eadf76
|
579 |
*has_fcap = true; |
4d49f6710
|
580 |
|
c0b004413
|
581 582 583 584 585 586 |
CAP_FOR_EACH_U32(i) { __u32 permitted = caps->permitted.cap[i]; __u32 inheritable = caps->inheritable.cap[i]; /* * pP' = (X & fP) | (pI & fI) |
58319057b
|
587 |
* The addition of pA' is handled later. |
c0b004413
|
588 |
*/ |
a6f76f23d
|
589 590 591 |
new->cap_permitted.cap[i] = (new->cap_bset.cap[i] & permitted) | (new->cap_inheritable.cap[i] & inheritable); |
c0b004413
|
592 |
|
a6f76f23d
|
593 594 |
if (permitted & ~new->cap_permitted.cap[i]) /* insufficient to execute correctly */ |
c0b004413
|
595 |
ret = -EPERM; |
c0b004413
|
596 597 598 599 600 601 602 |
} /* * For legacy apps, with no internal support for recognizing they * do not have enough capabilities, we return an error if they are * missing some "forced" (aka file-permitted) capabilities. */ |
a6f76f23d
|
603 |
return *effective ? ret : 0; |
c0b004413
|
604 |
} |
71bc356f9
|
605 606 607 608 609 610 611 |
/** * get_vfs_caps_from_disk - retrieve vfs caps from disk * * @mnt_userns: user namespace of the mount the inode was found from * @dentry: dentry from which @inode is retrieved * @cpu_caps: vfs capabilities * |
1d045980e
|
612 |
* Extract the on-exec-apply capability sets for an executable file. |
71bc356f9
|
613 614 615 616 617 618 |
* * If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then * take care to map the inode according to @mnt_userns before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply passs init_user_ns. |
1d045980e
|
619 |
*/ |
71bc356f9
|
620 621 622 |
int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) |
c0b004413
|
623 |
{ |
c6f493d63
|
624 |
struct inode *inode = d_backing_inode(dentry); |
b53767719
|
625 |
__u32 magic_etc; |
e338d263a
|
626 |
unsigned tocopy, i; |
c0b004413
|
627 |
int size; |
8db6c34f1
|
628 629 630 |
struct vfs_ns_cap_data data, *nscaps = &data; struct vfs_cap_data *caps = (struct vfs_cap_data *) &data; kuid_t rootkuid; |
76ba89c76
|
631 |
struct user_namespace *fs_ns; |
c0b004413
|
632 633 |
memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); |
5d6c31910
|
634 |
if (!inode) |
c0b004413
|
635 |
return -ENODATA; |
76ba89c76
|
636 |
fs_ns = inode->i_sb->s_user_ns; |
5d6c31910
|
637 |
size = __vfs_getxattr((struct dentry *)dentry, inode, |
8db6c34f1
|
638 |
XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ); |
a6f76f23d
|
639 |
if (size == -ENODATA || size == -EOPNOTSUPP) |
c0b004413
|
640 641 |
/* no data, that's ok */ return -ENODATA; |
8db6c34f1
|
642 |
|
c0b004413
|
643 644 |
if (size < 0) return size; |
b53767719
|
645 |
|
e338d263a
|
646 |
if (size < sizeof(magic_etc)) |
b53767719
|
647 |
return -EINVAL; |
8db6c34f1
|
648 |
cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc); |
b53767719
|
649 |
|
8db6c34f1
|
650 |
rootkuid = make_kuid(fs_ns, 0); |
a6f76f23d
|
651 |
switch (magic_etc & VFS_CAP_REVISION_MASK) { |
e338d263a
|
652 653 654 655 656 657 658 659 660 661 |
case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) return -EINVAL; tocopy = VFS_CAP_U32_1; break; case VFS_CAP_REVISION_2: if (size != XATTR_CAPS_SZ_2) return -EINVAL; tocopy = VFS_CAP_U32_2; break; |
8db6c34f1
|
662 663 664 665 666 667 |
case VFS_CAP_REVISION_3: if (size != XATTR_CAPS_SZ_3) return -EINVAL; tocopy = VFS_CAP_U32_3; rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid)); break; |
b53767719
|
668 669 670 |
default: return -EINVAL; } |
8db6c34f1
|
671 672 673 |
/* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ |
38753e917
|
674 |
rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid); |
8db6c34f1
|
675 676 |
if (!rootid_owns_currentns(rootkuid)) return -ENODATA; |
e338d263a
|
677 |
|
5459c164f
|
678 |
CAP_FOR_EACH_U32(i) { |
c0b004413
|
679 680 |
if (i >= tocopy) break; |
8db6c34f1
|
681 682 |
cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted); cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable); |
e338d263a
|
683 |
} |
a6f76f23d
|
684 |
|
7d8b6c637
|
685 686 |
cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; |
2fec30e24
|
687 |
cpu_caps->rootid = rootkuid; |
c0b004413
|
688 |
return 0; |
b53767719
|
689 |
} |
1d045980e
|
690 691 692 693 694 |
/* * Attempt to get the on-exec apply capability sets for an executable file from * its xattrs and, if present, apply them to the proposed credentials being * constructed by execve(). */ |
56305aa9b
|
695 696 |
static int get_file_caps(struct linux_binprm *bprm, struct file *file, bool *effective, bool *has_fcap) |
b53767719
|
697 |
{ |
b53767719
|
698 |
int rc = 0; |
c0b004413
|
699 |
struct cpu_vfs_cap_data vcaps; |
b53767719
|
700 |
|
ee67ae7ef
|
701 |
cap_clear(bprm->cred->cap_permitted); |
3318a386e
|
702 |
|
1f29fae29
|
703 704 |
if (!file_caps_enabled) return 0; |
56305aa9b
|
705 |
if (!mnt_may_suid(file->f_path.mnt)) |
b53767719
|
706 |
return 0; |
380cf5ba6
|
707 708 709 710 711 712 |
/* * This check is redundant with mnt_may_suid() but is kept to make * explicit that capability bits are limited to s_user_ns and its * descendants. */ |
56305aa9b
|
713 |
if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns)) |
d07b846f6
|
714 |
return 0; |
b53767719
|
715 |
|
71bc356f9
|
716 717 |
rc = get_vfs_caps_from_disk(file_mnt_user_ns(file), file->f_path.dentry, &vcaps); |
c0b004413
|
718 719 |
if (rc < 0) { if (rc == -EINVAL) |
8db6c34f1
|
720 721 722 |
printk(KERN_NOTICE "Invalid argument reading file caps for %s ", bprm->filename); |
c0b004413
|
723 724 |
else if (rc == -ENODATA) rc = 0; |
b53767719
|
725 726 |
goto out; } |
b53767719
|
727 |
|
fc7eadf76
|
728 |
rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap); |
b53767719
|
729 730 |
out: |
b53767719
|
731 |
if (rc) |
ee67ae7ef
|
732 |
cap_clear(bprm->cred->cap_permitted); |
b53767719
|
733 734 735 |
return rc; } |
9304b46c9
|
736 |
/* Root is treated as privileged unless the SECURE_NOROOT securebit is set. */
static inline bool root_privileged(void)
{
	return !issecure(SECURE_NOROOT);
}
81a6a0129
|
737 738 739 740 741 742 743 744 |
/* True when @cred's real uid equals @uid. */
static inline bool __is_real(kuid_t uid, struct cred *cred)
{
	return uid_eq(cred->uid, uid);
}

/* True when @cred's effective uid equals @uid. */
static inline bool __is_eff(kuid_t uid, struct cred *cred)
{
	return uid_eq(cred->euid, uid);
}

/* True when @cred's effective uid is @uid but its real uid is not. */
static inline bool __is_suid(kuid_t uid, struct cred *cred)
{
	if (__is_real(uid, cred))
		return false;

	return __is_eff(uid, cred);
}
db1a8922c
|
745 746 747 748 749 750 751 752 753 754 755 756 |
/* * handle_privileged_root - Handle case of privileged root * @bprm: The execution parameters, including the proposed creds * @has_fcap: Are any file capabilities set? * @effective: Do we have effective root privilege? * @root_uid: This namespace' root UID WRT initial USER namespace * * Handle the case where root is privileged and hasn't been neutered by * SECURE_NOROOT. If file capabilities are set, they won't be combined with * set UID root and nothing is changed. If we are root, cap_permitted is * updated. If we have become set UID root, the effective bit is set. */ |
fc7eadf76
|
757 |
static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap, |
db1a8922c
|
758 759 760 761 |
bool *effective, kuid_t root_uid) { const struct cred *old = current_cred(); struct cred *new = bprm->cred; |
9304b46c9
|
762 |
if (!root_privileged()) |
db1a8922c
|
763 764 765 766 767 768 |
return; /* * If the legacy file capability is set, then don't set privs * for a setuid root binary run by a non-root user. Do set it * for a root user just to cause least surprise to an admin. */ |
81a6a0129
|
769 |
if (has_fcap && __is_suid(root_uid, new)) { |
db1a8922c
|
770 771 772 773 774 775 776 777 |
warn_setuid_and_fcaps_mixed(bprm->filename); return; } /* * To support inheritance of root-permissions and suid-root * executables under compatibility mode, we override the * capability sets for the file. */ |
81a6a0129
|
778 |
if (__is_eff(root_uid, new) || __is_real(root_uid, new)) { |
db1a8922c
|
779 780 781 782 783 784 785 |
/* pP' = (cap_bset & ~0) | (pI & ~0) */ new->cap_permitted = cap_combine(old->cap_bset, old->cap_inheritable); } /* * If only the real uid is 0, we do not set the effective bit. */ |
81a6a0129
|
786 |
if (__is_eff(root_uid, new)) |
db1a8922c
|
787 788 |
*effective = true; } |
4c7e715fc
|
789 790 791 792 793 794 |
/*
 * Capability-set comparison helpers.  @field, @target and @source in the
 * first position of __cap_grew are pasted onto "cap_" to select a member of
 * struct cred (e.g. permitted -> cap_permitted).
 *
 * Every pointer argument and every expansion is parenthesised so the macros
 * stay correct when handed a non-trivial expression or when the result is
 * combined with other operators.
 */

/* True if @target's cap_<field> holds a capability @source's does not. */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))

/* True if, within @cred, cap_<target> is not a subset of cap_<source>. */
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))

/* True if @cred's cap_<field> contains everything in CAP_FULL_SET. */
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
81a6a0129
|
795 796 797 798 799 800 |
/* True when the proposed effective UID differs from the old real UID. */
static inline bool __is_setuid(struct cred *new, const struct cred *old)
{
	if (uid_eq(new->euid, old->uid))
		return false;
	return true;
}

/* True when the proposed effective GID differs from the old real GID. */
static inline bool __is_setgid(struct cred *new, const struct cred *old)
{
	if (gid_eq(new->egid, old->gid))
		return false;
	return true;
}
9fbc2c796
|
801 |
/* |
dbbbe1105
|
802 |
* 1) Audit candidate if current->cap_effective is set |
9fbc2c796
|
803 804 805 |
* * We do not bother to audit if 3 things are true: * 1) cap_effective has all caps |
588fb2c7e
|
806 |
* 2) we became root *OR* are were already root |
9fbc2c796
|
807 808 809 810 811 |
* 3) root is supposed to have all caps (SECURE_NOROOT) * Since this is just a normal root execing a process. * * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. |
dbbbe1105
|
812 813 814 815 816 |
* * A number of other conditions require logging: * 2) something prevented setuid root getting all caps * 3) non-setuid root gets fcaps * 4) non-setuid root gets ambient |
9fbc2c796
|
817 |
*/ |
dbbbe1105
|
818 819 |
static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old, kuid_t root, bool has_fcap) |
9fbc2c796
|
820 821 |
{ bool ret = false; |
dbbbe1105
|
822 823 824 825 826 827 828 829 830 831 832 |
if ((__cap_grew(effective, ambient, new) && !(__cap_full(effective, new) && (__is_eff(root, new) || __is_real(root, new)) && root_privileged())) || (root_privileged() && __is_suid(root, new) && !__cap_full(effective, new)) || (!__is_setuid(new, old) && ((has_fcap && __cap_gained(permitted, new, old)) || __cap_gained(ambient, new, old)))) |
02ebbaf48
|
833 |
ret = true; |
dbbbe1105
|
834 |
|
9fbc2c796
|
835 836 |
return ret; } |
1d045980e
|
837 |
/** |
56305aa9b
|
838 |
* cap_bprm_creds_from_file - Set up the proposed credentials for execve(). |
1d045980e
|
839 |
* @bprm: The execution parameters, including the proposed creds |
56305aa9b
|
840 |
* @file: The file to pull the credentials from |
1d045980e
|
841 842 843 |
* * Set up the proposed credentials for a new execution context being * constructed by execve(). The proposed creds in @bprm->cred is altered, |
049ae601f
|
844 845 846 |
* which won't take effect immediately. * * Return: 0 if successful, -ve on error. |
a6f76f23d
|
847 |
*/ |
56305aa9b
|
848 |
int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) |
1da177e4c
|
849 |
{ |
56305aa9b
|
850 |
/* Process setpcap binaries and capabilities for uid 0 */ |
a6f76f23d
|
851 852 |
const struct cred *old = current_cred(); struct cred *new = bprm->cred; |
fc7eadf76
|
853 |
bool effective = false, has_fcap = false, is_setid; |
b53767719
|
854 |
int ret; |
18815a180
|
855 |
kuid_t root_uid; |
1da177e4c
|
856 |
|
58319057b
|
857 858 |
if (WARN_ON(!cap_ambient_invariant_ok(old))) return -EPERM; |
56305aa9b
|
859 |
ret = get_file_caps(bprm, file, &effective, &has_fcap); |
a6f76f23d
|
860 861 |
if (ret < 0) return ret; |
1da177e4c
|
862 |
|
18815a180
|
863 |
root_uid = make_kuid(new->user_ns, 0); |
fc7eadf76
|
864 |
handle_privileged_root(bprm, has_fcap, &effective, root_uid); |
b53767719
|
865 |
|
d52fc5dde
|
866 |
/* if we have fs caps, clear dangerous personality flags */ |
4c7e715fc
|
867 |
if (__cap_gained(permitted, new, old)) |
56305aa9b
|
868 |
bprm->per_clear |= PER_CLEAR_ON_SETID; |
d52fc5dde
|
869 |
|
a6f76f23d
|
870 |
/* Don't let someone trace a set[ug]id/setpcap binary with the revised |
259e5e6c7
|
871 872 873 |
* credentials unless they have the appropriate permit. * * In addition, if NO_NEW_PRIVS, then ensure we get no new privs. |
a6f76f23d
|
874 |
*/ |
81a6a0129
|
875 |
is_setid = __is_setuid(new, old) || __is_setgid(new, old); |
58319057b
|
876 |
|
4c7e715fc
|
877 |
if ((is_setid || __cap_gained(permitted, new, old)) && |
9227dd2a8
|
878 |
((bprm->unsafe & ~LSM_UNSAFE_PTRACE) || |
20523132e
|
879 |
!ptracer_capable(current, new->user_ns))) { |
a6f76f23d
|
880 |
/* downgrade; they get no more than they had, and maybe less */ |
70169420f
|
881 |
if (!ns_capable(new->user_ns, CAP_SETUID) || |
259e5e6c7
|
882 |
(bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) { |
a6f76f23d
|
883 884 |
new->euid = new->uid; new->egid = new->gid; |
1da177e4c
|
885 |
} |
b3a222e52
|
886 887 |
new->cap_permitted = cap_intersect(new->cap_permitted, old->cap_permitted); |
1da177e4c
|
888 |
} |
a6f76f23d
|
889 890 |
new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; |
1da177e4c
|
891 |
|
58319057b
|
892 |
/* File caps or setid cancels ambient. */ |
fc7eadf76
|
893 |
if (has_fcap || is_setid) |
58319057b
|
894 895 896 897 898 899 900 901 902 903 904 905 |
cap_clear(new->cap_ambient); /* * Now that we've computed pA', update pP' to give: * pP' = (X & fP) | (pI & fI) | pA' */ new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient); /* * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set, * this is the same as pE' = (fE ? pP' : 0) | pA'. */ |
4bf2ea77d
|
906 907 908 |
if (effective) new->cap_effective = new->cap_permitted; else |
58319057b
|
909 910 911 912 |
new->cap_effective = new->cap_ambient; if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; |
dbbbe1105
|
913 |
if (nonroot_raised_pE(new, old, root_uid, has_fcap)) { |
9fbc2c796
|
914 915 916 |
ret = audit_log_bprm_fcaps(bprm, new, old); if (ret < 0) return ret; |
3fc689e96
|
917 |
} |
1da177e4c
|
918 |
|
d84f4f992
|
919 |
new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); |
58319057b
|
920 921 922 |
if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; |
46d98eb4e
|
923 |
/* Check for privilege-elevated exec. */ |
02ebbaf48
|
924 925 926 927 |
if (is_setid || (!__is_real(root_uid, new) && (effective || __cap_grew(permitted, ambient, new)))) |
56305aa9b
|
928 |
bprm->secureexec = 1; |
b53767719
|
929 |
|
ee67ae7ef
|
930 |
return 0; |
1da177e4c
|
931 |
} |
1d045980e
|
932 933 934 935 936 937 938 939 940 941 942 943 944 945 |
/** * cap_inode_setxattr - Determine whether an xattr may be altered * @dentry: The inode/dentry being altered * @name: The name of the xattr to be changed * @value: The value that the xattr will be changed to * @size: The size of value * @flags: The replacement flag * * Determine whether an xattr may be altered or set on an inode, returning 0 if * permission is granted, -ve if denied. * * This is used to make sure security xattrs don't get updated or set by those * who aren't privileged to do so. */ |
8f0cfa52a
|
946 947 |
int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) |
1da177e4c
|
948 |
{ |
b1d749c5c
|
949 |
struct user_namespace *user_ns = dentry->d_sb->s_user_ns; |
8db6c34f1
|
950 951 |
/* Ignore non-security xattrs */ if (strncmp(name, XATTR_SECURITY_PREFIX, |
c5eaab1d1
|
952 |
XATTR_SECURITY_PREFIX_LEN) != 0) |
8db6c34f1
|
953 954 955 956 957 958 959 |
return 0; /* * For XATTR_NAME_CAPS the check will be done in * cap_convert_nscap(), called by setxattr() */ if (strcmp(name, XATTR_NAME_CAPS) == 0) |
b53767719
|
960 |
return 0; |
1d045980e
|
961 |
|
b1d749c5c
|
962 |
if (!ns_capable(user_ns, CAP_SYS_ADMIN)) |
1da177e4c
|
963 964 965 |
return -EPERM; return 0; } |
1d045980e
|
966 967 |
/** * cap_inode_removexattr - Determine whether an xattr may be removed |
71bc356f9
|
968 969 970 971 |
* * @mnt_userns: User namespace of the mount the inode was found from * @dentry: The inode/dentry being altered * @name: The name of the xattr to be changed |
1d045980e
|
972 973 974 975 |
* * Determine whether an xattr may be removed from an inode, returning 0 if * permission is granted, -ve if denied. * |
71bc356f9
|
976 977 978 979 980 981 |
* If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then * take care to map the inode according to @mnt_userns before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply passs init_user_ns. * |
1d045980e
|
982 983 984 |
* This is used to make sure security xattrs don't get removed by those who * aren't privileged to remove them. */ |
71bc356f9
|
985 986 |
int cap_inode_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name) |
1da177e4c
|
987 |
{ |
b1d749c5c
|
988 |
struct user_namespace *user_ns = dentry->d_sb->s_user_ns; |
8db6c34f1
|
989 990 |
/* Ignore non-security xattrs */ if (strncmp(name, XATTR_SECURITY_PREFIX, |
c5eaab1d1
|
991 |
XATTR_SECURITY_PREFIX_LEN) != 0) |
8db6c34f1
|
992 993 994 995 996 997 998 |
return 0; if (strcmp(name, XATTR_NAME_CAPS) == 0) { /* security.capability gets namespaced */ struct inode *inode = d_backing_inode(dentry); if (!inode) return -EINVAL; |
71bc356f9
|
999 |
if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) |
b53767719
|
1000 1001 |
return -EPERM; return 0; |
1d045980e
|
1002 |
} |
b1d749c5c
|
1003 |
if (!ns_capable(user_ns, CAP_SYS_ADMIN)) |
1da177e4c
|
1004 1005 1006 |
return -EPERM; return 0; } |
a6f76f23d
|
1007 |
/* |
1da177e4c
|
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 |
* cap_emulate_setxuid() fixes the effective / permitted capabilities of * a process after a call to setuid, setreuid, or setresuid. * * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of * {r,e,s}uid != 0, the permitted and effective capabilities are * cleared. * * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective * capabilities of the process are cleared. * * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective * capabilities are set to the permitted capabilities. * |
a6f76f23d
|
1021 |
* fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should |
1da177e4c
|
1022 1023 |
* never happen. * |
a6f76f23d
|
1024 |
* -astor |
1da177e4c
|
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 |
* * cevans - New behaviour, Oct '99 * A process may, via prctl(), elect to keep its capabilities when it * calls setuid() and switches away from uid==0. Both permitted and * effective sets will be retained. * Without this change, it was impossible for a daemon to drop only some * of its privilege. The call to setuid(!=0) would drop all privileges! * Keeping uid 0 is not an option because uid 0 owns too many vital * files.. * Thanks to Olaf Kirch and Peter Benie for spotting this. */ |
d84f4f992
|
1036 |
static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) |
1da177e4c
|
1037 |
{ |
18815a180
|
1038 1039 1040 1041 1042 1043 1044 |
kuid_t root_uid = make_kuid(old->user_ns, 0); if ((uid_eq(old->uid, root_uid) || uid_eq(old->euid, root_uid) || uid_eq(old->suid, root_uid)) && (!uid_eq(new->uid, root_uid) && !uid_eq(new->euid, root_uid) && |
58319057b
|
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 |
!uid_eq(new->suid, root_uid))) { if (!issecure(SECURE_KEEP_CAPS)) { cap_clear(new->cap_permitted); cap_clear(new->cap_effective); } /* * Pre-ambient programs expect setresuid to nonroot followed * by exec to drop capabilities. We should make sure that * this remains the case. */ cap_clear(new->cap_ambient); |
1da177e4c
|
1057 |
} |
18815a180
|
1058 |
if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid)) |
d84f4f992
|
1059 |
cap_clear(new->cap_effective); |
18815a180
|
1060 |
if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid)) |
d84f4f992
|
1061 |
new->cap_effective = new->cap_permitted; |
1da177e4c
|
1062 |
} |
1d045980e
|
1063 1064 1065 1066 1067 1068 1069 |
/** * cap_task_fix_setuid - Fix up the results of setuid() call * @new: The proposed credentials * @old: The current task's current credentials * @flags: Indications of what has changed * * Fix up the results of setuid() call before the credential changes are |
049ae601f
|
1070 1071 1072 |
* actually applied. * * Return: 0 to grant the changes, -ve to deny them. |
1d045980e
|
1073 |
*/ |
d84f4f992
|
1074 |
int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) |
1da177e4c
|
1075 1076 1077 1078 1079 |
{ switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: case LSM_SETID_RES: |
1d045980e
|
1080 1081 |
/* juggle the capabilities to follow [RES]UID changes unless * otherwise suppressed */ |
d84f4f992
|
1082 1083 |
if (!issecure(SECURE_NO_SETUID_FIXUP)) cap_emulate_setxuid(new, old); |
1da177e4c
|
1084 |
break; |
1da177e4c
|
1085 |
|
1d045980e
|
1086 1087 1088 1089 |
case LSM_SETID_FS: /* juggle the capabilties to follow FSUID changes, unless * otherwise suppressed * |
d84f4f992
|
1090 1091 1092 1093 |
* FIXME - is fsuser used for all CAP_FS_MASK capabilities? * if not, we might be a bit too harsh here. */ if (!issecure(SECURE_NO_SETUID_FIXUP)) { |
18815a180
|
1094 1095 |
kuid_t root_uid = make_kuid(old->user_ns, 0); if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid)) |
d84f4f992
|
1096 1097 |
new->cap_effective = cap_drop_fs_set(new->cap_effective); |
1d045980e
|
1098 |
|
18815a180
|
1099 |
if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid)) |
d84f4f992
|
1100 1101 1102 |
new->cap_effective = cap_raise_fs_set(new->cap_effective, new->cap_permitted); |
1da177e4c
|
1103 |
} |
d84f4f992
|
1104 |
break; |
1d045980e
|
1105 |
|
1da177e4c
|
1106 1107 1108 1109 1110 1111 |
default: return -EINVAL; } return 0; } |
b53767719
|
1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 |
/* * Rationale: code calling task_setscheduler, task_setioprio, and * task_setnice, assumes that * . if capable(cap_sys_nice), then those actions should be allowed * . if not capable(cap_sys_nice), but acting on your own processes, * then those actions should be allowed * This is insufficient now since you can call code without suid, but * yet with increased caps. * So we check for increased caps on the target process. */ |
de45e806a
|
1122 |
static int cap_safe_nice(struct task_struct *p) |
b53767719
|
1123 |
{ |
f54fb863c
|
1124 |
int is_subset, ret = 0; |
c69e8d9c0
|
1125 1126 1127 1128 |
rcu_read_lock(); is_subset = cap_issubset(__task_cred(p)->cap_permitted, current_cred()->cap_permitted); |
f54fb863c
|
1129 1130 |
if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) ret = -EPERM; |
c69e8d9c0
|
1131 |
rcu_read_unlock(); |
f54fb863c
|
1132 |
return ret; |
b53767719
|
1133 |
} |
1d045980e
|
1134 1135 1136 |
/**
 * cap_task_setscheduler - Determine if scheduler policy change is permitted
 * @p: The task to affect
 *
 * Determine if the requested scheduler policy change is permitted for the
 * specified task.  The decision is delegated to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	int ret = cap_safe_nice(p);

	return ret;
}
1d045980e
|
1147 |
/**
 * cap_task_setioprio - Determine if I/O priority change is permitted
 * @p: The task to affect
 * @ioprio: The I/O priority to set (not consulted by this check)
 *
 * Determine if the requested I/O priority change is permitted for the
 * specified task.  The decision is delegated to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	int ret = cap_safe_nice(p);

	return ret;
}
1d045980e
|
1161 |
/**
 * cap_task_setnice - Determine if task priority change is permitted
 * @p: The task to affect
 * @nice: The nice value to set (not consulted by this check)
 *
 * Determine if the requested task priority change is permitted for the
 * specified task.  The decision is delegated to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	int ret = cap_safe_nice(p);

	return ret;
}
3b7391de6
|
1175 |
/* |
1d045980e
|
1176 1177 |
* Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from * the current task's bounding set. Returns 0 on success, -ve on error. |
3b7391de6
|
1178 |
*/ |
6d6f33284
|
1179 |
static int cap_prctl_drop(unsigned long cap) |
3b7391de6
|
1180 |
{ |
6d6f33284
|
1181 |
struct cred *new; |
160da84db
|
1182 |
if (!ns_capable(current_user_ns(), CAP_SETPCAP)) |
3b7391de6
|
1183 1184 1185 |
return -EPERM; if (!cap_valid(cap)) return -EINVAL; |
d84f4f992
|
1186 |
|
6d6f33284
|
1187 1188 1189 |
new = prepare_creds(); if (!new) return -ENOMEM; |
d84f4f992
|
1190 |
cap_lower(new->cap_bset, cap); |
6d6f33284
|
1191 |
return commit_creds(new); |
3b7391de6
|
1192 |
} |
3898b1b4e
|
1193 |
|
1d045980e
|
1194 1195 1196 |
/** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested |
049ae601f
|
1197 1198 1199 1200 |
* @arg2: The argument data for this function * @arg3: The argument data for this function * @arg4: The argument data for this function * @arg5: The argument data for this function |
1d045980e
|
1201 1202 1203 1204 |
* * Allow process control functions (sys_prctl()) to alter capabilities; may * also deny access to other functions not otherwise implemented here. * |
049ae601f
|
1205 |
* Return: 0 or +ve on success, -ENOSYS if this function is not implemented |
1d045980e
|
1206 1207 1208 |
* here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM * modules will consider performing the function. */ |
3898b1b4e
|
1209 |
int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, |
d84f4f992
|
1210 |
unsigned long arg4, unsigned long arg5) |
3898b1b4e
|
1211 |
{ |
6d6f33284
|
1212 |
const struct cred *old = current_cred(); |
d84f4f992
|
1213 |
struct cred *new; |
d84f4f992
|
1214 |
|
3898b1b4e
|
1215 1216 1217 |
switch (option) { case PR_CAPBSET_READ: if (!cap_valid(arg2)) |
6d6f33284
|
1218 1219 |
return -EINVAL; return !!cap_raised(old->cap_bset, arg2); |
d84f4f992
|
1220 |
|
3898b1b4e
|
1221 |
case PR_CAPBSET_DROP: |
6d6f33284
|
1222 |
return cap_prctl_drop(arg2); |
3898b1b4e
|
1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 |
/* * The next four prctl's remain to assist with transitioning a * system from legacy UID=0 based privilege (when filesystem * capabilities are not in use) to a system using filesystem * capabilities only - as the POSIX.1e draft intended. * * Note: * * PR_SET_SECUREBITS = * issecure_mask(SECURE_KEEP_CAPS_LOCKED) * | issecure_mask(SECURE_NOROOT) * | issecure_mask(SECURE_NOROOT_LOCKED) * | issecure_mask(SECURE_NO_SETUID_FIXUP) * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) * * will ensure that the current process and all of its * children will be locked into a pure * capability-based-privilege environment. */ case PR_SET_SECUREBITS: |
6d6f33284
|
1244 1245 1246 |
if ((((old->securebits & SECURE_ALL_LOCKS) >> 1) & (old->securebits ^ arg2)) /*[1]*/ || ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ |
d84f4f992
|
1247 |
|| (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ |
6a9de4911
|
1248 |
|| (cap_capable(current_cred(), |
c1a85a00e
|
1249 1250 1251 |
current_cred()->user_ns, CAP_SETPCAP, CAP_OPT_NONE) != 0) /*[4]*/ |
3898b1b4e
|
1252 1253 1254 1255 1256 1257 1258 |
/* * [1] no changing of bits that are locked * [2] no unlocking of locks * [3] no setting of unsupported bits * [4] doing anything requires privilege (go read about * the "sendmail capabilities bug") */ |
d84f4f992
|
1259 1260 |
) /* cannot change a locked bit */ |
6d6f33284
|
1261 1262 1263 1264 1265 |
return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; |
d84f4f992
|
1266 |
new->securebits = arg2; |
6d6f33284
|
1267 |
return commit_creds(new); |
d84f4f992
|
1268 |
|
3898b1b4e
|
1269 |
case PR_GET_SECUREBITS: |
6d6f33284
|
1270 |
return old->securebits; |
3898b1b4e
|
1271 |
|
3898b1b4e
|
1272 |
case PR_GET_KEEPCAPS: |
6d6f33284
|
1273 |
return !!issecure(SECURE_KEEP_CAPS); |
d84f4f992
|
1274 |
|
3898b1b4e
|
1275 1276 |
case PR_SET_KEEPCAPS: if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ |
6d6f33284
|
1277 |
return -EINVAL; |
d84f4f992
|
1278 |
if (issecure(SECURE_KEEP_CAPS_LOCKED)) |
6d6f33284
|
1279 1280 1281 1282 1283 |
return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; |
d84f4f992
|
1284 1285 |
if (arg2) new->securebits |= issecure_mask(SECURE_KEEP_CAPS); |
3898b1b4e
|
1286 |
else |
d84f4f992
|
1287 |
new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); |
6d6f33284
|
1288 |
return commit_creds(new); |
3898b1b4e
|
1289 |
|
58319057b
|
1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 |
case PR_CAP_AMBIENT: if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) { if (arg3 | arg4 | arg5) return -EINVAL; new = prepare_creds(); if (!new) return -ENOMEM; cap_clear(new->cap_ambient); return commit_creds(new); } if (((!cap_valid(arg3)) | arg4 | arg5)) return -EINVAL; if (arg2 == PR_CAP_AMBIENT_IS_SET) { return !!cap_raised(current_cred()->cap_ambient, arg3); } else if (arg2 != PR_CAP_AMBIENT_RAISE && arg2 != PR_CAP_AMBIENT_LOWER) { return -EINVAL; } else { if (arg2 == PR_CAP_AMBIENT_RAISE && (!cap_raised(current_cred()->cap_permitted, arg3) || !cap_raised(current_cred()->cap_inheritable, |
746bf6d64
|
1314 1315 |
arg3) || issecure(SECURE_NO_CAP_AMBIENT_RAISE))) |
58319057b
|
1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 |
return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; if (arg2 == PR_CAP_AMBIENT_RAISE) cap_raise(new->cap_ambient, arg3); else cap_lower(new->cap_ambient, arg3); return commit_creds(new); } |
3898b1b4e
|
1327 1328 |
default: /* No functionality available - continue with default */ |
6d6f33284
|
1329 |
return -ENOSYS; |
3898b1b4e
|
1330 |
} |
1da177e4c
|
1331 |
} |
1d045980e
|
1332 |
/** |
1d045980e
|
1333 1334 1335 1336 1337 |
* cap_vm_enough_memory - Determine whether a new virtual mapping is permitted * @mm: The VM space in which the new mapping is to be made * @pages: The size of the mapping * * Determine whether the allocation of a new virtual mapping by the current |
049ae601f
|
1338 1339 1340 |
* task is permitted. * * Return: 1 if permission is granted, 0 if not. |
1d045980e
|
1341 |
*/ |
34b4e4aa3
|
1342 |
int cap_vm_enough_memory(struct mm_struct *mm, long pages) |
1da177e4c
|
1343 1344 |
{ int cap_sys_admin = 0; |
c1a85a00e
|
1345 1346 |
if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0) |
1da177e4c
|
1347 |
cap_sys_admin = 1; |
c1a85a00e
|
1348 |
|
b1d9e6b06
|
1349 |
return cap_sys_admin; |
1da177e4c
|
1350 |
} |
7c73875e7
|
1351 |
|
049ae601f
|
1352 |
/** |
d007794a1
|
1353 |
* cap_mmap_addr - check if able to map given addr |
7c73875e7
|
1354 |
* @addr: address attempting to be mapped |
7c73875e7
|
1355 |
* |
6f262d8e1
|
1356 |
* If the process is attempting to map memory below dac_mmap_min_addr they need |
7c73875e7
|
1357 |
* CAP_SYS_RAWIO. The other parameters to this function are unused by the |
049ae601f
|
1358 1359 1360 |
* capability security module. * * Return: 0 if this mapping should be allowed or -EPERM if not. |
7c73875e7
|
1361 |
*/ |
d007794a1
|
1362 |
int cap_mmap_addr(unsigned long addr) |
7c73875e7
|
1363 1364 |
{ int ret = 0; |
a2551df7e
|
1365 |
if (addr < dac_mmap_min_addr) { |
6a9de4911
|
1366 |
ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO, |
c1a85a00e
|
1367 |
CAP_OPT_NONE); |
7c73875e7
|
1368 1369 1370 1371 1372 1373 |
/* set PF_SUPERPRIV if it turns out we allow the low mmap */ if (ret == 0) current->flags |= PF_SUPERPRIV; } return ret; } |
d007794a1
|
1374 |
|
e5467859f
|
1375 1376 |
/*
 * cap_mmap_file - mmap_file hook for the capability module
 *
 * None of the arguments is examined; the mapping is always allowed (0).
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	const int allowed = 0;

	return allowed;
}
b1d9e6b06
|
1380 1381 |
#ifdef CONFIG_SECURITY

/* Hook table handed to security_add_hooks() by capability_init(). */
static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
	/* capability queries and updates */
	LSM_HOOK_INIT(capable, cap_capable),
	LSM_HOOK_INIT(settime, cap_settime),
	LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
	LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
	LSM_HOOK_INIT(capget, cap_capget),
	LSM_HOOK_INIT(capset, cap_capset),
	/* exec */
	LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
	/* inodes */
	LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
	LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
	LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
	/* memory mappings */
	LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
	LSM_HOOK_INIT(mmap_file, cap_mmap_file),
	/* tasks */
	LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
	LSM_HOOK_INIT(task_prctl, cap_task_prctl),
	LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
	LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
	LSM_HOOK_INIT(task_setnice, cap_task_setnice),
	LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
};

/* Register the hook table above under the name "capability". */
static int __init capability_init(void)
{
	security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
			   "capability");
	return 0;
}

/* LSM descriptor: initialised via capability_init(), ordered first. */
DEFINE_LSM(capability) = {
	.name = "capability",
	.order = LSM_ORDER_FIRST,
	.init = capability_init,
};

#endif /* CONFIG_SECURITY */