Commit 72c2d5823fc7be799a12184974c3bdc57acea3c4

Authored by Andrew Morgan
Committed by Linus Torvalds
1 parent 7058cb02dd

V3 file capabilities: alter behavior of cap_setpcap

The non-filesystem capability meaning of CAP_SETPCAP is that a process, p1,
can change the capabilities of another process, p2.  This is not the
meaning that was intended for this capability at all, and this
implementation came about purely because, without filesystem capabilities,
there was no way to use capabilities without one process bestowing them on
another.

Since we now have filesystem support for capabilities we can fix the
implementation of CAP_SETPCAP.

The most significant thing about this change is that, with it in effect, no
process can set the capabilities of another process.

The capabilities of a program are set via the capability convolution
rules:

   pI(post-exec) = pI(pre-exec)
   pP(post-exec) = (X(aka cap_bset) & fP) | (pI(post-exec) & fI)
   pE(post-exec) = fE ? pP(post-exec) : 0

at exec() time.  As such, the only influence the pre-exec() program can
have on the post-exec() program's capabilities is through the pI
capability set.

The correct implementation for CAP_SETPCAP (and that enabled by this patch)
is that it can be used to add extra pI capabilities to the current process
- to be picked up by subsequent exec()s when the above convolution rules
are applied.

Here is how it works:

Let's say we have a process, p. It has capability sets pE, pP and pI.
Generally, p can change the value of its own pI to pI' where

   (pI' & ~pI) & ~pP = 0.

That is, the only new things in pI' that were not present in pI need to
be present in pP.

The role of CAP_SETPCAP is basically to permit changes to pI beyond
the above:

   if (pE & CAP_SETPCAP) {
      pI' = anything; /* ie., even (pI' & ~pI) & ~pP != 0  */
   }

This capability is useful for things like login, which (say, via
pam_cap) might want to raise certain inheritable capabilities for use
by the children of the logged-in user's shell, but those capabilities
are not useful to or needed by the login program itself.

One such use might be to limit who can run ping. You set the
capabilities of the 'ping' program to be "= cap_net_raw+i", and then
only shells that have (pI & CAP_NET_RAW) will be able to run
it. Without CAP_SETPCAP implemented as described above, login(pam_cap)
would have to also have (pP & CAP_NET_RAW) in order to raise this
capability and pass it on through the inheritable set.

Signed-off-by: Andrew Morgan <morgan@kernel.org>
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 7 changed files with 80 additions and 24 deletions Side-by-side Diff

include/linux/capability.h
... ... @@ -310,10 +310,6 @@
310 310 #define CAP_SETFCAP 31
311 311  
312 312 #ifdef __KERNEL__
313   -/*
314   - * Bounding set
315   - */
316   -extern kernel_cap_t cap_bset;
317 313  
318 314 /*
319 315 * Internal kernel functions only
include/linux/security.h
... ... @@ -34,6 +34,13 @@
34 34 #include <linux/xfrm.h>
35 35 #include <net/flow.h>
36 36  
  37 +/*
  38 + * Bounding set
  39 + */
  40 +extern kernel_cap_t cap_bset;
  41 +
  42 +extern unsigned securebits;
  43 +
37 44 struct ctl_table;
38 45  
39 46 /*
... ... @@ -3,7 +3,7 @@
3 3 *
4 4 * Copyright (C) 1997 Andrew Main <zefram@fysh.org>
5 5 *
6   - * Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com>
  6 + * Integrated into 2.1.97+, Andrew G. Morgan <morgan@kernel.org>
7 7 * 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net>
8 8 */
9 9  
... ... @@ -13,9 +13,6 @@
13 13 #include <linux/security.h>
14 14 #include <linux/syscalls.h>
15 15 #include <asm/uaccess.h>
16   -
17   -unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
18   -kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
19 16  
20 17 /*
21 18 * This lock protects task->cap_* for all tasks including current.
... ... @@ -24,7 +24,7 @@
24 24 #include <linux/slab.h>
25 25 #include <linux/sysctl.h>
26 26 #include <linux/proc_fs.h>
27   -#include <linux/capability.h>
  27 +#include <linux/security.h>
28 28 #include <linux/ctype.h>
29 29 #include <linux/utsname.h>
30 30 #include <linux/smp_lock.h>
... ... @@ -371,6 +371,7 @@
371 371 .proc_handler = &proc_dointvec_taint,
372 372 },
373 373 #endif
  374 +#ifdef CONFIG_SECURITY_CAPABILITIES
374 375 {
375 376 .procname = "cap-bound",
376 377 .data = &cap_bset,
... ... @@ -378,6 +379,7 @@
378 379 .mode = 0600,
379 380 .proc_handler = &proc_dointvec_bset,
380 381 },
  382 +#endif /* def CONFIG_SECURITY_CAPABILITIES */
381 383 #ifdef CONFIG_BLK_DEV_INITRD
382 384 {
383 385 .ctl_name = KERN_REALROOTDEV,
384 386  
... ... @@ -1872,10 +1874,11 @@
1872 1874 return 0;
1873 1875 }
1874 1876  
  1877 +#ifdef CONFIG_SECURITY_CAPABILITIES
1875 1878 /*
1876 1879 * init may raise the set.
1877 1880 */
1878   -
  1881 +
1879 1882 int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
1880 1883 void __user *buffer, size_t *lenp, loff_t *ppos)
1881 1884 {
... ... @@ -1889,6 +1892,7 @@
1889 1892 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1890 1893 do_proc_dointvec_bset_conv,&op);
1891 1894 }
  1895 +#endif /* def CONFIG_SECURITY_CAPABILITIES */
1892 1896  
1893 1897 /*
1894 1898 * Taint values can only be increased
kernel/sysctl_check.c
... ... @@ -38,7 +38,10 @@
38 38 { KERN_NODENAME, "hostname" },
39 39 { KERN_DOMAINNAME, "domainname" },
40 40  
  41 +#ifdef CONFIG_SECURITY_CAPABILITIES
41 42 { KERN_CAP_BSET, "cap-bound" },
  43 +#endif /* def CONFIG_SECURITY_CAPABILITIES */
  44 +
42 45 { KERN_PANIC, "panic" },
43 46 { KERN_REALROOTDEV, "real-root-dev" },
44 47  
45 48  
... ... @@ -1532,7 +1535,9 @@
1532 1535 (table->strategy == sysctl_ms_jiffies) ||
1533 1536 (table->proc_handler == proc_dostring) ||
1534 1537 (table->proc_handler == proc_dointvec) ||
  1538 +#ifdef CONFIG_SECURITY_CAPABILITIES
1535 1539 (table->proc_handler == proc_dointvec_bset) ||
  1540 +#endif /* def CONFIG_SECURITY_CAPABILITIES */
1536 1541 (table->proc_handler == proc_dointvec_minmax) ||
1537 1542 (table->proc_handler == proc_dointvec_jiffies) ||
1538 1543 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
security/commoncap.c
... ... @@ -24,6 +24,25 @@
24 24 #include <linux/hugetlb.h>
25 25 #include <linux/mount.h>
26 26  
  27 +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
  28 +/*
  29 + * Because of the reduced scope of CAP_SETPCAP when filesystem
  30 + * capabilities are in effect, it is safe to allow this capability to
  31 + * be available in the default configuration.
  32 + */
  33 +# define CAP_INIT_BSET CAP_FULL_SET
  34 +#else /* ie. ndef CONFIG_SECURITY_FILE_CAPABILITIES */
  35 +# define CAP_INIT_BSET CAP_INIT_EFF_SET
  36 +#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
  37 +
  38 +kernel_cap_t cap_bset = CAP_INIT_BSET; /* systemwide capability bound */
  39 +EXPORT_SYMBOL(cap_bset);
  40 +
  41 +/* Global security state */
  42 +
  43 +unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
  44 +EXPORT_SYMBOL(securebits);
  45 +
27 46 int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
28 47 {
29 48 NETLINK_CB(skb).eff_cap = current->cap_effective;
30 49  
... ... @@ -73,14 +92,44 @@
73 92 return 0;
74 93 }
75 94  
  95 +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
  96 +
  97 +static inline int cap_block_setpcap(struct task_struct *target)
  98 +{
  99 + /*
  100 + * No support for remote process capability manipulation with
  101 + * filesystem capability support.
  102 + */
  103 + return (target != current);
  104 +}
  105 +
  106 +static inline int cap_inh_is_capped(void)
  107 +{
  108 + /*
  109 + * return 1 if changes to the inheritable set are limited
  110 + * to the old permitted set.
  111 + */
  112 + return !cap_capable(current, CAP_SETPCAP);
  113 +}
  114 +
  115 +#else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
  116 +
  117 +static inline int cap_block_setpcap(struct task_struct *t) { return 0; }
  118 +static inline int cap_inh_is_capped(void) { return 1; }
  119 +
  120 +#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
  121 +
76 122 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective,
77 123 kernel_cap_t *inheritable, kernel_cap_t *permitted)
78 124 {
79   - /* Derived from kernel/capability.c:sys_capset. */
80   - /* verify restrictions on target's new Inheritable set */
81   - if (!cap_issubset (*inheritable,
82   - cap_combine (target->cap_inheritable,
83   - current->cap_permitted))) {
  125 + if (cap_block_setpcap(target)) {
  126 + return -EPERM;
  127 + }
  128 + if (cap_inh_is_capped()
  129 + && !cap_issubset(*inheritable,
  130 + cap_combine(target->cap_inheritable,
  131 + current->cap_permitted))) {
  132 + /* incapable of using this inheritable set */
84 133 return -EPERM;
85 134 }
86 135  
... ... @@ -37,15 +37,13 @@
37 37 kernel_cap_t * inheritable, kernel_cap_t * permitted)
38 38 {
39 39 *effective = *inheritable = *permitted = 0;
40   - if (!issecure(SECURE_NOROOT)) {
41   - if (target->euid == 0) {
42   - *permitted |= (~0 & ~CAP_FS_MASK);
43   - *effective |= (~0 & ~CAP_TO_MASK(CAP_SETPCAP) & ~CAP_FS_MASK);
44   - }
45   - if (target->fsuid == 0) {
46   - *permitted |= CAP_FS_MASK;
47   - *effective |= CAP_FS_MASK;
48   - }
  40 + if (target->euid == 0) {
  41 + *permitted |= (~0 & ~CAP_FS_MASK);
  42 + *effective |= (~0 & ~CAP_TO_MASK(CAP_SETPCAP) & ~CAP_FS_MASK);
  43 + }
  44 + if (target->fsuid == 0) {
  45 + *permitted |= CAP_FS_MASK;
  46 + *effective |= CAP_FS_MASK;
49 47 }
50 48 return 0;
51 49 }