Commit ab763c7112ce0e2559c73f921617c81dc7287ca6

Authored by Andrew G. Morgan
Committed by Linus Torvalds
1 parent 5459c164f0

security: filesystem capabilities refactor kernel code

To date, we've tried hard to confine filesystem support for capabilities
to the security modules.  This has left a lot of the code in
kernel/capability.c in a state where it looks like it supports something
that filesystem support for capabilities actually suppresses when the LSM
security/commoncap.c code runs.  What is left is a lot of code that uses
sub-optimal locking in the main kernel.

With this change we refactor the main kernel code and make it explicit
which locks are needed and that the only remaining kernel races in this
area are associated with non-filesystem capability code.

Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 221 additions and 117 deletions Side-by-side Diff

... ... @@ -115,13 +115,210 @@
115 115 return 0;
116 116 }
117 117  
  118 +#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
  119 +
118 120 /*
119   - * For sys_getproccap() and sys_setproccap(), any of the three
120   - * capability set pointers may be NULL -- indicating that that set is
121   - * uninteresting and/or not to be changed.
  121 + * Without filesystem capability support, we nominally support one process
  122 + * setting the capabilities of another
122 123 */
  124 +static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
  125 + kernel_cap_t *pIp, kernel_cap_t *pPp)
  126 +{
  127 + struct task_struct *target;
  128 + int ret;
123 129  
  130 + spin_lock(&task_capability_lock);
  131 + read_lock(&tasklist_lock);
  132 +
  133 + if (pid && pid != task_pid_vnr(current)) {
  134 + target = find_task_by_vpid(pid);
  135 + if (!target) {
  136 + ret = -ESRCH;
  137 + goto out;
  138 + }
  139 + } else
  140 + target = current;
  141 +
  142 + ret = security_capget(target, pEp, pIp, pPp);
  143 +
  144 +out:
  145 + read_unlock(&tasklist_lock);
  146 + spin_unlock(&task_capability_lock);
  147 +
  148 + return ret;
  149 +}
  150 +
124 151 /*
  152 + * cap_set_pg - set capabilities for all processes in a given process
  153 + * group. Takes task_capability_lock and tasklist_lock internally.
  154 + */
  155 +static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
  156 + kernel_cap_t *inheritable,
  157 + kernel_cap_t *permitted)
  158 +{
  159 + struct task_struct *g, *target;
  160 + int ret = -EPERM;
  161 + int found = 0;
  162 + struct pid *pgrp;
  163 +
  164 + spin_lock(&task_capability_lock);
  165 + read_lock(&tasklist_lock);
  166 +
  167 + pgrp = find_vpid(pgrp_nr);
  168 + do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
  169 + target = g;
  170 + while_each_thread(g, target) {
  171 + if (!security_capset_check(target, effective,
  172 + inheritable, permitted)) {
  173 + security_capset_set(target, effective,
  174 + inheritable, permitted);
  175 + ret = 0;
  176 + }
  177 + found = 1;
  178 + }
  179 + } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
  180 +
  181 + read_unlock(&tasklist_lock);
  182 + spin_unlock(&task_capability_lock);
  183 +
  184 + if (!found)
  185 + ret = 0;
  186 + return ret;
  187 +}
  188 +
  189 +/*
  190 + * cap_set_all - set capabilities for all processes other than init
  191 + * and self. Takes task_capability_lock and tasklist_lock internally.
  192 + */
  193 +static inline int cap_set_all(kernel_cap_t *effective,
  194 + kernel_cap_t *inheritable,
  195 + kernel_cap_t *permitted)
  196 +{
  197 + struct task_struct *g, *target;
  198 + int ret = -EPERM;
  199 + int found = 0;
  200 +
  201 + spin_lock(&task_capability_lock);
  202 + read_lock(&tasklist_lock);
  203 +
  204 + do_each_thread(g, target) {
  205 + if (target == current
  206 + || is_container_init(target->group_leader))
  207 + continue;
  208 + found = 1;
  209 + if (security_capset_check(target, effective, inheritable,
  210 + permitted))
  211 + continue;
  212 + ret = 0;
  213 + security_capset_set(target, effective, inheritable, permitted);
  214 + } while_each_thread(g, target);
  215 +
  216 + read_unlock(&tasklist_lock);
  217 + spin_unlock(&task_capability_lock);
  218 +
  219 + if (!found)
  220 + ret = 0;
  221 +
  222 + return ret;
  223 +}
  224 +
  225 +/*
  226 + * Given the target pid does not refer to the current process we
  227 + * need more elaborate support... (This support is not present when
  228 + * filesystem capabilities are configured.)
  229 + */
  230 +static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
  231 + kernel_cap_t *inheritable,
  232 + kernel_cap_t *permitted)
  233 +{
  234 + struct task_struct *target;
  235 + int ret;
  236 +
  237 + if (!capable(CAP_SETPCAP))
  238 + return -EPERM;
  239 +
  240 + if (pid == -1) /* all procs other than current and init */
  241 + return cap_set_all(effective, inheritable, permitted);
  242 +
  243 + else if (pid < 0) /* all procs in process group */
  244 + return cap_set_pg(-pid, effective, inheritable, permitted);
  245 +
  246 + /* target != current */
  247 + spin_lock(&task_capability_lock);
  248 + read_lock(&tasklist_lock);
  249 +
  250 + target = find_task_by_vpid(pid);
  251 + if (!target)
  252 + ret = -ESRCH;
  253 + else {
  254 + ret = security_capset_check(target, effective, inheritable,
  255 + permitted);
  256 +
  257 + /* having verified that the proposed changes are legal,
  258 + we now put them into effect. */
  259 + if (!ret)
  260 + security_capset_set(target, effective, inheritable,
  261 + permitted);
  262 + }
  263 +
  264 + read_unlock(&tasklist_lock);
  265 + spin_unlock(&task_capability_lock);
  266 +
  267 + return ret;
  268 +}
  269 +
  270 +#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
  271 +
  272 +/*
  273 + * If we have configured with filesystem capability support, then the
  274 + * only thing that can change the capabilities of the current process
  275 + * is the current process. As such, we can't be in this code at the
  276 + * same time as we are in the process of setting capabilities in this
  277 + * process. The net result is that we can limit our use of locks to
  278 + * when we are reading the caps of another process.
  279 + */
  280 +static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
  281 + kernel_cap_t *pIp, kernel_cap_t *pPp)
  282 +{
  283 + int ret;
  284 +
  285 + if (pid && (pid != task_pid_vnr(current))) {
  286 + struct task_struct *target;
  287 +
  288 + spin_lock(&task_capability_lock);
  289 + read_lock(&tasklist_lock);
  290 +
  291 + target = find_task_by_vpid(pid);
  292 + if (!target)
  293 + ret = -ESRCH;
  294 + else
  295 + ret = security_capget(target, pEp, pIp, pPp);
  296 +
  297 + read_unlock(&tasklist_lock);
  298 + spin_unlock(&task_capability_lock);
  299 + } else
  300 + ret = security_capget(current, pEp, pIp, pPp);
  301 +
  302 + return ret;
  303 +}
  304 +
  305 +/*
  306 + * With filesystem capability support configured, the kernel does not
  307 + * permit the changing of capabilities in one process by another
  308 + * process. (CAP_SETPCAP has much less broad semantics when configured
  309 + * this way.)
  310 + */
  311 +static inline int do_sys_capset_other_tasks(pid_t pid,
  312 + kernel_cap_t *effective,
  313 + kernel_cap_t *inheritable,
  314 + kernel_cap_t *permitted)
  315 +{
  316 + return -EPERM;
  317 +}
  318 +
  319 +#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
  320 +
  321 +/*
125 322 * Atomically modify the effective capabilities returning the original
126 323 * value. No permission check is performed here - it is assumed that the
127 324 * caller is permitted to set the desired effective capabilities.
... ... @@ -155,7 +352,6 @@
155 352 {
156 353 int ret = 0;
157 354 pid_t pid;
158   - struct task_struct *target;
159 355 unsigned tocopy;
160 356 kernel_cap_t pE, pI, pP;
161 357  
162 358  
... ... @@ -169,24 +365,8 @@
169 365 if (pid < 0)
170 366 return -EINVAL;
171 367  
172   - spin_lock(&task_capability_lock);
173   - read_lock(&tasklist_lock);
  368 + ret = cap_get_target_pid(pid, &pE, &pI, &pP);
174 369  
175   - if (pid && pid != task_pid_vnr(current)) {
176   - target = find_task_by_vpid(pid);
177   - if (!target) {
178   - ret = -ESRCH;
179   - goto out;
180   - }
181   - } else
182   - target = current;
183   -
184   - ret = security_capget(target, &pE, &pI, &pP);
185   -
186   -out:
187   - read_unlock(&tasklist_lock);
188   - spin_unlock(&task_capability_lock);
189   -
190 370 if (!ret) {
191 371 struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
192 372 unsigned i;
... ... @@ -216,7 +396,6 @@
216 396 * before modification is attempted and the application
217 397 * fails.
218 398 */
219   -
220 399 if (copy_to_user(dataptr, kdata, tocopy
221 400 * sizeof(struct __user_cap_data_struct))) {
222 401 return -EFAULT;
223 402  
... ... @@ -226,70 +405,8 @@
226 405 return ret;
227 406 }
228 407  
229   -/*
230   - * cap_set_pg - set capabilities for all processes in a given process
231   - * group. We call this holding task_capability_lock and tasklist_lock.
232   - */
233   -static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
234   - kernel_cap_t *inheritable,
235   - kernel_cap_t *permitted)
236   -{
237   - struct task_struct *g, *target;
238   - int ret = -EPERM;
239   - int found = 0;
240   - struct pid *pgrp;
241   -
242   - pgrp = find_vpid(pgrp_nr);
243   - do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
244   - target = g;
245   - while_each_thread(g, target) {
246   - if (!security_capset_check(target, effective,
247   - inheritable,
248   - permitted)) {
249   - security_capset_set(target, effective,
250   - inheritable,
251   - permitted);
252   - ret = 0;
253   - }
254   - found = 1;
255   - }
256   - } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
257   -
258   - if (!found)
259   - ret = 0;
260   - return ret;
261   -}
262   -
263   -/*
264   - * cap_set_all - set capabilities for all processes other than init
265   - * and self. We call this holding task_capability_lock and tasklist_lock.
266   - */
267   -static inline int cap_set_all(kernel_cap_t *effective,
268   - kernel_cap_t *inheritable,
269   - kernel_cap_t *permitted)
270   -{
271   - struct task_struct *g, *target;
272   - int ret = -EPERM;
273   - int found = 0;
274   -
275   - do_each_thread(g, target) {
276   - if (target == current || is_container_init(target->group_leader))
277   - continue;
278   - found = 1;
279   - if (security_capset_check(target, effective, inheritable,
280   - permitted))
281   - continue;
282   - ret = 0;
283   - security_capset_set(target, effective, inheritable, permitted);
284   - } while_each_thread(g, target);
285   -
286   - if (!found)
287   - ret = 0;
288   - return ret;
289   -}
290   -
291 408 /**
292   - * sys_capset - set capabilities for a process or a group of processes
  409 + * sys_capset - set capabilities for a process or (*) a group of processes
293 410 * @header: pointer to struct that contains capability version and
294 411 * target pid data
295 412 * @data: pointer to struct that contains the effective, permitted,
... ... @@ -313,7 +430,6 @@
313 430 struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
314 431 unsigned i, tocopy;
315 432 kernel_cap_t inheritable, permitted, effective;
316   - struct task_struct *target;
317 433 int ret;
318 434 pid_t pid;
319 435  
... ... @@ -324,9 +440,6 @@
324 440 if (get_user(pid, &header->pid))
325 441 return -EFAULT;
326 442  
327   - if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
328   - return -EPERM;
329   -
330 443 if (copy_from_user(&kdata, data, tocopy
331 444 * sizeof(struct __user_cap_data_struct))) {
332 445 return -EFAULT;
333 446  
334 447  
335 448  
336 449  
337 450  
... ... @@ -344,40 +457,31 @@
344 457 i++;
345 458 }
346 459  
347   - spin_lock(&task_capability_lock);
348   - read_lock(&tasklist_lock);
  460 + if (pid && (pid != task_pid_vnr(current)))
  461 + ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
  462 + &permitted);
  463 + else {
  464 + /*
  465 + * This lock is required even when filesystem
  466 + * capability support is configured - it protects the
  467 + * sys_capget() call from returning incorrect data in
  468 + * the case that the targeted process is not the
  469 + * current one.
  470 + */
  471 + spin_lock(&task_capability_lock);
349 472  
350   - if (pid > 0 && pid != task_pid_vnr(current)) {
351   - target = find_task_by_vpid(pid);
352   - if (!target) {
353   - ret = -ESRCH;
354   - goto out;
355   - }
356   - } else
357   - target = current;
358   -
359   - ret = 0;
360   -
361   - /* having verified that the proposed changes are legal,
362   - we now put them into effect. */
363   - if (pid < 0) {
364   - if (pid == -1) /* all procs other than current and init */
365   - ret = cap_set_all(&effective, &inheritable, &permitted);
366   -
367   - else /* all procs in process group */
368   - ret = cap_set_pg(-pid, &effective, &inheritable,
369   - &permitted);
370   - } else {
371   - ret = security_capset_check(target, &effective, &inheritable,
  473 + ret = security_capset_check(current, &effective, &inheritable,
372 474 &permitted);
  475 + /*
  476 + * Having verified that the proposed changes are
  477 + * legal, we now put them into effect.
  478 + */
373 479 if (!ret)
374   - security_capset_set(target, &effective, &inheritable,
  480 + security_capset_set(current, &effective, &inheritable,
375 481 &permitted);
  482 + spin_unlock(&task_capability_lock);
376 483 }
377 484  
378   -out:
379   - read_unlock(&tasklist_lock);
380   - spin_unlock(&task_capability_lock);
381 485  
382 486 return ret;
383 487 }