Commit ad676077a2ae4af4bb6627486ce19ccce04f1efe

Authored by Aristeu Rozanski
Committed by Linus Torvalds
1 parent 868539a3b6

device_cgroup: convert device_cgroup internally to policy + exceptions

The original model of device_cgroup is having a whitelist where all the
allowed devices are listed. The problem with this approach is that is
impossible to have the case of allowing everything but few devices.

The reason for that lies in the way the whitelist is handled internally:
since there's only a whitelist, the "all devices" entry would have to be
removed and replaced by the entire list of possible devices but the ones
that are being denied.  Since dev_t is 32 bits long, representing the allowed
devices as a bitfield is not memory efficient.

This patch replaces the "whitelist" by a "exceptions" list and the default
policy is kept as "deny_all" variable in dev_cgroup structure.

The current interface determines that whenever "a" is written to devices.allow
or devices.deny, the entry masking all devices will be added or removed,
respectively. This behavior is kept and it's what will determine the default
policy:

	# cat devices.list
	a *:* rwm
	# echo a >devices.deny
	# cat devices.list
	# echo a >devices.allow
	# cat devices.list
	a *:* rwm

The interface is also preserved. For example, if one wants to block only access
to /dev/null:
	# ls -l /dev/null
	crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null
	# echo a >devices.allow
	# echo "c 1:3 rwm" >devices.deny
	# cat /dev/null
	cat: /dev/null: Operation not permitted
	# echo >/dev/null
	bash: /dev/null: Operation not permitted
	mknod /tmp/null c 1 3
	mknod: `/tmp/null': Operation not permitted
	# echo "c 1:3 r" >devices.allow
	# cat /dev/null
	# echo >/dev/null
	bash: /dev/null: Operation not permitted
	mknod /tmp/null c 1 3
	mknod: `/tmp/null': Operation not permitted
	# echo "c 1:3 rw" >devices.allow
	# echo >/dev/null
	# cat /dev/null
	# mknod /tmp/null c 1 3
	mknod: `/tmp/null': Operation not permitted
	# echo "c 1:3 rwm" >devices.allow
	# echo >/dev/null
	# cat /dev/null
	# mknod /tmp/null c 1 3
	#

Note that I didn't rename the functions/variables in this patch, but in the
next one to make reviewing easier.

Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: James Morris <jmorris@namei.org>
Cc: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 134 additions and 98 deletions Side-by-side Diff

security/device_cgroup.c
... ... @@ -96,7 +96,6 @@
96 96 return -ENOMEM;
97 97 }
98 98  
99   -/* Stupid prototype - don't bother combining existing entries */
100 99 /*
101 100 * called under devcgroup_mutex
102 101 */
103 102  
104 103  
105 104  
... ... @@ -136,16 +135,13 @@
136 135 struct dev_whitelist_item *walk, *tmp;
137 136  
138 137 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
139   - if (walk->type == DEV_ALL)
140   - goto remove;
141 138 if (walk->type != wh->type)
142 139 continue;
143   - if (walk->major != ~0 && walk->major != wh->major)
  140 + if (walk->major != wh->major)
144 141 continue;
145   - if (walk->minor != ~0 && walk->minor != wh->minor)
  142 + if (walk->minor != wh->minor)
146 143 continue;
147 144  
148   -remove:
149 145 walk->access &= ~wh->access;
150 146 if (!walk->access) {
151 147 list_del_rcu(&walk->list);
152 148  
... ... @@ -185,19 +181,9 @@
185 181 INIT_LIST_HEAD(&dev_cgroup->whitelist);
186 182 parent_cgroup = cgroup->parent;
187 183  
188   - if (parent_cgroup == NULL) {
189   - struct dev_whitelist_item *wh;
190   - wh = kmalloc(sizeof(*wh), GFP_KERNEL);
191   - if (!wh) {
192   - kfree(dev_cgroup);
193   - return ERR_PTR(-ENOMEM);
194   - }
195   - wh->minor = wh->major = ~0;
196   - wh->type = DEV_ALL;
197   - wh->access = ACC_MASK;
  184 + if (parent_cgroup == NULL)
198 185 dev_cgroup->deny_all = false;
199   - list_add(&wh->list, &dev_cgroup->whitelist);
200   - } else {
  186 + else {
201 187 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
202 188 mutex_lock(&devcgroup_mutex);
203 189 ret = dev_whitelist_copy(&dev_cgroup->whitelist,
204 190  
205 191  
206 192  
207 193  
208 194  
... ... @@ -268,33 +254,48 @@
268 254 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
269 255  
270 256 rcu_read_lock();
271   - list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
272   - set_access(acc, wh->access);
273   - set_majmin(maj, wh->major);
274   - set_majmin(min, wh->minor);
275   - seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
  257 + /*
  258 + * To preserve the compatibility:
  259 + * - Only show the "all devices" when the default policy is to allow
  260 + * - List the exceptions in case the default policy is to deny
  261 + * This way, the file remains as a "whitelist of devices"
  262 + */
  263 + if (devcgroup->deny_all == false) {
  264 + set_access(acc, ACC_MASK);
  265 + set_majmin(maj, ~0);
  266 + set_majmin(min, ~0);
  267 + seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
276 268 maj, min, acc);
  269 + } else {
  270 + list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
  271 + set_access(acc, wh->access);
  272 + set_majmin(maj, wh->major);
  273 + set_majmin(min, wh->minor);
  274 + seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
  275 + maj, min, acc);
  276 + }
277 277 }
278 278 rcu_read_unlock();
279 279  
280 280 return 0;
281 281 }
282 282  
283   -/*
284   - * may_access_whitelist:
285   - * does the access granted to dev_cgroup c contain the access
286   - * requested in whitelist item refwh.
287   - * return 1 if yes, 0 if no.
288   - * call with devcgroup_mutex held
  283 +/**
  284 + * may_access_whitelist - verifies if a new rule is part of what is allowed
  285 + * by a dev cgroup based on the default policy +
  286 + * exceptions. This is used to make sure a child cgroup
  287 + * won't have more privileges than its parent or to
  288 + * verify if a certain access is allowed.
  289 + * @dev_cgroup: dev cgroup to be tested against
  290 + * @refwh: new rule
289 291 */
290   -static int may_access_whitelist(struct dev_cgroup *c,
291   - struct dev_whitelist_item *refwh)
  292 +static int may_access_whitelist(struct dev_cgroup *dev_cgroup,
  293 + struct dev_whitelist_item *refwh)
292 294 {
293 295 struct dev_whitelist_item *whitem;
  296 + bool match = false;
294 297  
295   - list_for_each_entry(whitem, &c->whitelist, list) {
296   - if (whitem->type & DEV_ALL)
297   - return 1;
  298 + list_for_each_entry(whitem, &dev_cgroup->whitelist, list) {
298 299 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK))
299 300 continue;
300 301 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR))
301 302  
... ... @@ -305,8 +306,21 @@
305 306 continue;
306 307 if (refwh->access & (~whitem->access))
307 308 continue;
308   - return 1;
  309 + match = true;
  310 + break;
309 311 }
  312 +
  313 + /*
  314 + * In two cases we'll consider this new rule valid:
  315 + * - the dev cgroup has its default policy to allow + exception list:
  316 + * the new rule should *not* match any of the exceptions
  317 + * (!deny_all, !match)
  318 + * - the dev cgroup has its default policy to deny + exception list:
  319 + * the new rule *should* match the exceptions
  320 + * (deny_all, match)
  321 + */
  322 + if (dev_cgroup->deny_all == match)
  323 + return 1;
310 324 return 0;
311 325 }
312 326  
... ... @@ -356,11 +370,21 @@
356 370  
357 371 switch (*b) {
358 372 case 'a':
359   - wh.type = DEV_ALL;
360   - wh.access = ACC_MASK;
361   - wh.major = ~0;
362   - wh.minor = ~0;
363   - goto handle;
  373 + switch (filetype) {
  374 + case DEVCG_ALLOW:
  375 + if (!parent_has_perm(devcgroup, &wh))
  376 + return -EPERM;
  377 + dev_whitelist_clean(devcgroup);
  378 + devcgroup->deny_all = false;
  379 + break;
  380 + case DEVCG_DENY:
  381 + dev_whitelist_clean(devcgroup);
  382 + devcgroup->deny_all = true;
  383 + break;
  384 + default:
  385 + return -EINVAL;
  386 + }
  387 + return 0;
364 388 case 'b':
365 389 wh.type = DEV_BLOCK;
366 390 break;
367 391  
368 392  
... ... @@ -419,17 +443,31 @@
419 443 }
420 444 }
421 445  
422   -handle:
423 446 switch (filetype) {
424 447 case DEVCG_ALLOW:
425 448 if (!parent_has_perm(devcgroup, &wh))
426 449 return -EPERM;
427   - devcgroup->deny_all = false;
  450 + /*
  451 + * If the default policy is to allow by default, try to remove
  452 + * an matching exception instead. And be silent about it: we
  453 + * don't want to break compatibility
  454 + */
  455 + if (devcgroup->deny_all == false) {
  456 + dev_whitelist_rm(devcgroup, &wh);
  457 + return 0;
  458 + }
428 459 return dev_whitelist_add(devcgroup, &wh);
429 460 case DEVCG_DENY:
430   - dev_whitelist_rm(devcgroup, &wh);
431   - devcgroup->deny_all = true;
432   - break;
  461 + /*
  462 + * If the default policy is to deny by default, try to remove
  463 + * an matching exception instead. And be silent about it: we
  464 + * don't want to break compatibility
  465 + */
  466 + if (devcgroup->deny_all == true) {
  467 + dev_whitelist_rm(devcgroup, &wh);
  468 + return 0;
  469 + }
  470 + return dev_whitelist_add(devcgroup, &wh);
433 471 default:
434 472 return -EINVAL;
435 473 }
436 474  
437 475  
438 476  
439 477  
440 478  
441 479  
442 480  
443 481  
444 482  
445 483  
446 484  
447 485  
... ... @@ -485,74 +523,72 @@
485 523 .broken_hierarchy = true,
486 524 };
487 525  
488   -int __devcgroup_inode_permission(struct inode *inode, int mask)
  526 +/**
  527 + * __devcgroup_check_permission - checks if an inode operation is permitted
  528 + * @dev_cgroup: the dev cgroup to be tested against
  529 + * @type: device type
  530 + * @major: device major number
  531 + * @minor: device minor number
  532 + * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
  533 + *
  534 + * returns 0 on success, -EPERM case the operation is not permitted
  535 + */
  536 +static int __devcgroup_check_permission(struct dev_cgroup *dev_cgroup,
  537 + short type, u32 major, u32 minor,
  538 + short access)
489 539 {
490   - struct dev_cgroup *dev_cgroup;
491   - struct dev_whitelist_item *wh;
  540 + struct dev_whitelist_item wh;
  541 + int rc;
492 542  
  543 + memset(&wh, 0, sizeof(wh));
  544 + wh.type = type;
  545 + wh.major = major;
  546 + wh.minor = minor;
  547 + wh.access = access;
  548 +
493 549 rcu_read_lock();
  550 + rc = may_access_whitelist(dev_cgroup, &wh);
  551 + rcu_read_unlock();
494 552  
495   - dev_cgroup = task_devcgroup(current);
  553 + if (!rc)
  554 + return -EPERM;
496 555  
497   - list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
498   - if (wh->type & DEV_ALL)
499   - goto found;
500   - if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
501   - continue;
502   - if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode))
503   - continue;
504   - if (wh->major != ~0 && wh->major != imajor(inode))
505   - continue;
506   - if (wh->minor != ~0 && wh->minor != iminor(inode))
507   - continue;
  556 + return 0;
  557 +}
508 558  
509   - if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE))
510   - continue;
511   - if ((mask & MAY_READ) && !(wh->access & ACC_READ))
512   - continue;
513   -found:
514   - rcu_read_unlock();
515   - return 0;
516   - }
  559 +int __devcgroup_inode_permission(struct inode *inode, int mask)
  560 +{
  561 + struct dev_cgroup *dev_cgroup = task_devcgroup(current);
  562 + short type, access = 0;
517 563  
518   - rcu_read_unlock();
  564 + if (S_ISBLK(inode->i_mode))
  565 + type = DEV_BLOCK;
  566 + if (S_ISCHR(inode->i_mode))
  567 + type = DEV_CHAR;
  568 + if (mask & MAY_WRITE)
  569 + access |= ACC_WRITE;
  570 + if (mask & MAY_READ)
  571 + access |= ACC_READ;
519 572  
520   - return -EPERM;
  573 + return __devcgroup_check_permission(dev_cgroup, type, imajor(inode),
  574 + iminor(inode), access);
521 575 }
522 576  
523 577 int devcgroup_inode_mknod(int mode, dev_t dev)
524 578 {
525   - struct dev_cgroup *dev_cgroup;
526   - struct dev_whitelist_item *wh;
  579 + struct dev_cgroup *dev_cgroup = task_devcgroup(current);
  580 + short type;
527 581  
528 582 if (!S_ISBLK(mode) && !S_ISCHR(mode))
529 583 return 0;
530 584  
531   - rcu_read_lock();
  585 + if (S_ISBLK(mode))
  586 + type = DEV_BLOCK;
  587 + else
  588 + type = DEV_CHAR;
532 589  
533   - dev_cgroup = task_devcgroup(current);
  590 + return __devcgroup_check_permission(dev_cgroup, type, MAJOR(dev),
  591 + MINOR(dev), ACC_MKNOD);
534 592  
535   - list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
536   - if (wh->type & DEV_ALL)
537   - goto found;
538   - if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode))
539   - continue;
540   - if ((wh->type & DEV_CHAR) && !S_ISCHR(mode))
541   - continue;
542   - if (wh->major != ~0 && wh->major != MAJOR(dev))
543   - continue;
544   - if (wh->minor != ~0 && wh->minor != MINOR(dev))
545   - continue;
546   -
547   - if (!(wh->access & ACC_MKNOD))
548   - continue;
549   -found:
550   - rcu_read_unlock();
551   - return 0;
552   - }
553   -
554   - rcu_read_unlock();
555   -
556   - return -EPERM;
557 593 }