Commit 14a40ffccd6163bbcd1d6f32b28a88ffe6149fc6
1 parent
2e109a2855
Exists in
master
and in
20 other branches
sched: replace PF_THREAD_BOUND with PF_NO_SETAFFINITY
PF_THREAD_BOUND was originally used to mark kernel threads which were bound to a specific CPU using kthread_bind() and a task with the flag set allows cpus_allowed modifications only to itself. Workqueue is currently abusing it to prevent userland from meddling with cpus_allowed of workqueue workers. What we need is a flag to prevent userland from messing with cpus_allowed of certain kernel tasks. In kernel, anyone can (incorrectly) squash the flag, and, for worker-type usages, restricting cpus_allowed modification to the task itself doesn't provide meaningful extra protection as other tasks can inject work items to the task anyway. This patch replaces PF_THREAD_BOUND with PF_NO_SETAFFINITY. sched_setaffinity() checks the flag and returns -EINVAL if set. set_cpus_allowed_ptr() is no longer affected by the flag. This will allow simplifying workqueue worker CPU affinity management. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de>
Showing 6 changed files with 19 additions and 24 deletions Side-by-side Diff
include/linux/sched.h
... | ... | @@ -1793,7 +1793,7 @@ |
1793 | 1793 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ |
1794 | 1794 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ |
1795 | 1795 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ |
1796 | -#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ | |
1796 | +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ | |
1797 | 1797 | #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ |
1798 | 1798 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ |
1799 | 1799 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ |
kernel/cgroup.c
... | ... | @@ -2224,11 +2224,11 @@ |
2224 | 2224 | tsk = tsk->group_leader; |
2225 | 2225 | |
2226 | 2226 | /* |
2227 | - * Workqueue threads may acquire PF_THREAD_BOUND and become | |
2227 | + * Workqueue threads may acquire PF_NO_SETAFFINITY and become | |
2228 | 2228 | * trapped in a cpuset, or RT worker may be born in a cgroup |
2229 | 2229 | * with no rt_runtime allocated. Just say no. |
2230 | 2230 | */ |
2231 | - if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) { | |
2231 | + if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { | |
2232 | 2232 | ret = -EINVAL; |
2233 | 2233 | rcu_read_unlock(); |
2234 | 2234 | goto out_unlock_cgroup; |
kernel/cpuset.c
... | ... | @@ -1388,16 +1388,16 @@ |
1388 | 1388 | |
1389 | 1389 | cgroup_taskset_for_each(task, cgrp, tset) { |
1390 | 1390 | /* |
1391 | - * Kthreads bound to specific cpus cannot be moved to a new | |
1392 | - * cpuset; we cannot change their cpu affinity and | |
1393 | - * isolating such threads by their set of allowed nodes is | |
1394 | - * unnecessary. Thus, cpusets are not applicable for such | |
1395 | - * threads. This prevents checking for success of | |
1396 | - * set_cpus_allowed_ptr() on all attached tasks before | |
1397 | - * cpus_allowed may be changed. | |
1391 | + * Kthreads which disallow setaffinity shouldn't be moved | |
1392 | + * to a new cpuset; we don't want to change their cpu | |
1393 | + * affinity and isolating such threads by their set of | |
1394 | + * allowed nodes is unnecessary. Thus, cpusets are not | |
1395 | + * applicable for such threads. This prevents checking for | |
1396 | + * success of set_cpus_allowed_ptr() on all attached tasks | |
1397 | + * before cpus_allowed may be changed. | |
1398 | 1398 | */ |
1399 | 1399 | ret = -EINVAL; |
1400 | - if (task->flags & PF_THREAD_BOUND) | |
1400 | + if (task->flags & PF_NO_SETAFFINITY) | |
1401 | 1401 | goto out_unlock; |
1402 | 1402 | ret = security_task_setscheduler(task); |
1403 | 1403 | if (ret) |
kernel/kthread.c
kernel/sched/core.c
... | ... | @@ -4126,6 +4126,10 @@ |
4126 | 4126 | get_task_struct(p); |
4127 | 4127 | rcu_read_unlock(); |
4128 | 4128 | |
4129 | + if (p->flags & PF_NO_SETAFFINITY) { | |
4130 | + retval = -EINVAL; | |
4131 | + goto out_put_task; | |
4132 | + } | |
4129 | 4133 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { |
4130 | 4134 | retval = -ENOMEM; |
4131 | 4135 | goto out_put_task; |
... | ... | @@ -4769,11 +4773,6 @@ |
4769 | 4773 | goto out; |
4770 | 4774 | |
4771 | 4775 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
4772 | - ret = -EINVAL; | |
4773 | - goto out; | |
4774 | - } | |
4775 | - | |
4776 | - if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { | |
4777 | 4776 | ret = -EINVAL; |
4778 | 4777 | goto out; |
4779 | 4778 | } |
kernel/workqueue.c
... | ... | @@ -1757,12 +1757,8 @@ |
1757 | 1757 | set_user_nice(worker->task, pool->attrs->nice); |
1758 | 1758 | set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); |
1759 | 1759 | |
1760 | - /* | |
1761 | - * %PF_THREAD_BOUND is used to prevent userland from meddling with | |
1762 | - * cpumask of workqueue workers. This is an abuse. We need | |
1763 | - * %PF_NO_SETAFFINITY. | |
1764 | - */ | |
1765 | - worker->task->flags |= PF_THREAD_BOUND; | |
1760 | + /* prevent userland from meddling with cpumask of workqueue workers */ | |
1761 | + worker->task->flags |= PF_NO_SETAFFINITY; | |
1766 | 1762 | |
1767 | 1763 | /* |
1768 | 1764 | * The caller is responsible for ensuring %POOL_DISASSOCIATED |
... | ... | @@ -3876,7 +3872,7 @@ |
3876 | 3872 | } |
3877 | 3873 | |
3878 | 3874 | wq->rescuer = rescuer; |
3879 | - rescuer->task->flags |= PF_THREAD_BOUND; | |
3875 | + rescuer->task->flags |= PF_NO_SETAFFINITY; | |
3880 | 3876 | wake_up_process(rescuer->task); |
3881 | 3877 | } |
3882 | 3878 |