Commit 14a40ffccd6163bbcd1d6f32b28a88ffe6149fc6

Authored by Tejun Heo
1 parent 2e109a2855

sched: replace PF_THREAD_BOUND with PF_NO_SETAFFINITY

PF_THREAD_BOUND was originally used to mark kernel threads which were
bound to a specific CPU using kthread_bind() and a task with the flag
set allows cpus_allowed modifications only to itself.  Workqueue is
currently abusing it to prevent userland from meddling with
cpus_allowed of workqueue workers.

What we need is a flag to prevent userland from messing with
cpus_allowed of certain kernel tasks.  In kernel, anyone can
(incorrectly) squash the flag, and, for worker-type usages,
restricting cpus_allowed modification to the task itself doesn't
provide meaningful extra protection as other tasks can inject work
items to the task anyway.

This patch replaces PF_THREAD_BOUND with PF_NO_SETAFFINITY.
sched_setaffinity() checks the flag and returns -EINVAL if set.
set_cpus_allowed_ptr() is no longer affected by the flag.

This will allow simplifying workqueue worker CPU affinity management.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>

Showing 6 changed files with 19 additions and 24 deletions Side-by-side Diff

include/linux/sched.h
... ... @@ -1793,7 +1793,7 @@
1793 1793 #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1794 1794 #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
1795 1795 #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
1796   -#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
  1796 +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
1797 1797 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1798 1798 #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
1799 1799 #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
... ... @@ -2224,11 +2224,11 @@
2224 2224 tsk = tsk->group_leader;
2225 2225  
2226 2226 /*
2227   - * Workqueue threads may acquire PF_THREAD_BOUND and become
  2227 + * Workqueue threads may acquire PF_NO_SETAFFINITY and become
2228 2228 * trapped in a cpuset, or RT worker may be born in a cgroup
2229 2229 * with no rt_runtime allocated. Just say no.
2230 2230 */
2231   - if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
  2231 + if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
2232 2232 ret = -EINVAL;
2233 2233 rcu_read_unlock();
2234 2234 goto out_unlock_cgroup;
... ... @@ -1388,16 +1388,16 @@
1388 1388  
1389 1389 cgroup_taskset_for_each(task, cgrp, tset) {
1390 1390 /*
1391   - * Kthreads bound to specific cpus cannot be moved to a new
1392   - * cpuset; we cannot change their cpu affinity and
1393   - * isolating such threads by their set of allowed nodes is
1394   - * unnecessary. Thus, cpusets are not applicable for such
1395   - * threads. This prevents checking for success of
1396   - * set_cpus_allowed_ptr() on all attached tasks before
1397   - * cpus_allowed may be changed.
  1391 + * Kthreads which disallow setaffinity shouldn't be moved
  1392 + * to a new cpuset; we don't want to change their cpu
  1393 + * affinity and isolating such threads by their set of
  1394 + * allowed nodes is unnecessary. Thus, cpusets are not
  1395 + * applicable for such threads. This prevents checking for
  1396 + * success of set_cpus_allowed_ptr() on all attached tasks
  1397 + * before cpus_allowed may be changed.
1398 1398 */
1399 1399 ret = -EINVAL;
1400   - if (task->flags & PF_THREAD_BOUND)
  1400 + if (task->flags & PF_NO_SETAFFINITY)
1401 1401 goto out_unlock;
1402 1402 ret = security_task_setscheduler(task);
1403 1403 if (ret)
... ... @@ -260,7 +260,7 @@
260 260 {
261 261 /* It's safe because the task is inactive. */
262 262 do_set_cpus_allowed(p, cpumask_of(cpu));
263   - p->flags |= PF_THREAD_BOUND;
  263 + p->flags |= PF_NO_SETAFFINITY;
264 264 }
265 265  
266 266 /**
... ... @@ -4126,6 +4126,10 @@
4126 4126 get_task_struct(p);
4127 4127 rcu_read_unlock();
4128 4128  
  4129 + if (p->flags & PF_NO_SETAFFINITY) {
  4130 + retval = -EINVAL;
  4131 + goto out_put_task;
  4132 + }
4129 4133 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
4130 4134 retval = -ENOMEM;
4131 4135 goto out_put_task;
... ... @@ -4769,11 +4773,6 @@
4769 4773 goto out;
4770 4774  
4771 4775 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
4772   - ret = -EINVAL;
4773   - goto out;
4774   - }
4775   -
4776   - if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
4777 4776 ret = -EINVAL;
4778 4777 goto out;
4779 4778 }
... ... @@ -1757,12 +1757,8 @@
1757 1757 set_user_nice(worker->task, pool->attrs->nice);
1758 1758 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1759 1759  
1760   - /*
1761   - * %PF_THREAD_BOUND is used to prevent userland from meddling with
1762   - * cpumask of workqueue workers. This is an abuse. We need
1763   - * %PF_NO_SETAFFINITY.
1764   - */
1765   - worker->task->flags |= PF_THREAD_BOUND;
  1760 + /* prevent userland from meddling with cpumask of workqueue workers */
  1761 + worker->task->flags |= PF_NO_SETAFFINITY;
1766 1762  
1767 1763 /*
1768 1764 * The caller is responsible for ensuring %POOL_DISASSOCIATED
... ... @@ -3876,7 +3872,7 @@
3876 3872 }
3877 3873  
3878 3874 wq->rescuer = rescuer;
3879   - rescuer->task->flags |= PF_THREAD_BOUND;
  3875 + rescuer->task->flags |= PF_NO_SETAFFINITY;
3880 3876 wake_up_process(rescuer->task);
3881 3877 }
3882 3878