Commit 6111da3432b10b2c56a21a5d8671aee46435326d

Authored by Linus Torvalds

Merge branch 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "This is quite late but these need to be backported anyway.

  This is the fix for a long-standing cpuset bug which has existed since
  2009.  cpuset makes use of PF_SPREAD_{PAGE|SLAB} flags to modify the
  task's memory allocation behavior according to the settings of the
  cpuset it belongs to; unfortunately, when those flags had to be
  changed, cpuset did so directly even while the target task was running,
  which is obviously racy as task->flags may be modified by the task
  itself at any time.  This obscure bug manifested as a corrupt
  PF_USED_MATH flag leading to a weird crash.

  The bug is fixed by moving the flags to task->atomic_flags.  The first
  two patches are preparatory ones that help define the atomic_flags
  accessors, and the third one is the actual fix"

* 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags
  sched: add macros to define bitops for task atomic flags
  sched: fix confusing PFA_NO_NEW_PRIVS constant
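
To make the race described above concrete, here is a minimal sketch (illustrative only, not actual kernel code; both function names are hypothetical) of why a plain read-modify-write on tsk->flags from the cpuset code can clobber a concurrent update the task makes to its own flags, and why an atomic bitop on a separate atomic_flags word cannot:

    /*
     * Sketch of the pre-fix pattern: "tsk->flags |= PF_SPREAD_PAGE" is a
     * non-atomic load/modify/store, so if the target task concurrently
     * does "current->flags |= PF_USED_MATH" on another CPU, one of the
     * two stores can overwrite the other (lost update), corrupting
     * unrelated flag bits.
     */
    static void cpuset_update_spread_racy(struct task_struct *tsk)
    {
            tsk->flags |= PF_SPREAD_PAGE;   /* races with the task itself */
    }

    /*
     * Sketch of the post-fix pattern: the spread bits live in a dedicated
     * tsk->atomic_flags word and are only touched with atomic bitops, so
     * an update from another CPU cannot clobber concurrent changes to
     * tsk->flags or to other atomic_flags bits.
     */
    static void cpuset_update_spread_fixed(struct task_struct *tsk)
    {
            set_bit(PFA_SPREAD_PAGE, &tsk->atomic_flags);
    }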

Showing 6 changed files

Documentation/cgroups/cpusets.txt
... ... @@ -345,14 +345,14 @@
345 345 The implementation is simple.
346 346  
347 347 Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
348   -PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
  348 +PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
349 349 joins that cpuset. The page allocation calls for the page cache
350   -is modified to perform an inline check for this PF_SPREAD_PAGE task
  350 +is modified to perform an inline check for this PFA_SPREAD_PAGE task
351 351 flag, and if set, a call to a new routine cpuset_mem_spread_node()
352 352 returns the node to prefer for the allocation.
353 353  
354 354 Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
355   -PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
  355 +PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
356 356 pages from the node returned by cpuset_mem_spread_node().
357 357  
358 358 The cpuset_mem_spread_node() routine is also simple. It uses the
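
A simplified sketch of the inline check described above, loosely modeled on the page cache allocation path (not the verbatim kernel code; the function name here is illustrative):

    #ifdef CONFIG_NUMA
    static struct page *pagecache_alloc_sketch(gfp_t gfp)
    {
            if (cpuset_do_page_mem_spread()) {
                    /* spreading enabled: prefer the node chosen
                     * round-robin over the cpuset's allowed nodes */
                    int nid = cpuset_mem_spread_node();

                    return alloc_pages_exact_node(nid, gfp, 0);
            }
            /* default policy: allocate near the requesting CPU */
            return alloc_pages(gfp, 0);
    }
    #endif
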
include/linux/cpuset.h
... ... @@ -93,12 +93,12 @@
93 93  
94 94 static inline int cpuset_do_page_mem_spread(void)
95 95 {
96   - return current->flags & PF_SPREAD_PAGE;
  96 + return task_spread_page(current);
97 97 }
98 98  
99 99 static inline int cpuset_do_slab_mem_spread(void)
100 100 {
101   - return current->flags & PF_SPREAD_SLAB;
  101 + return task_spread_slab(current);
102 102 }
103 103  
104 104 extern int current_cpuset_is_being_rebound(void);
include/linux/sched.h
... ... @@ -1903,8 +1903,6 @@
1903 1903 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1904 1904 #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
1905 1905 #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1906   -#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
1907   -#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
1908 1906 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
1909 1907 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1910 1908 #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1911 1909  
1912 1910  
... ... @@ -1957,17 +1955,31 @@
1957 1955 }
1958 1956  
1959 1957 /* Per-process atomic flags. */
1960   -#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */
  1958 +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
  1959 +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
  1960 +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
1961 1961  
1962   -static inline bool task_no_new_privs(struct task_struct *p)
1963   -{
1964   - return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
1965   -}
1966 1962  
1967   -static inline void task_set_no_new_privs(struct task_struct *p)
1968   -{
1969   - set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
1970   -}
  1963 +#define TASK_PFA_TEST(name, func) \
  1964 + static inline bool task_##func(struct task_struct *p) \
  1965 + { return test_bit(PFA_##name, &p->atomic_flags); }
  1966 +#define TASK_PFA_SET(name, func) \
  1967 + static inline void task_set_##func(struct task_struct *p) \
  1968 + { set_bit(PFA_##name, &p->atomic_flags); }
  1969 +#define TASK_PFA_CLEAR(name, func) \
  1970 + static inline void task_clear_##func(struct task_struct *p) \
  1971 + { clear_bit(PFA_##name, &p->atomic_flags); }
  1972 +
  1973 +TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
  1974 +TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
  1975 +
  1976 +TASK_PFA_TEST(SPREAD_PAGE, spread_page)
  1977 +TASK_PFA_SET(SPREAD_PAGE, spread_page)
  1978 +TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
  1979 +
  1980 +TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
  1981 +TASK_PFA_SET(SPREAD_SLAB, spread_slab)
  1982 +TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
1971 1983  
1972 1984 /*
1973 1985 * task->jobctl flags
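
For SPREAD_PAGE, the three TASK_PFA_* invocations above generate accessors equivalent to the following (SPREAD_SLAB and NO_NEW_PRIVS expand analogously):

    static inline bool task_spread_page(struct task_struct *p)
    { return test_bit(PFA_SPREAD_PAGE, &p->atomic_flags); }

    static inline void task_set_spread_page(struct task_struct *p)
    { set_bit(PFA_SPREAD_PAGE, &p->atomic_flags); }

    static inline void task_clear_spread_page(struct task_struct *p)
    { clear_bit(PFA_SPREAD_PAGE, &p->atomic_flags); }
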
kernel/cpuset.c
... ... @@ -365,13 +365,14 @@
365 365 struct task_struct *tsk)
366 366 {
367 367 if (is_spread_page(cs))
368   - tsk->flags |= PF_SPREAD_PAGE;
  368 + task_set_spread_page(tsk);
369 369 else
370   - tsk->flags &= ~PF_SPREAD_PAGE;
  370 + task_clear_spread_page(tsk);
  371 +
371 372 if (is_spread_slab(cs))
372   - tsk->flags |= PF_SPREAD_SLAB;
  373 + task_set_spread_slab(tsk);
373 374 else
374   - tsk->flags &= ~PF_SPREAD_SLAB;
  375 + task_clear_spread_slab(tsk);
375 376 }
376 377  
377 378 /*
mm/slab.c
... ... @@ -2987,7 +2987,7 @@
2987 2987  
2988 2988 #ifdef CONFIG_NUMA
2989 2989 /*
2990   - * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
  2990 + * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
2991 2991 *
2992 2992 * If we are in_interrupt, then process context, including cpusets and
2993 2993 * mempolicy, may not apply and should not be used for allocation policy.
... ... @@ -3219,7 +3219,7 @@
3219 3219 {
3220 3220 void *objp;
3221 3221  
3222   - if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
  3222 + if (current->mempolicy || cpuset_do_slab_mem_spread()) {
3223 3223 objp = alternate_node_alloc(cache, flags);
3224 3224 if (objp)
3225 3225 goto out;
scripts/tags.sh
... ... @@ -197,6 +197,9 @@
197 197 --regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \
198 198 --regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \
199 199 --regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
  200 + --regex-c++='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \
  201 + --regex-c++='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \
  202 + --regex-c++='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/'\
200 203 --regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \
201 204 --regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \
202 205 --regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/' \
... ... @@ -260,6 +263,9 @@
260 263 --regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \
261 264 --regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \
262 265 --regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
  266 + --regex='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \
  267 + --regex='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \
  268 + --regex='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/' \
263 269 --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \
264 270 --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \
265 271 --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\
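
As a worked example of what the new ctags/etags rules produce, each accessor-generating macro invocation in include/linux/sched.h yields a tag named after the function it defines:

    TASK_PFA_TEST(SPREAD_PAGE, spread_page)    ->  tag: task_spread_page
    TASK_PFA_SET(SPREAD_PAGE, spread_page)     ->  tag: task_set_spread_page
    TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)   ->  tag: task_clear_spread_page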