Commit 6111da3432b10b2c56a21a5d8671aee46435326d
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
Merge branch 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo: "This is quite late but these need to be backported anyway. This is the fix for a long-standing cpuset bug which existed from 2009. cpuset makes use of PF_SPREAD_{PAGE|SLAB} flags to modify the task's memory allocation behavior according to the settings of the cpuset it belongs to; unfortunately, when those flags have to be changed, cpuset did so directly even while the target task is running, which is obviously racy as task->flags may be modified by the task itself at any time. This obscure bug manifested as corrupt PF_USED_MATH flag leading to a weird crash. The bug is fixed by moving the flag to task->atomic_flags. The first two are preparatory ones to help define atomic_flags accessors and the third one is the actual fix" * 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags sched: add macros to define bitops for task atomic flags sched: fix confusing PFA_NO_NEW_PRIVS constant
Showing 6 changed files Side-by-side Diff
Documentation/cgroups/cpusets.txt
... | ... | @@ -345,14 +345,14 @@ |
345 | 345 | The implementation is simple. |
346 | 346 | |
347 | 347 | Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag |
348 | -PF_SPREAD_PAGE for each task that is in that cpuset or subsequently | |
348 | +PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently | |
349 | 349 | joins that cpuset. The page allocation calls for the page cache |
350 | -is modified to perform an inline check for this PF_SPREAD_PAGE task | |
350 | +is modified to perform an inline check for this PFA_SPREAD_PAGE task | |
351 | 351 | flag, and if set, a call to a new routine cpuset_mem_spread_node() |
352 | 352 | returns the node to prefer for the allocation. |
353 | 353 | |
354 | 354 | Similarly, setting 'cpuset.memory_spread_slab' turns on the flag |
355 | -PF_SPREAD_SLAB, and appropriately marked slab caches will allocate | |
355 | +PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate | |
356 | 356 | pages from the node returned by cpuset_mem_spread_node(). |
357 | 357 | |
358 | 358 | The cpuset_mem_spread_node() routine is also simple. It uses the |
include/linux/cpuset.h
... | ... | @@ -93,12 +93,12 @@ |
93 | 93 | |
94 | 94 | static inline int cpuset_do_page_mem_spread(void) |
95 | 95 | { |
96 | - return current->flags & PF_SPREAD_PAGE; | |
96 | + return task_spread_page(current); | |
97 | 97 | } |
98 | 98 | |
99 | 99 | static inline int cpuset_do_slab_mem_spread(void) |
100 | 100 | { |
101 | - return current->flags & PF_SPREAD_SLAB; | |
101 | + return task_spread_slab(current); | |
102 | 102 | } |
103 | 103 | |
104 | 104 | extern int current_cpuset_is_being_rebound(void); |
include/linux/sched.h
... | ... | @@ -1903,8 +1903,6 @@ |
1903 | 1903 | #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ |
1904 | 1904 | #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ |
1905 | 1905 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ |
1906 | -#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ | |
1907 | -#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ | |
1908 | 1906 | #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ |
1909 | 1907 | #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ |
1910 | 1908 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ |
1911 | 1909 | |
1912 | 1910 | |
... | ... | @@ -1957,17 +1955,31 @@ |
1957 | 1955 | } |
1958 | 1956 | |
1959 | 1957 | /* Per-process atomic flags. */ |
1960 | -#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */ | |
1958 | +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ | |
1959 | +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ | |
1960 | +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ | |
1961 | 1961 | |
1962 | -static inline bool task_no_new_privs(struct task_struct *p) | |
1963 | -{ | |
1964 | - return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); | |
1965 | -} | |
1966 | 1962 | |
1967 | -static inline void task_set_no_new_privs(struct task_struct *p) | |
1968 | -{ | |
1969 | - set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); | |
1970 | -} | |
1963 | +#define TASK_PFA_TEST(name, func) \ | |
1964 | + static inline bool task_##func(struct task_struct *p) \ | |
1965 | + { return test_bit(PFA_##name, &p->atomic_flags); } | |
1966 | +#define TASK_PFA_SET(name, func) \ | |
1967 | + static inline void task_set_##func(struct task_struct *p) \ | |
1968 | + { set_bit(PFA_##name, &p->atomic_flags); } | |
1969 | +#define TASK_PFA_CLEAR(name, func) \ | |
1970 | + static inline void task_clear_##func(struct task_struct *p) \ | |
1971 | + { clear_bit(PFA_##name, &p->atomic_flags); } | |
1972 | + | |
1973 | +TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) | |
1974 | +TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) | |
1975 | + | |
1976 | +TASK_PFA_TEST(SPREAD_PAGE, spread_page) | |
1977 | +TASK_PFA_SET(SPREAD_PAGE, spread_page) | |
1978 | +TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) | |
1979 | + | |
1980 | +TASK_PFA_TEST(SPREAD_SLAB, spread_slab) | |
1981 | +TASK_PFA_SET(SPREAD_SLAB, spread_slab) | |
1982 | +TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) | |
1971 | 1983 | |
1972 | 1984 | /* |
1973 | 1985 | * task->jobctl flags |
kernel/cpuset.c
... | ... | @@ -365,13 +365,14 @@ |
365 | 365 | struct task_struct *tsk) |
366 | 366 | { |
367 | 367 | if (is_spread_page(cs)) |
368 | - tsk->flags |= PF_SPREAD_PAGE; | |
368 | + task_set_spread_page(tsk); | |
369 | 369 | else |
370 | - tsk->flags &= ~PF_SPREAD_PAGE; | |
370 | + task_clear_spread_page(tsk); | |
371 | + | |
371 | 372 | if (is_spread_slab(cs)) |
372 | - tsk->flags |= PF_SPREAD_SLAB; | |
373 | + task_set_spread_slab(tsk); | |
373 | 374 | else |
374 | - tsk->flags &= ~PF_SPREAD_SLAB; | |
375 | + task_clear_spread_slab(tsk); | |
375 | 376 | } |
376 | 377 | |
377 | 378 | /* |
mm/slab.c
... | ... | @@ -2987,7 +2987,7 @@ |
2987 | 2987 | |
2988 | 2988 | #ifdef CONFIG_NUMA |
2989 | 2989 | /* |
2990 | - * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set. | |
2990 | + * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set. | |
2991 | 2991 | * |
2992 | 2992 | * If we are in_interrupt, then process context, including cpusets and |
2993 | 2993 | * mempolicy, may not apply and should not be used for allocation policy. |
... | ... | @@ -3219,7 +3219,7 @@ |
3219 | 3219 | { |
3220 | 3220 | void *objp; |
3221 | 3221 | |
3222 | - if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) { | |
3222 | + if (current->mempolicy || cpuset_do_slab_mem_spread()) { | |
3223 | 3223 | objp = alternate_node_alloc(cache, flags); |
3224 | 3224 | if (objp) |
3225 | 3225 | goto out; |
scripts/tags.sh
... | ... | @@ -197,6 +197,9 @@ |
197 | 197 | --regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ |
198 | 198 | --regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ |
199 | 199 | --regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ |
200 | + --regex-c++='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \ | |
201 | + --regex-c++='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \ | |
202 | + --regex-c++='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/'\ | |
200 | 203 | --regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \ |
201 | 204 | --regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \ |
202 | 205 | --regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/' \ |
... | ... | @@ -260,6 +263,9 @@ |
260 | 263 | --regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ |
261 | 264 | --regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ |
262 | 265 | --regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ |
266 | + --regex='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \ | |
267 | + --regex='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \ | |
268 | + --regex='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/' \ | |
263 | 269 | --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \ |
264 | 270 | --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \ |
265 | 271 | --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\ |