Commit 7ddf96b02fe8dd441f452deef879040def5f7b34

Authored by Srivatsa S. Bhat
Committed by Ingo Molnar
Parent: 80d1fa6463

cpusets, hotplug: Restructure functions that are invoked during hotplug

Separate out the cpuset-related handling for CPU/Memory online/offline
events. This also lets us exploit the most obvious and basic optimization
that any notification mechanism (CPU/Mem online/offline) has to offer:
"We *know* why we have been invoked. So stop pretending that we are lost,
and do only the necessary amount of processing!".
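
To see the shape of that optimization outside the kernel, here is a
minimal, self-contained sketch (toy userspace C; every name in it is
hypothetical and it is not the code in this patch): once the caller
names the event, each path prunes only the state that event can
invalidate.

	#include <stdio.h>

	enum toy_hotplug_event { TOY_CPU_OFFLINE, TOY_MEM_OFFLINE };

	struct toy_cpuset {
		unsigned int cpus_allowed;	/* stand-in for a cpumask */
		unsigned int mems_allowed;	/* stand-in for a nodemask */
	};

	/* Prune only the resource invalidated by the event that fired. */
	static void toy_scan_upon_hotplug(struct toy_cpuset *cs,
					  unsigned int active_cpus,
					  unsigned int online_mems,
					  enum toy_hotplug_event event)
	{
		switch (event) {
		case TOY_CPU_OFFLINE:
			cs->cpus_allowed &= active_cpus;  /* mems untouched */
			break;
		case TOY_MEM_OFFLINE:
			cs->mems_allowed &= online_mems;  /* cpus untouched */
			break;
		}
	}

	int main(void)
	{
		struct toy_cpuset cs = { .cpus_allowed = 0xf, .mems_allowed = 0x3 };

		/* CPU 3 goes offline: only cpus_allowed is recomputed. */
		toy_scan_upon_hotplug(&cs, 0x7, 0x3, TOY_CPU_OFFLINE);
		printf("cpus=0x%x mems=0x%x\n", cs.cpus_allowed, cs.mems_allowed);
		return 0;
	}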

While at it, rename scan_for_empty_cpusets() to
scan_cpusets_upon_hotplug(), which is more appropriate given how the
function is restructured; its two new call sites are quoted below.
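
Both call sites appear verbatim in the diff that follows:

	scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE);
	scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE);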

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20120524141650.3692.48637.stgit@srivatsabhat.in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Showing 3 changed files with 65 additions and 31 deletions

include/linux/cpuset.h
@@ -20,7 +20,7 @@
 
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
-extern void cpuset_update_active_cpus(void);
+extern void cpuset_update_active_cpus(bool cpu_online);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -124,7 +124,7 @@
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
-static inline void cpuset_update_active_cpus(void)
+static inline void cpuset_update_active_cpus(bool cpu_online)
 {
 	partition_sched_domains(1, NULL, NULL);
 }
kernel/cpuset.c
@@ -147,6 +147,12 @@
 	CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
 
+/* the type of hotplug event */
+enum hotplug_event {
+	CPUSET_CPU_OFFLINE,
+	CPUSET_MEM_OFFLINE,
+};
+
 /* convenient tests for these bits */
 static inline int is_cpu_exclusive(const struct cpuset *cs)
 {
@@ -2016,8 +2022,10 @@
 
 
 /*
- * Walk the specified cpuset subtree and look for empty cpusets.
- * The tasks of such cpuset must be moved to a parent cpuset.
+ * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory
+ * online/offline) and update the cpusets accordingly.
+ * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such
+ * cpuset must be moved to a parent cpuset.
  *
  * Called with cgroup_mutex held. We take callback_mutex to modify
  * cpus_allowed and mems_allowed.
@@ -2030,38 +2038,58 @@
  * that has tasks along with an empty 'mems'. But if we did see such
  * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
  */
-static void scan_for_empty_cpusets(struct cpuset *root)
+static void
+scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
 {
 	LIST_HEAD(queue);
-	struct cpuset *cp;	/* scans cpusets being updated */
+	struct cpuset *cp;		/* scans cpusets being updated */
 	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	list_add_tail((struct list_head *)&root->stack_list, &queue);
 
-	while ((cp = cpuset_next(&queue)) != NULL) {
+	switch (event) {
+	case CPUSET_CPU_OFFLINE:
+		while ((cp = cpuset_next(&queue)) != NULL) {
 
-		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
-		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
-			continue;
+			/* Continue past cpusets with all cpus online */
+			if (cpumask_subset(cp->cpus_allowed, cpu_active_mask))
+				continue;
 
-		oldmems = cp->mems_allowed;
+			/* Remove offline cpus from this cpuset. */
+			mutex_lock(&callback_mutex);
+			cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
+							cpu_active_mask);
+			mutex_unlock(&callback_mutex);
 
-		/* Remove offline cpus and mems from this cpuset. */
-		mutex_lock(&callback_mutex);
-		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_active_mask);
-		nodes_and(cp->mems_allowed, cp->mems_allowed,
+			/* Move tasks from the empty cpuset to a parent */
+			if (cpumask_empty(cp->cpus_allowed))
+				remove_tasks_in_empty_cpuset(cp);
+			else
+				update_tasks_cpumask(cp, NULL);
+		}
+		break;
+
+	case CPUSET_MEM_OFFLINE:
+		while ((cp = cpuset_next(&queue)) != NULL) {
+
+			/* Continue past cpusets with all mems online */
+			if (nodes_subset(cp->mems_allowed,
+						node_states[N_HIGH_MEMORY]))
+				continue;
+
+			oldmems = cp->mems_allowed;
+
+			/* Remove offline mems from this cpuset. */
+			mutex_lock(&callback_mutex);
+			nodes_and(cp->mems_allowed, cp->mems_allowed,
 						node_states[N_HIGH_MEMORY]);
-		mutex_unlock(&callback_mutex);
+			mutex_unlock(&callback_mutex);
 
-		/* Move tasks from the empty cpuset to a parent */
-		if (cpumask_empty(cp->cpus_allowed) ||
-		    nodes_empty(cp->mems_allowed))
-			remove_tasks_in_empty_cpuset(cp);
-		else {
-			update_tasks_cpumask(cp, NULL);
-			update_tasks_nodemask(cp, &oldmems, NULL);
+			/* Move tasks from the empty cpuset to a parent */
+			if (nodes_empty(cp->mems_allowed))
+				remove_tasks_in_empty_cpuset(cp);
+			else
+				update_tasks_nodemask(cp, &oldmems, NULL);
 		}
 	}
 }
@@ -2080,8 +2108,11 @@
  *
  * Called within get_online_cpus(). Needs to call cgroup_lock()
  * before calling generate_sched_domains().
+ *
+ * @cpu_online: Indicates whether this is a CPU online event (true) or
+ * a CPU offline event (false).
  */
-void cpuset_update_active_cpus(void)
+void cpuset_update_active_cpus(bool cpu_online)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
@@ -2091,7 +2122,10 @@
 	mutex_lock(&callback_mutex);
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
-	scan_for_empty_cpusets(&top_cpuset);
+
+	if (!cpu_online)
+		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE);
+
 	ndoms = generate_sched_domains(&doms, &attr);
 	cgroup_unlock();
 
@@ -2122,9 +2156,9 @@
 	case MEM_OFFLINE:
 		/*
 		 * needn't update top_cpuset.mems_allowed explicitly because
-		 * scan_for_empty_cpusets() will update it.
+		 * scan_cpusets_upon_hotplug() will update it.
 		 */
-		scan_for_empty_cpusets(&top_cpuset);
+		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE);
 		break;
 	default:
 		break;
kernel/sched/core.c
@@ -7134,7 +7134,7 @@
 
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
-		cpuset_update_active_cpus();
+		cpuset_update_active_cpus(true);
 		break;
 	default:
 		return NOTIFY_DONE;
@@ -7147,7 +7147,7 @@
 {
 	switch (action) {
 	case CPU_DOWN_PREPARE:
-		cpuset_update_active_cpus();
+		cpuset_update_active_cpus(false);
 		break;
 	case CPU_DOWN_PREPARE_FROZEN:
 		num_cpus_frozen++;