Commit aa95387774039096c11803c04011f1aa42d85758

Author: Linus Torvalds <torvalds@osdl.org>
Parent: 2cd7cbdf4b

cpu hotplug: simplify and hopefully fix locking

The CPU hotplug locking was quite messy, with a recursive lock to
handle the fact that both the actual up/down sequence wanted to
protect itself from being re-entered, and the callbacks that it
called also tended to want to protect themselves from CPU events.

This splits the lock into two (one to serialize the whole hotplug
sequence, the other to protect against the CPU present bitmaps
changing). The latter still allows recursive usage because some
subsystems (ondemand policy for cpufreq at least) had already gotten
too used to the lax locking, but the locking mistakes are hopefully
now less fundamental, and we now warn about recursive lock usage
when we see it, in the hope that it can be fixed.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
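
The recursion handling described in the message above is a small, self-contained
pattern: remember which task currently owns the lock, and if that same task tries
to take it again, warn and bump a depth counter instead of deadlocking on the
mutex. Below is a minimal user-space sketch of that discipline, using pthreads in
place of the kernel mutex; every name in it (hotplug_lock, bitmask_lock, and so
on) is illustrative, not part of the patch.

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for cpu_bitmask_lock and the owner bookkeeping. */
static pthread_mutex_t bitmask_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t owner;         /* thread currently holding the lock */
static int have_owner;          /* whether 'owner' is valid */
static int depth;               /* extra recursive acquisitions */

static void hotplug_lock(void)
{
        if (have_owner && pthread_equal(owner, pthread_self())) {
                /* Recursive use: complain, but don't deadlock. */
                fprintf(stderr, "warning: recursive hotplug lock\n");
                depth++;
                return;
        }
        pthread_mutex_lock(&bitmask_lock);
        owner = pthread_self();
        have_owner = 1;
}

static void hotplug_unlock(void)
{
        if (depth) {            /* undo a recursive acquisition first */
                depth--;
                return;
        }
        have_owner = 0;         /* clear ownership before releasing */
        pthread_mutex_unlock(&bitmask_lock);
}

int main(void)
{
        hotplug_lock();
        hotplug_lock();         /* recursion: warns and bumps depth */
        hotplug_unlock();
        hotplug_unlock();       /* actually releases the mutex */
        return 0;
}

As in the patch, checking the owner field without holding the lock is safe
because only the owner writes it while holding the mutex, so the equality test
can only succeed in the owning task. The sketch clears the owner before dropping
the mutex, which is the conservative ordering.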

Showing 2 changed files with 34 additions and 47 deletions

--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -48,7 +48,6 @@
 {
 }
 #endif
-extern int current_in_cpu_hotplug(void);
 
 int cpu_up(unsigned int cpu);
 
@@ -61,10 +60,6 @@
 static inline void unregister_cpu_notifier(struct notifier_block *nb)
 {
 }
-static inline int current_in_cpu_hotplug(void)
-{
-        return 0;
-}
 
 #endif /* CONFIG_SMP */
 extern struct sysdev_class cpu_sysdev_class;
@@ -73,7 +68,6 @@
 /* Stop CPUs going up and down. */
 extern void lock_cpu_hotplug(void);
 extern void unlock_cpu_hotplug(void);
-extern int lock_cpu_hotplug_interruptible(void);
 #define hotcpu_notifier(fn, pri) {                              \
         static struct notifier_block fn##_nb =                  \
                 { .notifier_call = fn, .priority = pri };       \
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -16,56 +16,48 @@
 #include <linux/mutex.h>
 
 /* This protects CPUs going up and down... */
-static DEFINE_MUTEX(cpucontrol);
+static DEFINE_MUTEX(cpu_add_remove_lock);
+static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
 #ifdef CONFIG_HOTPLUG_CPU
-static struct task_struct *lock_cpu_hotplug_owner;
-static int lock_cpu_hotplug_depth;
 
-static int __lock_cpu_hotplug(int interruptible)
+/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
+static struct task_struct *recursive;
+static int recursive_depth;
+
+void lock_cpu_hotplug(void)
 {
-        int ret = 0;
+        struct task_struct *tsk = current;
 
-        if (lock_cpu_hotplug_owner != current) {
-                if (interruptible)
-                        ret = mutex_lock_interruptible(&cpucontrol);
-                else
-                        mutex_lock(&cpucontrol);
+        if (tsk == recursive) {
+                static int warnings = 10;
+                if (warnings) {
+                        printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
+                        WARN_ON(1);
+                        warnings--;
+                }
+                recursive_depth++;
+                return;
         }
-
-        /*
-         * Set only if we succeed in locking
-         */
-        if (!ret) {
-                lock_cpu_hotplug_depth++;
-                lock_cpu_hotplug_owner = current;
-        }
-
-        return ret;
+        mutex_lock(&cpu_bitmask_lock);
+        recursive = tsk;
 }
-
-void lock_cpu_hotplug(void)
-{
-        __lock_cpu_hotplug(0);
-}
 EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
 
 void unlock_cpu_hotplug(void)
 {
-        if (--lock_cpu_hotplug_depth == 0) {
-                lock_cpu_hotplug_owner = NULL;
-                mutex_unlock(&cpucontrol);
+        WARN_ON(recursive != current);
+        if (recursive_depth) {
+                recursive_depth--;
+                return;
         }
+        mutex_unlock(&cpu_bitmask_lock);
+        recursive = NULL;
 }
 EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
 
-int lock_cpu_hotplug_interruptible(void)
-{
-        return __lock_cpu_hotplug(1);
-}
-EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible);
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /* Need to know about CPUs going up/down? */
@@ -122,9 +114,7 @@
         struct task_struct *p;
         cpumask_t old_allowed, tmp;
 
-        if ((err = lock_cpu_hotplug_interruptible()) != 0)
-                return err;
-
+        mutex_lock(&cpu_add_remove_lock);
         if (num_online_cpus() == 1) {
                 err = -EBUSY;
                 goto out;
@@ -150,7 +140,10 @@
         cpu_clear(cpu, tmp);
         set_cpus_allowed(current, tmp);
 
+        mutex_lock(&cpu_bitmask_lock);
         p = __stop_machine_run(take_cpu_down, NULL, cpu);
+        mutex_unlock(&cpu_bitmask_lock);
+
         if (IS_ERR(p)) {
                 /* CPU didn't die: tell everyone.  Can't complain. */
                 if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
@@ -187,7 +180,7 @@
 out_allowed:
         set_cpus_allowed(current, old_allowed);
 out:
-        unlock_cpu_hotplug();
+        mutex_unlock(&cpu_add_remove_lock);
         return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
@@ -197,9 +190,7 @@
         int ret;
         void *hcpu = (void *)(long)cpu;
 
-        if ((ret = lock_cpu_hotplug_interruptible()) != 0)
-                return ret;
-
+        mutex_lock(&cpu_add_remove_lock);
         if (cpu_online(cpu) || !cpu_present(cpu)) {
                 ret = -EINVAL;
                 goto out;
@@ -214,7 +205,9 @@
         }
 
         /* Arch-specific enabling code. */
+        mutex_lock(&cpu_bitmask_lock);
         ret = __cpu_up(cpu);
+        mutex_unlock(&cpu_bitmask_lock);
         if (ret != 0)
                 goto out_notify;
         BUG_ON(!cpu_online(cpu));
@@ -227,7 +220,7 @@
         blocking_notifier_call_chain(&cpu_chain,
                         CPU_UP_CANCELED, hcpu);
 out:
-        unlock_cpu_hotplug();
+        mutex_unlock(&cpu_add_remove_lock);
         return ret;
 }
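
With the split in place, cpu_up() and cpu_down() serialize against each other on
cpu_add_remove_lock, while lock_cpu_hotplug()/unlock_cpu_hotplug() reduce to
taking cpu_bitmask_lock, which is all that code needing a stable view of the CPU
bitmaps requires. A sketch of such a caller, written as a trivial module against
a tree of this vintage (the bitmap_reader names are invented for illustration):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

/* Walk the online map without racing a concurrent hotplug transition:
 * lock_cpu_hotplug() holds cpu_bitmask_lock for the whole walk, so no
 * CPU can come or go while we count. */
static int __init bitmap_reader_init(void)
{
        int cpu, n = 0;

        lock_cpu_hotplug();
        for_each_online_cpu(cpu)
                n++;
        unlock_cpu_hotplug();

        printk(KERN_INFO "bitmap_reader: %d CPUs online\n", n);
        return 0;
}

static void __exit bitmap_reader_exit(void)
{
}

module_init(bitmap_reader_init);
module_exit(bitmap_reader_exit);
MODULE_LICENSE("GPL");

Note that lock_cpu_hotplug() is exported with EXPORT_SYMBOL_GPL, so a module
using it must declare a GPL-compatible license as above.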