Commit a382bf934449ddeb625167537ae81daa0211b477
1 parent: a831881be2
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
nohz: Assign timekeeping duty to a CPU outside the full dynticks range
This way the full nohz CPUs can safely run with the tick stopped with a guarantee that somebody else is taking care of the jiffies and GTOD progression. Once the duty is attributed to a CPU, it won't change. Also that CPU can't enter into dyntick idle mode or be hot unplugged.

This may later be improved from a power consumption POV. At least we should be able to share the duty amongst all CPUs outside the full dynticks range. Then the duty could even be shared with full dynticks CPUs when those can't stop their tick for any reason.

But let's start with that very simple approach first.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fix have_nohz_full_mask offcase]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
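The heart of the patch is a single guard applied wherever the do_timer duty is handed out: a CPU inside the full dynticks range never takes tick_do_timer_cpu, as seen in the tick_broadcast_setup_oneshot() and tick_setup_device() hunks below. The stand-alone sketch that follows only models that selection logic; the bool array standing in for the full-dynticks cpumask and the TICK_DO_TIMER_NONE recovery branch (handled by the third changed file, which is not shown on this page) are illustrative assumptions, not the kernel code itself.

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_CPUS            8
    #define TICK_DO_TIMER_BOOT (-2)  /* boot default: duty not yet taken */
    #define TICK_DO_TIMER_NONE (-1)  /* no CPU currently owns the duty   */

    /* Stand-in for the kernel's full-dynticks cpumask (illustrative only). */
    static bool nohz_extended_cpu[NR_CPUS] = { [1] = true, [2] = true };

    /* Model of tick_nohz_extended_cpu(): is this CPU in the full dynticks range? */
    static bool tick_nohz_extended_cpu(int cpu)
    {
            return nohz_extended_cpu[cpu];
    }

    static int tick_do_timer_cpu = TICK_DO_TIMER_BOOT;

    /*
     * Model of the guard this patch adds around every tick_do_timer_cpu
     * assignment: a full-dynticks CPU never takes the jiffies/GTOD duty.
     * The TICK_DO_TIMER_NONE recovery branch is an assumption about the
     * part of the patch that is not shown on this page.
     */
    static void tick_take_do_timer_duty(int cpu)
    {
            if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
                    if (!tick_nohz_extended_cpu(cpu))
                            tick_do_timer_cpu = cpu;
                    else
                            tick_do_timer_cpu = TICK_DO_TIMER_NONE;
            } else if (tick_do_timer_cpu == TICK_DO_TIMER_NONE &&
                       !tick_nohz_extended_cpu(cpu)) {
                    tick_do_timer_cpu = cpu;
            }
    }

    int main(void)
    {
            /* Toy bring-up order: a full-dynticks CPU registers first, then CPU 0. */
            tick_take_do_timer_duty(1);
            printf("after CPU 1: tick_do_timer_cpu = %d\n", tick_do_timer_cpu);
            tick_take_do_timer_duty(0);
            printf("after CPU 0: tick_do_timer_cpu = %d\n", tick_do_timer_cpu);
            return 0;
    }

Compiled on its own, the toy run shows the duty staying unassigned (-1) while only full-dynticks CPUs have registered, then settling on CPU 0, the first CPU outside that range to register.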
Showing 3 changed files with 51 additions and 4 deletions (inline diff)
kernel/time/tick-broadcast.c
1 | /* | 1 | /* |
2 | * linux/kernel/time/tick-broadcast.c | 2 | * linux/kernel/time/tick-broadcast.c |
3 | * | 3 | * |
4 | * This file contains functions which emulate a local clock-event | 4 | * This file contains functions which emulate a local clock-event |
5 | * device via a broadcast event source. | 5 | * device via a broadcast event source. |
6 | * | 6 | * |
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | 7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | 8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | 9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner |
10 | * | 10 | * |
11 | * This code is licenced under the GPL version 2. For details see | 11 | * This code is licenced under the GPL version 2. For details see |
12 | * kernel-base/COPYING. | 12 | * kernel-base/COPYING. |
13 | */ | 13 | */ |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
19 | #include <linux/profile.h> | 19 | #include <linux/profile.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | 22 | ||
23 | #include "tick-internal.h" | 23 | #include "tick-internal.h" |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * Broadcast support for broken x86 hardware, where the local apic | 26 | * Broadcast support for broken x86 hardware, where the local apic |
27 | * timer stops in C3 state. | 27 | * timer stops in C3 state. |
28 | */ | 28 | */ |
29 | 29 | ||
30 | static struct tick_device tick_broadcast_device; | 30 | static struct tick_device tick_broadcast_device; |
31 | /* FIXME: Use cpumask_var_t. */ | 31 | /* FIXME: Use cpumask_var_t. */ |
32 | static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); | 32 | static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); |
33 | static DECLARE_BITMAP(tmpmask, NR_CPUS); | 33 | static DECLARE_BITMAP(tmpmask, NR_CPUS); |
34 | static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); | 34 | static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); |
35 | static int tick_broadcast_force; | 35 | static int tick_broadcast_force; |
36 | 36 | ||
37 | #ifdef CONFIG_TICK_ONESHOT | 37 | #ifdef CONFIG_TICK_ONESHOT |
38 | static void tick_broadcast_clear_oneshot(int cpu); | 38 | static void tick_broadcast_clear_oneshot(int cpu); |
39 | #else | 39 | #else |
40 | static inline void tick_broadcast_clear_oneshot(int cpu) { } | 40 | static inline void tick_broadcast_clear_oneshot(int cpu) { } |
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Debugging: see timer_list.c | 44 | * Debugging: see timer_list.c |
45 | */ | 45 | */ |
46 | struct tick_device *tick_get_broadcast_device(void) | 46 | struct tick_device *tick_get_broadcast_device(void) |
47 | { | 47 | { |
48 | return &tick_broadcast_device; | 48 | return &tick_broadcast_device; |
49 | } | 49 | } |
50 | 50 | ||
51 | struct cpumask *tick_get_broadcast_mask(void) | 51 | struct cpumask *tick_get_broadcast_mask(void) |
52 | { | 52 | { |
53 | return to_cpumask(tick_broadcast_mask); | 53 | return to_cpumask(tick_broadcast_mask); |
54 | } | 54 | } |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * Start the device in periodic mode | 57 | * Start the device in periodic mode |
58 | */ | 58 | */ |
59 | static void tick_broadcast_start_periodic(struct clock_event_device *bc) | 59 | static void tick_broadcast_start_periodic(struct clock_event_device *bc) |
60 | { | 60 | { |
61 | if (bc) | 61 | if (bc) |
62 | tick_setup_periodic(bc, 1); | 62 | tick_setup_periodic(bc, 1); |
63 | } | 63 | } |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Check, if the device can be utilized as broadcast device: | 66 | * Check, if the device can be utilized as broadcast device: |
67 | */ | 67 | */ |
68 | int tick_check_broadcast_device(struct clock_event_device *dev) | 68 | int tick_check_broadcast_device(struct clock_event_device *dev) |
69 | { | 69 | { |
70 | if ((tick_broadcast_device.evtdev && | 70 | if ((tick_broadcast_device.evtdev && |
71 | tick_broadcast_device.evtdev->rating >= dev->rating) || | 71 | tick_broadcast_device.evtdev->rating >= dev->rating) || |
72 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | 72 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) |
73 | return 0; | 73 | return 0; |
74 | 74 | ||
75 | clockevents_exchange_device(tick_broadcast_device.evtdev, dev); | 75 | clockevents_exchange_device(tick_broadcast_device.evtdev, dev); |
76 | tick_broadcast_device.evtdev = dev; | 76 | tick_broadcast_device.evtdev = dev; |
77 | if (!cpumask_empty(tick_get_broadcast_mask())) | 77 | if (!cpumask_empty(tick_get_broadcast_mask())) |
78 | tick_broadcast_start_periodic(dev); | 78 | tick_broadcast_start_periodic(dev); |
79 | return 1; | 79 | return 1; |
80 | } | 80 | } |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Check, if the device is the broadcast device | 83 | * Check, if the device is the broadcast device |
84 | */ | 84 | */ |
85 | int tick_is_broadcast_device(struct clock_event_device *dev) | 85 | int tick_is_broadcast_device(struct clock_event_device *dev) |
86 | { | 86 | { |
87 | return (dev && tick_broadcast_device.evtdev == dev); | 87 | return (dev && tick_broadcast_device.evtdev == dev); |
88 | } | 88 | } |
89 | 89 | ||
90 | static void err_broadcast(const struct cpumask *mask) | 90 | static void err_broadcast(const struct cpumask *mask) |
91 | { | 91 | { |
92 | pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); | 92 | pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); |
93 | } | 93 | } |
94 | 94 | ||
95 | static void tick_device_setup_broadcast_func(struct clock_event_device *dev) | 95 | static void tick_device_setup_broadcast_func(struct clock_event_device *dev) |
96 | { | 96 | { |
97 | if (!dev->broadcast) | 97 | if (!dev->broadcast) |
98 | dev->broadcast = tick_broadcast; | 98 | dev->broadcast = tick_broadcast; |
99 | if (!dev->broadcast) { | 99 | if (!dev->broadcast) { |
100 | pr_warn_once("%s depends on broadcast, but no broadcast function available\n", | 100 | pr_warn_once("%s depends on broadcast, but no broadcast function available\n", |
101 | dev->name); | 101 | dev->name); |
102 | dev->broadcast = err_broadcast; | 102 | dev->broadcast = err_broadcast; |
103 | } | 103 | } |
104 | } | 104 | } |
105 | 105 | ||
106 | /* | 106 | /* |
107 | * Check, if the device is disfunctional and a place holder, which | 107 | * Check, if the device is disfunctional and a place holder, which |
108 | * needs to be handled by the broadcast device. | 108 | * needs to be handled by the broadcast device. |
109 | */ | 109 | */ |
110 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | 110 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) |
111 | { | 111 | { |
112 | unsigned long flags; | 112 | unsigned long flags; |
113 | int ret = 0; | 113 | int ret = 0; |
114 | 114 | ||
115 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 115 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
116 | 116 | ||
117 | /* | 117 | /* |
118 | * Devices might be registered with both periodic and oneshot | 118 | * Devices might be registered with both periodic and oneshot |
119 | * mode disabled. This signals, that the device needs to be | 119 | * mode disabled. This signals, that the device needs to be |
120 | * operated from the broadcast device and is a placeholder for | 120 | * operated from the broadcast device and is a placeholder for |
121 | * the cpu local device. | 121 | * the cpu local device. |
122 | */ | 122 | */ |
123 | if (!tick_device_is_functional(dev)) { | 123 | if (!tick_device_is_functional(dev)) { |
124 | dev->event_handler = tick_handle_periodic; | 124 | dev->event_handler = tick_handle_periodic; |
125 | tick_device_setup_broadcast_func(dev); | 125 | tick_device_setup_broadcast_func(dev); |
126 | cpumask_set_cpu(cpu, tick_get_broadcast_mask()); | 126 | cpumask_set_cpu(cpu, tick_get_broadcast_mask()); |
127 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); | 127 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); |
128 | ret = 1; | 128 | ret = 1; |
129 | } else { | 129 | } else { |
130 | /* | 130 | /* |
131 | * When the new device is not affected by the stop | 131 | * When the new device is not affected by the stop |
132 | * feature and the cpu is marked in the broadcast mask | 132 | * feature and the cpu is marked in the broadcast mask |
133 | * then clear the broadcast bit. | 133 | * then clear the broadcast bit. |
134 | */ | 134 | */ |
135 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { | 135 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { |
136 | int cpu = smp_processor_id(); | 136 | int cpu = smp_processor_id(); |
137 | cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); | 137 | cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); |
138 | tick_broadcast_clear_oneshot(cpu); | 138 | tick_broadcast_clear_oneshot(cpu); |
139 | } else { | 139 | } else { |
140 | tick_device_setup_broadcast_func(dev); | 140 | tick_device_setup_broadcast_func(dev); |
141 | } | 141 | } |
142 | } | 142 | } |
143 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 143 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
144 | return ret; | 144 | return ret; |
145 | } | 145 | } |
146 | 146 | ||
147 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 147 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
148 | int tick_receive_broadcast(void) | 148 | int tick_receive_broadcast(void) |
149 | { | 149 | { |
150 | struct tick_device *td = this_cpu_ptr(&tick_cpu_device); | 150 | struct tick_device *td = this_cpu_ptr(&tick_cpu_device); |
151 | struct clock_event_device *evt = td->evtdev; | 151 | struct clock_event_device *evt = td->evtdev; |
152 | 152 | ||
153 | if (!evt) | 153 | if (!evt) |
154 | return -ENODEV; | 154 | return -ENODEV; |
155 | 155 | ||
156 | if (!evt->event_handler) | 156 | if (!evt->event_handler) |
157 | return -EINVAL; | 157 | return -EINVAL; |
158 | 158 | ||
159 | evt->event_handler(evt); | 159 | evt->event_handler(evt); |
160 | return 0; | 160 | return 0; |
161 | } | 161 | } |
162 | #endif | 162 | #endif |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * Broadcast the event to the cpus, which are set in the mask (mangled). | 165 | * Broadcast the event to the cpus, which are set in the mask (mangled). |
166 | */ | 166 | */ |
167 | static void tick_do_broadcast(struct cpumask *mask) | 167 | static void tick_do_broadcast(struct cpumask *mask) |
168 | { | 168 | { |
169 | int cpu = smp_processor_id(); | 169 | int cpu = smp_processor_id(); |
170 | struct tick_device *td; | 170 | struct tick_device *td; |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * Check, if the current cpu is in the mask | 173 | * Check, if the current cpu is in the mask |
174 | */ | 174 | */ |
175 | if (cpumask_test_cpu(cpu, mask)) { | 175 | if (cpumask_test_cpu(cpu, mask)) { |
176 | cpumask_clear_cpu(cpu, mask); | 176 | cpumask_clear_cpu(cpu, mask); |
177 | td = &per_cpu(tick_cpu_device, cpu); | 177 | td = &per_cpu(tick_cpu_device, cpu); |
178 | td->evtdev->event_handler(td->evtdev); | 178 | td->evtdev->event_handler(td->evtdev); |
179 | } | 179 | } |
180 | 180 | ||
181 | if (!cpumask_empty(mask)) { | 181 | if (!cpumask_empty(mask)) { |
182 | /* | 182 | /* |
183 | * It might be necessary to actually check whether the devices | 183 | * It might be necessary to actually check whether the devices |
184 | * have different broadcast functions. For now, just use the | 184 | * have different broadcast functions. For now, just use the |
185 | * one of the first device. This works as long as we have this | 185 | * one of the first device. This works as long as we have this |
186 | * misfeature only on x86 (lapic) | 186 | * misfeature only on x86 (lapic) |
187 | */ | 187 | */ |
188 | td = &per_cpu(tick_cpu_device, cpumask_first(mask)); | 188 | td = &per_cpu(tick_cpu_device, cpumask_first(mask)); |
189 | td->evtdev->broadcast(mask); | 189 | td->evtdev->broadcast(mask); |
190 | } | 190 | } |
191 | } | 191 | } |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * Periodic broadcast: | 194 | * Periodic broadcast: |
195 | * - invoke the broadcast handlers | 195 | * - invoke the broadcast handlers |
196 | */ | 196 | */ |
197 | static void tick_do_periodic_broadcast(void) | 197 | static void tick_do_periodic_broadcast(void) |
198 | { | 198 | { |
199 | raw_spin_lock(&tick_broadcast_lock); | 199 | raw_spin_lock(&tick_broadcast_lock); |
200 | 200 | ||
201 | cpumask_and(to_cpumask(tmpmask), | 201 | cpumask_and(to_cpumask(tmpmask), |
202 | cpu_online_mask, tick_get_broadcast_mask()); | 202 | cpu_online_mask, tick_get_broadcast_mask()); |
203 | tick_do_broadcast(to_cpumask(tmpmask)); | 203 | tick_do_broadcast(to_cpumask(tmpmask)); |
204 | 204 | ||
205 | raw_spin_unlock(&tick_broadcast_lock); | 205 | raw_spin_unlock(&tick_broadcast_lock); |
206 | } | 206 | } |
207 | 207 | ||
208 | /* | 208 | /* |
209 | * Event handler for periodic broadcast ticks | 209 | * Event handler for periodic broadcast ticks |
210 | */ | 210 | */ |
211 | static void tick_handle_periodic_broadcast(struct clock_event_device *dev) | 211 | static void tick_handle_periodic_broadcast(struct clock_event_device *dev) |
212 | { | 212 | { |
213 | ktime_t next; | 213 | ktime_t next; |
214 | 214 | ||
215 | tick_do_periodic_broadcast(); | 215 | tick_do_periodic_broadcast(); |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * The device is in periodic mode. No reprogramming necessary: | 218 | * The device is in periodic mode. No reprogramming necessary: |
219 | */ | 219 | */ |
220 | if (dev->mode == CLOCK_EVT_MODE_PERIODIC) | 220 | if (dev->mode == CLOCK_EVT_MODE_PERIODIC) |
221 | return; | 221 | return; |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * Setup the next period for devices, which do not have | 224 | * Setup the next period for devices, which do not have |
225 | * periodic mode. We read dev->next_event first and add to it | 225 | * periodic mode. We read dev->next_event first and add to it |
226 | * when the event already expired. clockevents_program_event() | 226 | * when the event already expired. clockevents_program_event() |
227 | * sets dev->next_event only when the event is really | 227 | * sets dev->next_event only when the event is really |
228 | * programmed to the device. | 228 | * programmed to the device. |
229 | */ | 229 | */ |
230 | for (next = dev->next_event; ;) { | 230 | for (next = dev->next_event; ;) { |
231 | next = ktime_add(next, tick_period); | 231 | next = ktime_add(next, tick_period); |
232 | 232 | ||
233 | if (!clockevents_program_event(dev, next, false)) | 233 | if (!clockevents_program_event(dev, next, false)) |
234 | return; | 234 | return; |
235 | tick_do_periodic_broadcast(); | 235 | tick_do_periodic_broadcast(); |
236 | } | 236 | } |
237 | } | 237 | } |
238 | 238 | ||
239 | /* | 239 | /* |
240 | * Powerstate information: The system enters/leaves a state, where | 240 | * Powerstate information: The system enters/leaves a state, where |
241 | * affected devices might stop | 241 | * affected devices might stop |
242 | */ | 242 | */ |
243 | static void tick_do_broadcast_on_off(unsigned long *reason) | 243 | static void tick_do_broadcast_on_off(unsigned long *reason) |
244 | { | 244 | { |
245 | struct clock_event_device *bc, *dev; | 245 | struct clock_event_device *bc, *dev; |
246 | struct tick_device *td; | 246 | struct tick_device *td; |
247 | unsigned long flags; | 247 | unsigned long flags; |
248 | int cpu, bc_stopped; | 248 | int cpu, bc_stopped; |
249 | 249 | ||
250 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 250 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
251 | 251 | ||
252 | cpu = smp_processor_id(); | 252 | cpu = smp_processor_id(); |
253 | td = &per_cpu(tick_cpu_device, cpu); | 253 | td = &per_cpu(tick_cpu_device, cpu); |
254 | dev = td->evtdev; | 254 | dev = td->evtdev; |
255 | bc = tick_broadcast_device.evtdev; | 255 | bc = tick_broadcast_device.evtdev; |
256 | 256 | ||
257 | /* | 257 | /* |
258 | * Is the device not affected by the powerstate ? | 258 | * Is the device not affected by the powerstate ? |
259 | */ | 259 | */ |
260 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) | 260 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
261 | goto out; | 261 | goto out; |
262 | 262 | ||
263 | if (!tick_device_is_functional(dev)) | 263 | if (!tick_device_is_functional(dev)) |
264 | goto out; | 264 | goto out; |
265 | 265 | ||
266 | bc_stopped = cpumask_empty(tick_get_broadcast_mask()); | 266 | bc_stopped = cpumask_empty(tick_get_broadcast_mask()); |
267 | 267 | ||
268 | switch (*reason) { | 268 | switch (*reason) { |
269 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | 269 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: |
270 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | 270 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: |
271 | if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { | 271 | if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { |
272 | cpumask_set_cpu(cpu, tick_get_broadcast_mask()); | 272 | cpumask_set_cpu(cpu, tick_get_broadcast_mask()); |
273 | if (tick_broadcast_device.mode == | 273 | if (tick_broadcast_device.mode == |
274 | TICKDEV_MODE_PERIODIC) | 274 | TICKDEV_MODE_PERIODIC) |
275 | clockevents_shutdown(dev); | 275 | clockevents_shutdown(dev); |
276 | } | 276 | } |
277 | if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) | 277 | if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) |
278 | tick_broadcast_force = 1; | 278 | tick_broadcast_force = 1; |
279 | break; | 279 | break; |
280 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | 280 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: |
281 | if (!tick_broadcast_force && | 281 | if (!tick_broadcast_force && |
282 | cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { | 282 | cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { |
283 | cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); | 283 | cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); |
284 | if (tick_broadcast_device.mode == | 284 | if (tick_broadcast_device.mode == |
285 | TICKDEV_MODE_PERIODIC) | 285 | TICKDEV_MODE_PERIODIC) |
286 | tick_setup_periodic(dev, 0); | 286 | tick_setup_periodic(dev, 0); |
287 | } | 287 | } |
288 | break; | 288 | break; |
289 | } | 289 | } |
290 | 290 | ||
291 | if (cpumask_empty(tick_get_broadcast_mask())) { | 291 | if (cpumask_empty(tick_get_broadcast_mask())) { |
292 | if (!bc_stopped) | 292 | if (!bc_stopped) |
293 | clockevents_shutdown(bc); | 293 | clockevents_shutdown(bc); |
294 | } else if (bc_stopped) { | 294 | } else if (bc_stopped) { |
295 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | 295 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
296 | tick_broadcast_start_periodic(bc); | 296 | tick_broadcast_start_periodic(bc); |
297 | else | 297 | else |
298 | tick_broadcast_setup_oneshot(bc); | 298 | tick_broadcast_setup_oneshot(bc); |
299 | } | 299 | } |
300 | out: | 300 | out: |
301 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 301 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
302 | } | 302 | } |
303 | 303 | ||
304 | /* | 304 | /* |
305 | * Powerstate information: The system enters/leaves a state, where | 305 | * Powerstate information: The system enters/leaves a state, where |
306 | * affected devices might stop. | 306 | * affected devices might stop. |
307 | */ | 307 | */ |
308 | void tick_broadcast_on_off(unsigned long reason, int *oncpu) | 308 | void tick_broadcast_on_off(unsigned long reason, int *oncpu) |
309 | { | 309 | { |
310 | if (!cpumask_test_cpu(*oncpu, cpu_online_mask)) | 310 | if (!cpumask_test_cpu(*oncpu, cpu_online_mask)) |
311 | printk(KERN_ERR "tick-broadcast: ignoring broadcast for " | 311 | printk(KERN_ERR "tick-broadcast: ignoring broadcast for " |
312 | "offline CPU #%d\n", *oncpu); | 312 | "offline CPU #%d\n", *oncpu); |
313 | else | 313 | else |
314 | tick_do_broadcast_on_off(&reason); | 314 | tick_do_broadcast_on_off(&reason); |
315 | } | 315 | } |
316 | 316 | ||
317 | /* | 317 | /* |
318 | * Set the periodic handler depending on broadcast on/off | 318 | * Set the periodic handler depending on broadcast on/off |
319 | */ | 319 | */ |
320 | void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) | 320 | void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) |
321 | { | 321 | { |
322 | if (!broadcast) | 322 | if (!broadcast) |
323 | dev->event_handler = tick_handle_periodic; | 323 | dev->event_handler = tick_handle_periodic; |
324 | else | 324 | else |
325 | dev->event_handler = tick_handle_periodic_broadcast; | 325 | dev->event_handler = tick_handle_periodic_broadcast; |
326 | } | 326 | } |
327 | 327 | ||
328 | /* | 328 | /* |
329 | * Remove a CPU from broadcasting | 329 | * Remove a CPU from broadcasting |
330 | */ | 330 | */ |
331 | void tick_shutdown_broadcast(unsigned int *cpup) | 331 | void tick_shutdown_broadcast(unsigned int *cpup) |
332 | { | 332 | { |
333 | struct clock_event_device *bc; | 333 | struct clock_event_device *bc; |
334 | unsigned long flags; | 334 | unsigned long flags; |
335 | unsigned int cpu = *cpup; | 335 | unsigned int cpu = *cpup; |
336 | 336 | ||
337 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 337 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
338 | 338 | ||
339 | bc = tick_broadcast_device.evtdev; | 339 | bc = tick_broadcast_device.evtdev; |
340 | cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); | 340 | cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); |
341 | 341 | ||
342 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { | 342 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { |
343 | if (bc && cpumask_empty(tick_get_broadcast_mask())) | 343 | if (bc && cpumask_empty(tick_get_broadcast_mask())) |
344 | clockevents_shutdown(bc); | 344 | clockevents_shutdown(bc); |
345 | } | 345 | } |
346 | 346 | ||
347 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 347 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
348 | } | 348 | } |
349 | 349 | ||
350 | void tick_suspend_broadcast(void) | 350 | void tick_suspend_broadcast(void) |
351 | { | 351 | { |
352 | struct clock_event_device *bc; | 352 | struct clock_event_device *bc; |
353 | unsigned long flags; | 353 | unsigned long flags; |
354 | 354 | ||
355 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 355 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
356 | 356 | ||
357 | bc = tick_broadcast_device.evtdev; | 357 | bc = tick_broadcast_device.evtdev; |
358 | if (bc) | 358 | if (bc) |
359 | clockevents_shutdown(bc); | 359 | clockevents_shutdown(bc); |
360 | 360 | ||
361 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 361 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
362 | } | 362 | } |
363 | 363 | ||
364 | int tick_resume_broadcast(void) | 364 | int tick_resume_broadcast(void) |
365 | { | 365 | { |
366 | struct clock_event_device *bc; | 366 | struct clock_event_device *bc; |
367 | unsigned long flags; | 367 | unsigned long flags; |
368 | int broadcast = 0; | 368 | int broadcast = 0; |
369 | 369 | ||
370 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 370 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
371 | 371 | ||
372 | bc = tick_broadcast_device.evtdev; | 372 | bc = tick_broadcast_device.evtdev; |
373 | 373 | ||
374 | if (bc) { | 374 | if (bc) { |
375 | clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); | 375 | clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); |
376 | 376 | ||
377 | switch (tick_broadcast_device.mode) { | 377 | switch (tick_broadcast_device.mode) { |
378 | case TICKDEV_MODE_PERIODIC: | 378 | case TICKDEV_MODE_PERIODIC: |
379 | if (!cpumask_empty(tick_get_broadcast_mask())) | 379 | if (!cpumask_empty(tick_get_broadcast_mask())) |
380 | tick_broadcast_start_periodic(bc); | 380 | tick_broadcast_start_periodic(bc); |
381 | broadcast = cpumask_test_cpu(smp_processor_id(), | 381 | broadcast = cpumask_test_cpu(smp_processor_id(), |
382 | tick_get_broadcast_mask()); | 382 | tick_get_broadcast_mask()); |
383 | break; | 383 | break; |
384 | case TICKDEV_MODE_ONESHOT: | 384 | case TICKDEV_MODE_ONESHOT: |
385 | if (!cpumask_empty(tick_get_broadcast_mask())) | 385 | if (!cpumask_empty(tick_get_broadcast_mask())) |
386 | broadcast = tick_resume_broadcast_oneshot(bc); | 386 | broadcast = tick_resume_broadcast_oneshot(bc); |
387 | break; | 387 | break; |
388 | } | 388 | } |
389 | } | 389 | } |
390 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 390 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
391 | 391 | ||
392 | return broadcast; | 392 | return broadcast; |
393 | } | 393 | } |
394 | 394 | ||
395 | 395 | ||
396 | #ifdef CONFIG_TICK_ONESHOT | 396 | #ifdef CONFIG_TICK_ONESHOT |
397 | 397 | ||
398 | /* FIXME: use cpumask_var_t. */ | 398 | /* FIXME: use cpumask_var_t. */ |
399 | static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS); | 399 | static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS); |
400 | 400 | ||
401 | /* | 401 | /* |
402 | * Exposed for debugging: see timer_list.c | 402 | * Exposed for debugging: see timer_list.c |
403 | */ | 403 | */ |
404 | struct cpumask *tick_get_broadcast_oneshot_mask(void) | 404 | struct cpumask *tick_get_broadcast_oneshot_mask(void) |
405 | { | 405 | { |
406 | return to_cpumask(tick_broadcast_oneshot_mask); | 406 | return to_cpumask(tick_broadcast_oneshot_mask); |
407 | } | 407 | } |
408 | 408 | ||
409 | static int tick_broadcast_set_event(ktime_t expires, int force) | 409 | static int tick_broadcast_set_event(ktime_t expires, int force) |
410 | { | 410 | { |
411 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | 411 | struct clock_event_device *bc = tick_broadcast_device.evtdev; |
412 | 412 | ||
413 | if (bc->mode != CLOCK_EVT_MODE_ONESHOT) | 413 | if (bc->mode != CLOCK_EVT_MODE_ONESHOT) |
414 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | 414 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); |
415 | 415 | ||
416 | return clockevents_program_event(bc, expires, force); | 416 | return clockevents_program_event(bc, expires, force); |
417 | } | 417 | } |
418 | 418 | ||
419 | int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | 419 | int tick_resume_broadcast_oneshot(struct clock_event_device *bc) |
420 | { | 420 | { |
421 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | 421 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); |
422 | return 0; | 422 | return 0; |
423 | } | 423 | } |
424 | 424 | ||
425 | /* | 425 | /* |
426 | * Called from irq_enter() when idle was interrupted to reenable the | 426 | * Called from irq_enter() when idle was interrupted to reenable the |
427 | * per cpu device. | 427 | * per cpu device. |
428 | */ | 428 | */ |
429 | void tick_check_oneshot_broadcast(int cpu) | 429 | void tick_check_oneshot_broadcast(int cpu) |
430 | { | 430 | { |
431 | if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) { | 431 | if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) { |
432 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); | 432 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); |
433 | 433 | ||
434 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); | 434 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); |
435 | } | 435 | } |
436 | } | 436 | } |
437 | 437 | ||
438 | /* | 438 | /* |
439 | * Handle oneshot mode broadcasting | 439 | * Handle oneshot mode broadcasting |
440 | */ | 440 | */ |
441 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) | 441 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) |
442 | { | 442 | { |
443 | struct tick_device *td; | 443 | struct tick_device *td; |
444 | ktime_t now, next_event; | 444 | ktime_t now, next_event; |
445 | int cpu; | 445 | int cpu; |
446 | 446 | ||
447 | raw_spin_lock(&tick_broadcast_lock); | 447 | raw_spin_lock(&tick_broadcast_lock); |
448 | again: | 448 | again: |
449 | dev->next_event.tv64 = KTIME_MAX; | 449 | dev->next_event.tv64 = KTIME_MAX; |
450 | next_event.tv64 = KTIME_MAX; | 450 | next_event.tv64 = KTIME_MAX; |
451 | cpumask_clear(to_cpumask(tmpmask)); | 451 | cpumask_clear(to_cpumask(tmpmask)); |
452 | now = ktime_get(); | 452 | now = ktime_get(); |
453 | /* Find all expired events */ | 453 | /* Find all expired events */ |
454 | for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) { | 454 | for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) { |
455 | td = &per_cpu(tick_cpu_device, cpu); | 455 | td = &per_cpu(tick_cpu_device, cpu); |
456 | if (td->evtdev->next_event.tv64 <= now.tv64) | 456 | if (td->evtdev->next_event.tv64 <= now.tv64) |
457 | cpumask_set_cpu(cpu, to_cpumask(tmpmask)); | 457 | cpumask_set_cpu(cpu, to_cpumask(tmpmask)); |
458 | else if (td->evtdev->next_event.tv64 < next_event.tv64) | 458 | else if (td->evtdev->next_event.tv64 < next_event.tv64) |
459 | next_event.tv64 = td->evtdev->next_event.tv64; | 459 | next_event.tv64 = td->evtdev->next_event.tv64; |
460 | } | 460 | } |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * Wakeup the cpus which have an expired event. | 463 | * Wakeup the cpus which have an expired event. |
464 | */ | 464 | */ |
465 | tick_do_broadcast(to_cpumask(tmpmask)); | 465 | tick_do_broadcast(to_cpumask(tmpmask)); |
466 | 466 | ||
467 | /* | 467 | /* |
468 | * Two reasons for reprogram: | 468 | * Two reasons for reprogram: |
469 | * | 469 | * |
470 | * - The global event did not expire any CPU local | 470 | * - The global event did not expire any CPU local |
471 | * events. This happens in dyntick mode, as the maximum PIT | 471 | * events. This happens in dyntick mode, as the maximum PIT |
472 | * delta is quite small. | 472 | * delta is quite small. |
473 | * | 473 | * |
474 | * - There are pending events on sleeping CPUs which were not | 474 | * - There are pending events on sleeping CPUs which were not |
475 | * in the event mask | 475 | * in the event mask |
476 | */ | 476 | */ |
477 | if (next_event.tv64 != KTIME_MAX) { | 477 | if (next_event.tv64 != KTIME_MAX) { |
478 | /* | 478 | /* |
479 | * Rearm the broadcast device. If event expired, | 479 | * Rearm the broadcast device. If event expired, |
480 | * repeat the above | 480 | * repeat the above |
481 | */ | 481 | */ |
482 | if (tick_broadcast_set_event(next_event, 0)) | 482 | if (tick_broadcast_set_event(next_event, 0)) |
483 | goto again; | 483 | goto again; |
484 | } | 484 | } |
485 | raw_spin_unlock(&tick_broadcast_lock); | 485 | raw_spin_unlock(&tick_broadcast_lock); |
486 | } | 486 | } |
487 | 487 | ||
488 | /* | 488 | /* |
489 | * Powerstate information: The system enters/leaves a state, where | 489 | * Powerstate information: The system enters/leaves a state, where |
490 | * affected devices might stop | 490 | * affected devices might stop |
491 | */ | 491 | */ |
492 | void tick_broadcast_oneshot_control(unsigned long reason) | 492 | void tick_broadcast_oneshot_control(unsigned long reason) |
493 | { | 493 | { |
494 | struct clock_event_device *bc, *dev; | 494 | struct clock_event_device *bc, *dev; |
495 | struct tick_device *td; | 495 | struct tick_device *td; |
496 | unsigned long flags; | 496 | unsigned long flags; |
497 | int cpu; | 497 | int cpu; |
498 | 498 | ||
499 | /* | 499 | /* |
500 | * Periodic mode does not care about the enter/exit of power | 500 | * Periodic mode does not care about the enter/exit of power |
501 | * states | 501 | * states |
502 | */ | 502 | */ |
503 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | 503 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
504 | return; | 504 | return; |
505 | 505 | ||
506 | /* | 506 | /* |
507 | * We are called with preemtion disabled from the depth of the | 507 | * We are called with preemtion disabled from the depth of the |
508 | * idle code, so we can't be moved away. | 508 | * idle code, so we can't be moved away. |
509 | */ | 509 | */ |
510 | cpu = smp_processor_id(); | 510 | cpu = smp_processor_id(); |
511 | td = &per_cpu(tick_cpu_device, cpu); | 511 | td = &per_cpu(tick_cpu_device, cpu); |
512 | dev = td->evtdev; | 512 | dev = td->evtdev; |
513 | 513 | ||
514 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) | 514 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
515 | return; | 515 | return; |
516 | 516 | ||
517 | bc = tick_broadcast_device.evtdev; | 517 | bc = tick_broadcast_device.evtdev; |
518 | 518 | ||
519 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 519 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
520 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { | 520 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { |
521 | if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { | 521 | if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { |
522 | cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); | 522 | cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); |
523 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); | 523 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); |
524 | if (dev->next_event.tv64 < bc->next_event.tv64) | 524 | if (dev->next_event.tv64 < bc->next_event.tv64) |
525 | tick_broadcast_set_event(dev->next_event, 1); | 525 | tick_broadcast_set_event(dev->next_event, 1); |
526 | } | 526 | } |
527 | } else { | 527 | } else { |
528 | if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { | 528 | if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { |
529 | cpumask_clear_cpu(cpu, | 529 | cpumask_clear_cpu(cpu, |
530 | tick_get_broadcast_oneshot_mask()); | 530 | tick_get_broadcast_oneshot_mask()); |
531 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | 531 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); |
532 | if (dev->next_event.tv64 != KTIME_MAX) | 532 | if (dev->next_event.tv64 != KTIME_MAX) |
533 | tick_program_event(dev->next_event, 1); | 533 | tick_program_event(dev->next_event, 1); |
534 | } | 534 | } |
535 | } | 535 | } |
536 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 536 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
537 | } | 537 | } |
538 | 538 | ||
539 | /* | 539 | /* |
540 | * Reset the one shot broadcast for a cpu | 540 | * Reset the one shot broadcast for a cpu |
541 | * | 541 | * |
542 | * Called with tick_broadcast_lock held | 542 | * Called with tick_broadcast_lock held |
543 | */ | 543 | */ |
544 | static void tick_broadcast_clear_oneshot(int cpu) | 544 | static void tick_broadcast_clear_oneshot(int cpu) |
545 | { | 545 | { |
546 | cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); | 546 | cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); |
547 | } | 547 | } |
548 | 548 | ||
549 | static void tick_broadcast_init_next_event(struct cpumask *mask, | 549 | static void tick_broadcast_init_next_event(struct cpumask *mask, |
550 | ktime_t expires) | 550 | ktime_t expires) |
551 | { | 551 | { |
552 | struct tick_device *td; | 552 | struct tick_device *td; |
553 | int cpu; | 553 | int cpu; |
554 | 554 | ||
555 | for_each_cpu(cpu, mask) { | 555 | for_each_cpu(cpu, mask) { |
556 | td = &per_cpu(tick_cpu_device, cpu); | 556 | td = &per_cpu(tick_cpu_device, cpu); |
557 | if (td->evtdev) | 557 | if (td->evtdev) |
558 | td->evtdev->next_event = expires; | 558 | td->evtdev->next_event = expires; |
559 | } | 559 | } |
560 | } | 560 | } |
561 | 561 | ||
562 | /** | 562 | /** |
563 | * tick_broadcast_setup_oneshot - setup the broadcast device | 563 | * tick_broadcast_setup_oneshot - setup the broadcast device |
564 | */ | 564 | */ |
565 | void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | 565 | void tick_broadcast_setup_oneshot(struct clock_event_device *bc) |
566 | { | 566 | { |
567 | int cpu = smp_processor_id(); | 567 | int cpu = smp_processor_id(); |
568 | 568 | ||
569 | /* Set it up only once ! */ | 569 | /* Set it up only once ! */ |
570 | if (bc->event_handler != tick_handle_oneshot_broadcast) { | 570 | if (bc->event_handler != tick_handle_oneshot_broadcast) { |
571 | int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; | 571 | int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; |
572 | 572 | ||
573 | bc->event_handler = tick_handle_oneshot_broadcast; | 573 | bc->event_handler = tick_handle_oneshot_broadcast; |
574 | 574 | ||
575 | /* Take the do_timer update */ | 575 | /* Take the do_timer update */ |
576 | tick_do_timer_cpu = cpu; | 576 | if (!tick_nohz_extended_cpu(cpu)) |
577 | tick_do_timer_cpu = cpu; | ||
577 | 578 | ||
578 | /* | 579 | /* |
579 | * We must be careful here. There might be other CPUs | 580 | * We must be careful here. There might be other CPUs |
580 | * waiting for periodic broadcast. We need to set the | 581 | * waiting for periodic broadcast. We need to set the |
581 | * oneshot_mask bits for those and program the | 582 | * oneshot_mask bits for those and program the |
582 | * broadcast device to fire. | 583 | * broadcast device to fire. |
583 | */ | 584 | */ |
584 | cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask()); | 585 | cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask()); |
585 | cpumask_clear_cpu(cpu, to_cpumask(tmpmask)); | 586 | cpumask_clear_cpu(cpu, to_cpumask(tmpmask)); |
586 | cpumask_or(tick_get_broadcast_oneshot_mask(), | 587 | cpumask_or(tick_get_broadcast_oneshot_mask(), |
587 | tick_get_broadcast_oneshot_mask(), | 588 | tick_get_broadcast_oneshot_mask(), |
588 | to_cpumask(tmpmask)); | 589 | to_cpumask(tmpmask)); |
589 | 590 | ||
590 | if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { | 591 | if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { |
591 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | 592 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); |
592 | tick_broadcast_init_next_event(to_cpumask(tmpmask), | 593 | tick_broadcast_init_next_event(to_cpumask(tmpmask), |
593 | tick_next_period); | 594 | tick_next_period); |
594 | tick_broadcast_set_event(tick_next_period, 1); | 595 | tick_broadcast_set_event(tick_next_period, 1); |
595 | } else | 596 | } else |
596 | bc->next_event.tv64 = KTIME_MAX; | 597 | bc->next_event.tv64 = KTIME_MAX; |
597 | } else { | 598 | } else { |
598 | /* | 599 | /* |
599 | * The first cpu which switches to oneshot mode sets | 600 | * The first cpu which switches to oneshot mode sets |
600 | * the bit for all other cpus which are in the general | 601 | * the bit for all other cpus which are in the general |
601 | * (periodic) broadcast mask. So the bit is set and | 602 | * (periodic) broadcast mask. So the bit is set and |
602 | * would prevent the first broadcast enter after this | 603 | * would prevent the first broadcast enter after this |
603 | * to program the bc device. | 604 | * to program the bc device. |
604 | */ | 605 | */ |
605 | tick_broadcast_clear_oneshot(cpu); | 606 | tick_broadcast_clear_oneshot(cpu); |
606 | } | 607 | } |
607 | } | 608 | } |
608 | 609 | ||
609 | /* | 610 | /* |
610 | * Select oneshot operating mode for the broadcast device | 611 | * Select oneshot operating mode for the broadcast device |
611 | */ | 612 | */ |
612 | void tick_broadcast_switch_to_oneshot(void) | 613 | void tick_broadcast_switch_to_oneshot(void) |
613 | { | 614 | { |
614 | struct clock_event_device *bc; | 615 | struct clock_event_device *bc; |
615 | unsigned long flags; | 616 | unsigned long flags; |
616 | 617 | ||
617 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 618 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
618 | 619 | ||
619 | tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; | 620 | tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; |
620 | bc = tick_broadcast_device.evtdev; | 621 | bc = tick_broadcast_device.evtdev; |
621 | if (bc) | 622 | if (bc) |
622 | tick_broadcast_setup_oneshot(bc); | 623 | tick_broadcast_setup_oneshot(bc); |
623 | 624 | ||
624 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 625 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
625 | } | 626 | } |
626 | 627 | ||
627 | 628 | ||
628 | /* | 629 | /* |
629 | * Remove a dead CPU from broadcasting | 630 | * Remove a dead CPU from broadcasting |
630 | */ | 631 | */ |
631 | void tick_shutdown_broadcast_oneshot(unsigned int *cpup) | 632 | void tick_shutdown_broadcast_oneshot(unsigned int *cpup) |
632 | { | 633 | { |
633 | unsigned long flags; | 634 | unsigned long flags; |
634 | unsigned int cpu = *cpup; | 635 | unsigned int cpu = *cpup; |
635 | 636 | ||
636 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 637 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
637 | 638 | ||
638 | /* | 639 | /* |
639 | * Clear the broadcast mask flag for the dead cpu, but do not | 640 | * Clear the broadcast mask flag for the dead cpu, but do not |
640 | * stop the broadcast device! | 641 | * stop the broadcast device! |
641 | */ | 642 | */ |
642 | cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); | 643 | cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); |
643 | 644 | ||
644 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 645 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
645 | } | 646 | } |
646 | 647 | ||
647 | /* | 648 | /* |
648 | * Check, whether the broadcast device is in one shot mode | 649 | * Check, whether the broadcast device is in one shot mode |
649 | */ | 650 | */ |
650 | int tick_broadcast_oneshot_active(void) | 651 | int tick_broadcast_oneshot_active(void) |
651 | { | 652 | { |
652 | return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; | 653 | return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; |
653 | } | 654 | } |
654 | 655 | ||
655 | /* | 656 | /* |
656 | * Check whether the broadcast device supports oneshot. | 657 | * Check whether the broadcast device supports oneshot. |
657 | */ | 658 | */ |
658 | bool tick_broadcast_oneshot_available(void) | 659 | bool tick_broadcast_oneshot_available(void) |
659 | { | 660 | { |
660 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | 661 | struct clock_event_device *bc = tick_broadcast_device.evtdev; |
661 | 662 | ||
662 | return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; | 663 | return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; |
663 | } | 664 | } |
664 | 665 | ||
665 | #endif | 666 | #endif |
666 | 667 |
kernel/time/tick-common.c
1 | /* | 1 | /* |
2 | * linux/kernel/time/tick-common.c | 2 | * linux/kernel/time/tick-common.c |
3 | * | 3 | * |
4 | * This file contains the base functions to manage periodic tick | 4 | * This file contains the base functions to manage periodic tick |
5 | * related events. | 5 | * related events. |
6 | * | 6 | * |
7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | 7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | 8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | 9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner |
10 | * | 10 | * |
11 | * This code is licenced under the GPL version 2. For details see | 11 | * This code is licenced under the GPL version 2. For details see |
12 | * kernel-base/COPYING. | 12 | * kernel-base/COPYING. |
13 | */ | 13 | */ |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
19 | #include <linux/profile.h> | 19 | #include <linux/profile.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | 21 | ||
22 | #include <asm/irq_regs.h> | 22 | #include <asm/irq_regs.h> |
23 | 23 | ||
24 | #include "tick-internal.h" | 24 | #include "tick-internal.h" |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Tick devices | 27 | * Tick devices |
28 | */ | 28 | */ |
29 | DEFINE_PER_CPU(struct tick_device, tick_cpu_device); | 29 | DEFINE_PER_CPU(struct tick_device, tick_cpu_device); |
30 | /* | 30 | /* |
31 | * Tick next event: keeps track of the tick time | 31 | * Tick next event: keeps track of the tick time |
32 | */ | 32 | */ |
33 | ktime_t tick_next_period; | 33 | ktime_t tick_next_period; |
34 | ktime_t tick_period; | 34 | ktime_t tick_period; |
35 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; | 35 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; |
36 | static DEFINE_RAW_SPINLOCK(tick_device_lock); | 36 | static DEFINE_RAW_SPINLOCK(tick_device_lock); |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Debugging: see timer_list.c | 39 | * Debugging: see timer_list.c |
40 | */ | 40 | */ |
41 | struct tick_device *tick_get_device(int cpu) | 41 | struct tick_device *tick_get_device(int cpu) |
42 | { | 42 | { |
43 | return &per_cpu(tick_cpu_device, cpu); | 43 | return &per_cpu(tick_cpu_device, cpu); |
44 | } | 44 | } |
45 | 45 | ||
46 | /** | 46 | /** |
47 | * tick_is_oneshot_available - check for a oneshot capable event device | 47 | * tick_is_oneshot_available - check for a oneshot capable event device |
48 | */ | 48 | */ |
49 | int tick_is_oneshot_available(void) | 49 | int tick_is_oneshot_available(void) |
50 | { | 50 | { |
51 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); | 51 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); |
52 | 52 | ||
53 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT)) | 53 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT)) |
54 | return 0; | 54 | return 0; |
55 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) | 55 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
56 | return 1; | 56 | return 1; |
57 | return tick_broadcast_oneshot_available(); | 57 | return tick_broadcast_oneshot_available(); |
58 | } | 58 | } |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * Periodic tick | 61 | * Periodic tick |
62 | */ | 62 | */ |
63 | static void tick_periodic(int cpu) | 63 | static void tick_periodic(int cpu) |
64 | { | 64 | { |
65 | if (tick_do_timer_cpu == cpu) { | 65 | if (tick_do_timer_cpu == cpu) { |
66 | write_seqlock(&jiffies_lock); | 66 | write_seqlock(&jiffies_lock); |
67 | 67 | ||
68 | /* Keep track of the next tick event */ | 68 | /* Keep track of the next tick event */ |
69 | tick_next_period = ktime_add(tick_next_period, tick_period); | 69 | tick_next_period = ktime_add(tick_next_period, tick_period); |
70 | 70 | ||
71 | do_timer(1); | 71 | do_timer(1); |
72 | write_sequnlock(&jiffies_lock); | 72 | write_sequnlock(&jiffies_lock); |
73 | } | 73 | } |
74 | 74 | ||
75 | update_process_times(user_mode(get_irq_regs())); | 75 | update_process_times(user_mode(get_irq_regs())); |
76 | profile_tick(CPU_PROFILING); | 76 | profile_tick(CPU_PROFILING); |
77 | } | 77 | } |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * Event handler for periodic ticks | 80 | * Event handler for periodic ticks |
81 | */ | 81 | */ |
82 | void tick_handle_periodic(struct clock_event_device *dev) | 82 | void tick_handle_periodic(struct clock_event_device *dev) |
83 | { | 83 | { |
84 | int cpu = smp_processor_id(); | 84 | int cpu = smp_processor_id(); |
85 | ktime_t next; | 85 | ktime_t next; |
86 | 86 | ||
87 | tick_periodic(cpu); | 87 | tick_periodic(cpu); |
88 | 88 | ||
89 | if (dev->mode != CLOCK_EVT_MODE_ONESHOT) | 89 | if (dev->mode != CLOCK_EVT_MODE_ONESHOT) |
90 | return; | 90 | return; |
91 | /* | 91 | /* |
92 | * Setup the next period for devices, which do not have | 92 | * Setup the next period for devices, which do not have |
93 | * periodic mode: | 93 | * periodic mode: |
94 | */ | 94 | */ |
95 | next = ktime_add(dev->next_event, tick_period); | 95 | next = ktime_add(dev->next_event, tick_period); |
96 | for (;;) { | 96 | for (;;) { |
97 | if (!clockevents_program_event(dev, next, false)) | 97 | if (!clockevents_program_event(dev, next, false)) |
98 | return; | 98 | return; |
99 | /* | 99 | /* |
100 | * Have to be careful here. If we're in oneshot mode, | 100 | * Have to be careful here. If we're in oneshot mode, |
101 | * before we call tick_periodic() in a loop, we need | 101 | * before we call tick_periodic() in a loop, we need |
102 | * to be sure we're using a real hardware clocksource. | 102 | * to be sure we're using a real hardware clocksource. |
103 | * Otherwise we could get trapped in an infinite | 103 | * Otherwise we could get trapped in an infinite |
104 | * loop, as the tick_periodic() increments jiffies, | 104 | * loop, as the tick_periodic() increments jiffies, |
105 | * when then will increment time, posibly causing | 105 | * when then will increment time, posibly causing |
106 | * the loop to trigger again and again. | 106 | * the loop to trigger again and again. |
107 | */ | 107 | */ |
108 | if (timekeeping_valid_for_hres()) | 108 | if (timekeeping_valid_for_hres()) |
109 | tick_periodic(cpu); | 109 | tick_periodic(cpu); |
110 | next = ktime_add(next, tick_period); | 110 | next = ktime_add(next, tick_period); |
111 | } | 111 | } |
112 | } | 112 | } |
113 | 113 | ||
114 | /* | 114 | /* |
115 | * Setup the device for a periodic tick | 115 | * Setup the device for a periodic tick |
116 | */ | 116 | */ |
117 | void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | 117 | void tick_setup_periodic(struct clock_event_device *dev, int broadcast) |
118 | { | 118 | { |
119 | tick_set_periodic_handler(dev, broadcast); | 119 | tick_set_periodic_handler(dev, broadcast); |
120 | 120 | ||
121 | /* Broadcast setup ? */ | 121 | /* Broadcast setup ? */ |
122 | if (!tick_device_is_functional(dev)) | 122 | if (!tick_device_is_functional(dev)) |
123 | return; | 123 | return; |
124 | 124 | ||
125 | if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && | 125 | if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && |
126 | !tick_broadcast_oneshot_active()) { | 126 | !tick_broadcast_oneshot_active()) { |
127 | clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); | 127 | clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); |
128 | } else { | 128 | } else { |
129 | unsigned long seq; | 129 | unsigned long seq; |
130 | ktime_t next; | 130 | ktime_t next; |
131 | 131 | ||
132 | do { | 132 | do { |
133 | seq = read_seqbegin(&jiffies_lock); | 133 | seq = read_seqbegin(&jiffies_lock); |
134 | next = tick_next_period; | 134 | next = tick_next_period; |
135 | } while (read_seqretry(&jiffies_lock, seq)); | 135 | } while (read_seqretry(&jiffies_lock, seq)); |
136 | 136 | ||
137 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | 137 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); |
138 | 138 | ||
139 | for (;;) { | 139 | for (;;) { |
140 | if (!clockevents_program_event(dev, next, false)) | 140 | if (!clockevents_program_event(dev, next, false)) |
141 | return; | 141 | return; |
142 | next = ktime_add(next, tick_period); | 142 | next = ktime_add(next, tick_period); |
143 | } | 143 | } |
144 | } | 144 | } |
145 | } | 145 | } |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * Setup the tick device | 148 | * Setup the tick device |
149 | */ | 149 | */ |
150 | static void tick_setup_device(struct tick_device *td, | 150 | static void tick_setup_device(struct tick_device *td, |
151 | struct clock_event_device *newdev, int cpu, | 151 | struct clock_event_device *newdev, int cpu, |
152 | const struct cpumask *cpumask) | 152 | const struct cpumask *cpumask) |
153 | { | 153 | { |
154 | ktime_t next_event; | 154 | ktime_t next_event; |
155 | void (*handler)(struct clock_event_device *) = NULL; | 155 | void (*handler)(struct clock_event_device *) = NULL; |
156 | 156 | ||
157 | /* | 157 | /* |
158 | * First device setup ? | 158 | * First device setup ? |
159 | */ | 159 | */ |
160 | if (!td->evtdev) { | 160 | if (!td->evtdev) { |
161 | /* | 161 | /* |
162 | * If no cpu took the do_timer update, assign it to | 162 | * If no cpu took the do_timer update, assign it to |
163 | * this cpu: | 163 | * this cpu: |
164 | */ | 164 | */ |
165 | if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { | 165 | if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { |
166 | tick_do_timer_cpu = cpu; | 166 | if (!tick_nohz_extended_cpu(cpu)) |
167 | tick_do_timer_cpu = cpu; | ||
168 | else | ||
169 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
167 | tick_next_period = ktime_get(); | 170 | tick_next_period = ktime_get(); |
168 | tick_period = ktime_set(0, NSEC_PER_SEC / HZ); | 171 | tick_period = ktime_set(0, NSEC_PER_SEC / HZ); |
169 | } | 172 | } |
170 | 173 | ||
171 | /* | 174 | /* |
172 | * Startup in periodic mode first. | 175 | * Startup in periodic mode first. |
173 | */ | 176 | */ |
174 | td->mode = TICKDEV_MODE_PERIODIC; | 177 | td->mode = TICKDEV_MODE_PERIODIC; |
175 | } else { | 178 | } else { |
176 | handler = td->evtdev->event_handler; | 179 | handler = td->evtdev->event_handler; |
177 | next_event = td->evtdev->next_event; | 180 | next_event = td->evtdev->next_event; |
178 | td->evtdev->event_handler = clockevents_handle_noop; | 181 | td->evtdev->event_handler = clockevents_handle_noop; |
179 | } | 182 | } |
180 | 183 | ||
181 | td->evtdev = newdev; | 184 | td->evtdev = newdev; |
182 | 185 | ||
183 | /* | 186 | /* |
184 | * When the device is not per cpu, pin the interrupt to the | 187 | * When the device is not per cpu, pin the interrupt to the |
185 | * current cpu: | 188 | * current cpu: |
186 | */ | 189 | */ |
187 | if (!cpumask_equal(newdev->cpumask, cpumask)) | 190 | if (!cpumask_equal(newdev->cpumask, cpumask)) |
188 | irq_set_affinity(newdev->irq, cpumask); | 191 | irq_set_affinity(newdev->irq, cpumask); |
189 | 192 | ||
190 | /* | 193 | /* |
191 | * When global broadcasting is active, check if the current | 194 | * When global broadcasting is active, check if the current |
192 | * device is registered as a placeholder for broadcast mode. | 195 | * device is registered as a placeholder for broadcast mode. |
193 | * This allows us to handle this x86 misfeature in a generic | 196 | * This allows us to handle this x86 misfeature in a generic |
194 | * way. | 197 | * way. |
195 | */ | 198 | */ |
196 | if (tick_device_uses_broadcast(newdev, cpu)) | 199 | if (tick_device_uses_broadcast(newdev, cpu)) |
197 | return; | 200 | return; |
198 | 201 | ||
199 | if (td->mode == TICKDEV_MODE_PERIODIC) | 202 | if (td->mode == TICKDEV_MODE_PERIODIC) |
200 | tick_setup_periodic(newdev, 0); | 203 | tick_setup_periodic(newdev, 0); |
201 | else | 204 | else |
202 | tick_setup_oneshot(newdev, handler, next_event); | 205 | tick_setup_oneshot(newdev, handler, next_event); |
203 | } | 206 | } |
204 | 207 | ||
205 | /* | 208 | /* |
206 | * Check, if the new registered device should be used. | 209 | * Check, if the new registered device should be used. |
207 | */ | 210 | */ |
208 | static int tick_check_new_device(struct clock_event_device *newdev) | 211 | static int tick_check_new_device(struct clock_event_device *newdev) |
209 | { | 212 | { |
210 | struct clock_event_device *curdev; | 213 | struct clock_event_device *curdev; |
211 | struct tick_device *td; | 214 | struct tick_device *td; |
212 | int cpu, ret = NOTIFY_OK; | 215 | int cpu, ret = NOTIFY_OK; |
213 | unsigned long flags; | 216 | unsigned long flags; |
214 | 217 | ||
215 | raw_spin_lock_irqsave(&tick_device_lock, flags); | 218 | raw_spin_lock_irqsave(&tick_device_lock, flags); |
216 | 219 | ||
217 | cpu = smp_processor_id(); | 220 | cpu = smp_processor_id(); |
218 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) | 221 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) |
219 | goto out_bc; | 222 | goto out_bc; |
220 | 223 | ||
221 | td = &per_cpu(tick_cpu_device, cpu); | 224 | td = &per_cpu(tick_cpu_device, cpu); |
222 | curdev = td->evtdev; | 225 | curdev = td->evtdev; |
223 | 226 | ||
224 | /* cpu local device ? */ | 227 | /* cpu local device ? */ |
225 | if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { | 228 | if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { |
226 | 229 | ||
227 | /* | 230 | /* |
228 | * If the cpu affinity of the device interrupt can not | 231 | * If the cpu affinity of the device interrupt can not |
229 | * be set, ignore it. | 232 | * be set, ignore it. |
230 | */ | 233 | */ |
231 | if (!irq_can_set_affinity(newdev->irq)) | 234 | if (!irq_can_set_affinity(newdev->irq)) |
232 | goto out_bc; | 235 | goto out_bc; |
233 | 236 | ||
234 | /* | 237 | /* |
235 | * If we have a cpu local device already, do not replace it | 238 | * If we have a cpu local device already, do not replace it |
236 | * by a non cpu local device | 239 | * by a non cpu local device |
237 | */ | 240 | */ |
238 | if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) | 241 | if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) |
239 | goto out_bc; | 242 | goto out_bc; |
240 | } | 243 | } |
241 | 244 | ||
242 | /* | 245 | /* |
243 | * If we have an active device, then check the rating and the oneshot | 246 | * If we have an active device, then check the rating and the oneshot |
244 | * feature. | 247 | * feature. |
245 | */ | 248 | */ |
246 | if (curdev) { | 249 | if (curdev) { |
247 | /* | 250 | /* |
248 | * Prefer one shot capable devices ! | 251 | * Prefer one shot capable devices ! |
249 | */ | 252 | */ |
250 | if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && | 253 | if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && |
251 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | 254 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) |
252 | goto out_bc; | 255 | goto out_bc; |
253 | /* | 256 | /* |
254 | * Check the rating | 257 | * Check the rating |
255 | */ | 258 | */ |
256 | if (curdev->rating >= newdev->rating) | 259 | if (curdev->rating >= newdev->rating) |
257 | goto out_bc; | 260 | goto out_bc; |
258 | } | 261 | } |
259 | 262 | ||
260 | /* | 263 | /* |
261 | * Replace the eventually existing device by the new | 264 | * Replace the eventually existing device by the new |
262 | * device. If the current device is the broadcast device, do | 265 | * device. If the current device is the broadcast device, do |
263 | * not give it back to the clockevents layer ! | 266 | * not give it back to the clockevents layer ! |
264 | */ | 267 | */ |
265 | if (tick_is_broadcast_device(curdev)) { | 268 | if (tick_is_broadcast_device(curdev)) { |
266 | clockevents_shutdown(curdev); | 269 | clockevents_shutdown(curdev); |
267 | curdev = NULL; | 270 | curdev = NULL; |
268 | } | 271 | } |
269 | clockevents_exchange_device(curdev, newdev); | 272 | clockevents_exchange_device(curdev, newdev); |
270 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); | 273 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); |
271 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | 274 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) |
272 | tick_oneshot_notify(); | 275 | tick_oneshot_notify(); |
273 | 276 | ||
274 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | 277 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); |
275 | return NOTIFY_STOP; | 278 | return NOTIFY_STOP; |
276 | 279 | ||
277 | out_bc: | 280 | out_bc: |
278 | /* | 281 | /* |
279 | * Can the new device be used as a broadcast device ? | 282 | * Can the new device be used as a broadcast device ? |
280 | */ | 283 | */ |
281 | if (tick_check_broadcast_device(newdev)) | 284 | if (tick_check_broadcast_device(newdev)) |
282 | ret = NOTIFY_STOP; | 285 | ret = NOTIFY_STOP; |
283 | 286 | ||
284 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | 287 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); |
285 | 288 | ||
286 | return ret; | 289 | return ret; |
287 | } | 290 | } |
288 | 291 | ||
289 | /* | 292 | /* |
290 | * Transfer the do_timer job away from a dying cpu. | 293 | * Transfer the do_timer job away from a dying cpu. |
291 | * | 294 | * |
292 | * Called with interrupts disabled. | 295 | * Called with interrupts disabled. |
293 | */ | 296 | */ |
294 | static void tick_handover_do_timer(int *cpup) | 297 | static void tick_handover_do_timer(int *cpup) |
295 | { | 298 | { |
296 | if (*cpup == tick_do_timer_cpu) { | 299 | if (*cpup == tick_do_timer_cpu) { |
297 | int cpu = cpumask_first(cpu_online_mask); | 300 | int cpu = cpumask_first(cpu_online_mask); |
298 | 301 | ||
299 | tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : | 302 | tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : |
300 | TICK_DO_TIMER_NONE; | 303 | TICK_DO_TIMER_NONE; |
301 | } | 304 | } |
302 | } | 305 | } |
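
The handover path above is untouched by this commit: when the CPU holding the duty dies, the duty simply moves to the first online CPU. With a nohz_extended range present, the new CPU_DOWN_PREPARE notifier further down (in tick-sched.c) should keep the duty holder online in the first place, so this fallback is not expected to land the duty on a full dynticks CPU. A rough stand-in for cpumask_first(cpu_online_mask) over a toy bitmask:

    #include <stdio.h>

    /* toy stand-in for cpumask_first(cpu_online_mask): lowest set bit or -1 */
    static int first_online_cpu(unsigned long online_mask, int nr_cpus)
    {
            int cpu;

            for (cpu = 0; cpu < nr_cpus; cpu++)
                    if (online_mask & (1UL << cpu))
                            return cpu;
            return -1;      /* plays the role of TICK_DO_TIMER_NONE */
    }

    int main(void)
    {
            unsigned long online = 0x0d;    /* CPUs 0, 2 and 3 online */

            printf("handover target: cpu%d\n", first_online_cpu(online, 8));
            return 0;
    }
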
303 | 306 | ||
304 | /* | 307 | /* |
305 | * Shutdown an event device on a given cpu: | 308 | * Shutdown an event device on a given cpu: |
306 | * | 309 | * |
307 | * This is called on a live CPU, when a CPU is dead. So we cannot | 310 | * This is called on a live CPU, when a CPU is dead. So we cannot |
308 | * access the hardware device itself. | 311 | * access the hardware device itself. |
309 | * We just set the mode and remove it from the lists. | 312 | * We just set the mode and remove it from the lists. |
310 | */ | 313 | */ |
311 | static void tick_shutdown(unsigned int *cpup) | 314 | static void tick_shutdown(unsigned int *cpup) |
312 | { | 315 | { |
313 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); | 316 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); |
314 | struct clock_event_device *dev = td->evtdev; | 317 | struct clock_event_device *dev = td->evtdev; |
315 | unsigned long flags; | 318 | unsigned long flags; |
316 | 319 | ||
317 | raw_spin_lock_irqsave(&tick_device_lock, flags); | 320 | raw_spin_lock_irqsave(&tick_device_lock, flags); |
318 | td->mode = TICKDEV_MODE_PERIODIC; | 321 | td->mode = TICKDEV_MODE_PERIODIC; |
319 | if (dev) { | 322 | if (dev) { |
320 | /* | 323 | /* |
321 | * Prevent that the clock events layer tries to call | 324 | * Prevent that the clock events layer tries to call |
322 | * the set mode function! | 325 | * the set mode function! |
323 | */ | 326 | */ |
324 | dev->mode = CLOCK_EVT_MODE_UNUSED; | 327 | dev->mode = CLOCK_EVT_MODE_UNUSED; |
325 | clockevents_exchange_device(dev, NULL); | 328 | clockevents_exchange_device(dev, NULL); |
326 | td->evtdev = NULL; | 329 | td->evtdev = NULL; |
327 | } | 330 | } |
328 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | 331 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); |
329 | } | 332 | } |
330 | 333 | ||
331 | static void tick_suspend(void) | 334 | static void tick_suspend(void) |
332 | { | 335 | { |
333 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 336 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
334 | unsigned long flags; | 337 | unsigned long flags; |
335 | 338 | ||
336 | raw_spin_lock_irqsave(&tick_device_lock, flags); | 339 | raw_spin_lock_irqsave(&tick_device_lock, flags); |
337 | clockevents_shutdown(td->evtdev); | 340 | clockevents_shutdown(td->evtdev); |
338 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | 341 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); |
339 | } | 342 | } |
340 | 343 | ||
341 | static void tick_resume(void) | 344 | static void tick_resume(void) |
342 | { | 345 | { |
343 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 346 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
344 | unsigned long flags; | 347 | unsigned long flags; |
345 | int broadcast = tick_resume_broadcast(); | 348 | int broadcast = tick_resume_broadcast(); |
346 | 349 | ||
347 | raw_spin_lock_irqsave(&tick_device_lock, flags); | 350 | raw_spin_lock_irqsave(&tick_device_lock, flags); |
348 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); | 351 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); |
349 | 352 | ||
350 | if (!broadcast) { | 353 | if (!broadcast) { |
351 | if (td->mode == TICKDEV_MODE_PERIODIC) | 354 | if (td->mode == TICKDEV_MODE_PERIODIC) |
352 | tick_setup_periodic(td->evtdev, 0); | 355 | tick_setup_periodic(td->evtdev, 0); |
353 | else | 356 | else |
354 | tick_resume_oneshot(); | 357 | tick_resume_oneshot(); |
355 | } | 358 | } |
356 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | 359 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); |
357 | } | 360 | } |
358 | 361 | ||
359 | /* | 362 | /* |
360 | * Notification about clock event devices | 363 | * Notification about clock event devices |
361 | */ | 364 | */ |
362 | static int tick_notify(struct notifier_block *nb, unsigned long reason, | 365 | static int tick_notify(struct notifier_block *nb, unsigned long reason, |
363 | void *dev) | 366 | void *dev) |
364 | { | 367 | { |
365 | switch (reason) { | 368 | switch (reason) { |
366 | 369 | ||
367 | case CLOCK_EVT_NOTIFY_ADD: | 370 | case CLOCK_EVT_NOTIFY_ADD: |
368 | return tick_check_new_device(dev); | 371 | return tick_check_new_device(dev); |
369 | 372 | ||
370 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | 373 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: |
371 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | 374 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: |
372 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | 375 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: |
373 | tick_broadcast_on_off(reason, dev); | 376 | tick_broadcast_on_off(reason, dev); |
374 | break; | 377 | break; |
375 | 378 | ||
376 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | 379 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: |
377 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | 380 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: |
378 | tick_broadcast_oneshot_control(reason); | 381 | tick_broadcast_oneshot_control(reason); |
379 | break; | 382 | break; |
380 | 383 | ||
381 | case CLOCK_EVT_NOTIFY_CPU_DYING: | 384 | case CLOCK_EVT_NOTIFY_CPU_DYING: |
382 | tick_handover_do_timer(dev); | 385 | tick_handover_do_timer(dev); |
383 | break; | 386 | break; |
384 | 387 | ||
385 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | 388 | case CLOCK_EVT_NOTIFY_CPU_DEAD: |
386 | tick_shutdown_broadcast_oneshot(dev); | 389 | tick_shutdown_broadcast_oneshot(dev); |
387 | tick_shutdown_broadcast(dev); | 390 | tick_shutdown_broadcast(dev); |
388 | tick_shutdown(dev); | 391 | tick_shutdown(dev); |
389 | break; | 392 | break; |
390 | 393 | ||
391 | case CLOCK_EVT_NOTIFY_SUSPEND: | 394 | case CLOCK_EVT_NOTIFY_SUSPEND: |
392 | tick_suspend(); | 395 | tick_suspend(); |
393 | tick_suspend_broadcast(); | 396 | tick_suspend_broadcast(); |
394 | break; | 397 | break; |
395 | 398 | ||
396 | case CLOCK_EVT_NOTIFY_RESUME: | 399 | case CLOCK_EVT_NOTIFY_RESUME: |
397 | tick_resume(); | 400 | tick_resume(); |
398 | break; | 401 | break; |
399 | 402 | ||
400 | default: | 403 | default: |
401 | break; | 404 | break; |
402 | } | 405 | } |
403 | 406 | ||
404 | return NOTIFY_OK; | 407 | return NOTIFY_OK; |
405 | } | 408 | } |
406 | 409 | ||
407 | static struct notifier_block tick_notifier = { | 410 | static struct notifier_block tick_notifier = { |
408 | .notifier_call = tick_notify, | 411 | .notifier_call = tick_notify, |
409 | }; | 412 | }; |
410 | 413 | ||
411 | /** | 414 | /** |
412 | * tick_init - initialize the tick control | 415 | * tick_init - initialize the tick control |
413 | * | 416 | * |
414 | * Register the notifier with the clockevents framework | 417 | * Register the notifier with the clockevents framework |
415 | */ | 418 | */ |
416 | void __init tick_init(void) | 419 | void __init tick_init(void) |
417 | { | 420 | { |
418 | clockevents_register_notifier(&tick_notifier); | 421 | clockevents_register_notifier(&tick_notifier); |
419 | } | 422 | } |
420 | 423 |
kernel/time/tick-sched.c
1 | /* | 1 | /* |
2 | * linux/kernel/time/tick-sched.c | 2 | * linux/kernel/time/tick-sched.c |
3 | * | 3 | * |
4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | 5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | 6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner |
7 | * | 7 | * |
8 | * No idle tick implementation for low and high resolution timers | 8 | * No idle tick implementation for low and high resolution timers |
9 | * | 9 | * |
10 | * Started by: Thomas Gleixner and Ingo Molnar | 10 | * Started by: Thomas Gleixner and Ingo Molnar |
11 | * | 11 | * |
12 | * Distribute under GPLv2. | 12 | * Distribute under GPLv2. |
13 | */ | 13 | */ |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/kernel_stat.h> | 18 | #include <linux/kernel_stat.h> |
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/profile.h> | 20 | #include <linux/profile.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/irq_work.h> | 23 | #include <linux/irq_work.h> |
24 | 24 | ||
25 | #include <asm/irq_regs.h> | 25 | #include <asm/irq_regs.h> |
26 | 26 | ||
27 | #include "tick-internal.h" | 27 | #include "tick-internal.h" |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Per cpu nohz control structure | 30 | * Per cpu nohz control structure |
31 | */ | 31 | */ |
32 | DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | 32 | DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * The time, when the last jiffy update happened. Protected by jiffies_lock. | 35 | * The time, when the last jiffy update happened. Protected by jiffies_lock. |
36 | */ | 36 | */ |
37 | static ktime_t last_jiffies_update; | 37 | static ktime_t last_jiffies_update; |
38 | 38 | ||
39 | struct tick_sched *tick_get_tick_sched(int cpu) | 39 | struct tick_sched *tick_get_tick_sched(int cpu) |
40 | { | 40 | { |
41 | return &per_cpu(tick_cpu_sched, cpu); | 41 | return &per_cpu(tick_cpu_sched, cpu); |
42 | } | 42 | } |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * Must be called with interrupts disabled ! | 45 | * Must be called with interrupts disabled ! |
46 | */ | 46 | */ |
47 | static void tick_do_update_jiffies64(ktime_t now) | 47 | static void tick_do_update_jiffies64(ktime_t now) |
48 | { | 48 | { |
49 | unsigned long ticks = 0; | 49 | unsigned long ticks = 0; |
50 | ktime_t delta; | 50 | ktime_t delta; |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * Do a quick check without holding jiffies_lock: | 53 | * Do a quick check without holding jiffies_lock: |
54 | */ | 54 | */ |
55 | delta = ktime_sub(now, last_jiffies_update); | 55 | delta = ktime_sub(now, last_jiffies_update); |
56 | if (delta.tv64 < tick_period.tv64) | 56 | if (delta.tv64 < tick_period.tv64) |
57 | return; | 57 | return; |
58 | 58 | ||
59 | /* Reevaluate with jiffies_lock held */ | 59 | /* Reevaluate with jiffies_lock held */ |
60 | write_seqlock(&jiffies_lock); | 60 | write_seqlock(&jiffies_lock); |
61 | 61 | ||
62 | delta = ktime_sub(now, last_jiffies_update); | 62 | delta = ktime_sub(now, last_jiffies_update); |
63 | if (delta.tv64 >= tick_period.tv64) { | 63 | if (delta.tv64 >= tick_period.tv64) { |
64 | 64 | ||
65 | delta = ktime_sub(delta, tick_period); | 65 | delta = ktime_sub(delta, tick_period); |
66 | last_jiffies_update = ktime_add(last_jiffies_update, | 66 | last_jiffies_update = ktime_add(last_jiffies_update, |
67 | tick_period); | 67 | tick_period); |
68 | 68 | ||
69 | /* Slow path for long timeouts */ | 69 | /* Slow path for long timeouts */ |
70 | if (unlikely(delta.tv64 >= tick_period.tv64)) { | 70 | if (unlikely(delta.tv64 >= tick_period.tv64)) { |
71 | s64 incr = ktime_to_ns(tick_period); | 71 | s64 incr = ktime_to_ns(tick_period); |
72 | 72 | ||
73 | ticks = ktime_divns(delta, incr); | 73 | ticks = ktime_divns(delta, incr); |
74 | 74 | ||
75 | last_jiffies_update = ktime_add_ns(last_jiffies_update, | 75 | last_jiffies_update = ktime_add_ns(last_jiffies_update, |
76 | incr * ticks); | 76 | incr * ticks); |
77 | } | 77 | } |
78 | do_timer(++ticks); | 78 | do_timer(++ticks); |
79 | 79 | ||
80 | /* Keep the tick_next_period variable up to date */ | 80 | /* Keep the tick_next_period variable up to date */ |
81 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | 81 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
82 | } | 82 | } |
83 | write_sequnlock(&jiffies_lock); | 83 | write_sequnlock(&jiffies_lock); |
84 | } | 84 | } |
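
tick_do_update_jiffies64() above does a lock-free quick check, re-checks under jiffies_lock, and then folds an arbitrarily long quiet period into a single do_timer() call by dividing the elapsed time by the tick period. A small sketch of that catch-up arithmetic, using plain 64-bit division as a stand-in for ktime_divns() and made-up numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const int64_t NSEC_PER_SEC = 1000000000LL;
            int64_t tick_period = NSEC_PER_SEC / 100;       /* assume HZ=100: 10 ms */
            int64_t delta = 2500000000LL;                   /* the CPU was quiet for 2.5 s */
            uint64_t ticks = 0;

            /* mirror of the slow path: consume one period, then batch the rest */
            delta -= tick_period;
            if (delta >= tick_period)
                    ticks = (uint64_t)(delta / tick_period); /* stands in for ktime_divns() */

            /* do_timer(++ticks) then advances jiffies by all of them at once */
            printf("jiffies advance by %llu ticks\n", (unsigned long long)(ticks + 1));
            return 0;
    }
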
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Initialize and return the jiffies update. | 87 | * Initialize and return the jiffies update. |
88 | */ | 88 | */ |
89 | static ktime_t tick_init_jiffy_update(void) | 89 | static ktime_t tick_init_jiffy_update(void) |
90 | { | 90 | { |
91 | ktime_t period; | 91 | ktime_t period; |
92 | 92 | ||
93 | write_seqlock(&jiffies_lock); | 93 | write_seqlock(&jiffies_lock); |
94 | /* Did we start the jiffies update yet ? */ | 94 | /* Did we start the jiffies update yet ? */ |
95 | if (last_jiffies_update.tv64 == 0) | 95 | if (last_jiffies_update.tv64 == 0) |
96 | last_jiffies_update = tick_next_period; | 96 | last_jiffies_update = tick_next_period; |
97 | period = last_jiffies_update; | 97 | period = last_jiffies_update; |
98 | write_sequnlock(&jiffies_lock); | 98 | write_sequnlock(&jiffies_lock); |
99 | return period; | 99 | return period; |
100 | } | 100 | } |
101 | 101 | ||
102 | 102 | ||
103 | static void tick_sched_do_timer(ktime_t now) | 103 | static void tick_sched_do_timer(ktime_t now) |
104 | { | 104 | { |
105 | int cpu = smp_processor_id(); | 105 | int cpu = smp_processor_id(); |
106 | 106 | ||
107 | #ifdef CONFIG_NO_HZ | 107 | #ifdef CONFIG_NO_HZ |
108 | /* | 108 | /* |
109 | * Check if the do_timer duty was dropped. We don't care about | 109 | * Check if the do_timer duty was dropped. We don't care about |
110 | * concurrency: This happens only when the cpu in charge went | 110 | * concurrency: This happens only when the cpu in charge went |
111 | * into a long sleep. If two cpus happen to assign themselves to | 111 | * into a long sleep. If two cpus happen to assign themselves to |
112 | * this duty, then the jiffies update is still serialized by | 112 | * this duty, then the jiffies update is still serialized by |
113 | * jiffies_lock. | 113 | * jiffies_lock. |
114 | */ | 114 | */ |
115 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | 115 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) |
116 | && !tick_nohz_extended_cpu(cpu)) | ||
116 | tick_do_timer_cpu = cpu; | 117 | tick_do_timer_cpu = cpu; |
117 | #endif | 118 | #endif |
118 | 119 | ||
119 | /* Check, if the jiffies need an update */ | 120 | /* Check, if the jiffies need an update */ |
120 | if (tick_do_timer_cpu == cpu) | 121 | if (tick_do_timer_cpu == cpu) |
121 | tick_do_update_jiffies64(now); | 122 | tick_do_update_jiffies64(now); |
122 | } | 123 | } |
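
This hunk is the runtime counterpart of the setup-side change: once the duty has been dropped to TICK_DO_TIMER_NONE (after a long sleep, or because boot left it unassigned), only a CPU outside the nohz_extended range may adopt it. A compact, self-contained restatement of the new condition; the mask, the sentinel and the helper below are toy stand-ins:

    #include <stdbool.h>
    #include <stdio.h>

    #define TICK_DO_TIMER_NONE (-1)                 /* made-up stand-in */

    static unsigned long nohz_extended_mask = 0x2;  /* toy mask: CPU 1 is full dynticks */

    static bool tick_nohz_extended_cpu(int cpu)
    {
            return (nohz_extended_mask >> cpu) & 1UL;
    }

    /* may this CPU adopt the dropped timekeeping duty? */
    static bool may_adopt_do_timer(int tick_do_timer_cpu, int cpu)
    {
            return tick_do_timer_cpu == TICK_DO_TIMER_NONE &&
                   !tick_nohz_extended_cpu(cpu);
    }

    int main(void)
    {
            printf("cpu0 may adopt: %d\n", may_adopt_do_timer(TICK_DO_TIMER_NONE, 0));
            printf("cpu1 may adopt: %d\n", may_adopt_do_timer(TICK_DO_TIMER_NONE, 1));
            return 0;
    }
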
123 | 124 | ||
124 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | 125 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) |
125 | { | 126 | { |
126 | #ifdef CONFIG_NO_HZ | 127 | #ifdef CONFIG_NO_HZ |
127 | /* | 128 | /* |
128 | * When we are idle and the tick is stopped, we have to touch | 129 | * When we are idle and the tick is stopped, we have to touch |
129 | * the watchdog as we might not schedule for a really long | 130 | * the watchdog as we might not schedule for a really long |
130 | * time. This happens on complete idle SMP systems while | 131 | * time. This happens on complete idle SMP systems while |
131 | * waiting on the login prompt. We also increment the "start of | 132 | * waiting on the login prompt. We also increment the "start of |
132 | * idle" jiffy stamp so the idle accounting adjustment we do | 133 | * idle" jiffy stamp so the idle accounting adjustment we do |
133 | * when we go busy again does not account too many ticks. | 134 | * when we go busy again does not account too many ticks. |
134 | */ | 135 | */ |
135 | if (ts->tick_stopped) { | 136 | if (ts->tick_stopped) { |
136 | touch_softlockup_watchdog(); | 137 | touch_softlockup_watchdog(); |
137 | if (is_idle_task(current)) | 138 | if (is_idle_task(current)) |
138 | ts->idle_jiffies++; | 139 | ts->idle_jiffies++; |
139 | } | 140 | } |
140 | #endif | 141 | #endif |
141 | update_process_times(user_mode(regs)); | 142 | update_process_times(user_mode(regs)); |
142 | profile_tick(CPU_PROFILING); | 143 | profile_tick(CPU_PROFILING); |
143 | } | 144 | } |
144 | 145 | ||
145 | #ifdef CONFIG_NO_HZ_EXTENDED | 146 | #ifdef CONFIG_NO_HZ_EXTENDED |
146 | static cpumask_var_t nohz_extended_mask; | 147 | static cpumask_var_t nohz_extended_mask; |
147 | bool have_nohz_extended_mask; | 148 | bool have_nohz_extended_mask; |
148 | 149 | ||
149 | int tick_nohz_extended_cpu(int cpu) | 150 | int tick_nohz_extended_cpu(int cpu) |
150 | { | 151 | { |
151 | if (!have_nohz_extended_mask) | 152 | if (!have_nohz_extended_mask) |
152 | return 0; | 153 | return 0; |
153 | 154 | ||
154 | return cpumask_test_cpu(cpu, nohz_extended_mask); | 155 | return cpumask_test_cpu(cpu, nohz_extended_mask); |
155 | } | 156 | } |
156 | 157 | ||
157 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ | 158 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ |
158 | static int __init tick_nohz_extended_setup(char *str) | 159 | static int __init tick_nohz_extended_setup(char *str) |
159 | { | 160 | { |
160 | alloc_bootmem_cpumask_var(&nohz_extended_mask); | 161 | alloc_bootmem_cpumask_var(&nohz_extended_mask); |
161 | if (cpulist_parse(str, nohz_extended_mask) < 0) | 162 | if (cpulist_parse(str, nohz_extended_mask) < 0) |
162 | pr_warning("NOHZ: Incorrect nohz_extended cpumask\n"); | 163 | pr_warning("NOHZ: Incorrect nohz_extended cpumask\n"); |
163 | else | 164 | else |
164 | have_nohz_extended_mask = true; | 165 | have_nohz_extended_mask = true; |
165 | return 1; | 166 | return 1; |
166 | } | 167 | } |
167 | __setup("nohz_extended=", tick_nohz_extended_setup); | 168 | __setup("nohz_extended=", tick_nohz_extended_setup); |
168 | 169 | ||
170 | static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | ||
171 | unsigned long action, | ||
172 | void *hcpu) | ||
173 | { | ||
174 | unsigned int cpu = (unsigned long)hcpu; | ||
175 | |||
176 | switch (action & ~CPU_TASKS_FROZEN) { | ||
177 | case CPU_DOWN_PREPARE: | ||
178 | /* | ||
179 | * If we handle the timekeeping duty for full dynticks CPUs, | ||
180 | * we can't safely shutdown that CPU. | ||
181 | */ | ||
182 | if (have_nohz_extended_mask && tick_do_timer_cpu == cpu) | ||
183 | return -EINVAL; | ||
184 | break; | ||
185 | } | ||
186 | return NOTIFY_OK; | ||
187 | } | ||
188 | |||
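
The new notifier above is what pins the timekeeping CPU: while a nohz_extended mask exists, CPU_DOWN_PREPARE is refused for whichever CPU currently owns tick_do_timer_cpu, so the duty holder cannot be hot-unplugged. A hedged userspace model of that veto, with plain globals standing in for the kernel state and return codes:

    #include <stdio.h>
    #include <errno.h>

    #define NOTIFY_OK 0     /* made-up stand-in for the notifier return code */

    static int have_nohz_extended_mask = 1;
    static int tick_do_timer_cpu = 0;       /* in this toy setup CPU 0 keeps time */

    /* model of the CPU_DOWN_PREPARE check in tick_nohz_cpu_down_callback() */
    static int cpu_down_prepare(unsigned int cpu)
    {
            if (have_nohz_extended_mask && tick_do_timer_cpu == (int)cpu)
                    return -EINVAL;         /* refuse to offline the timekeeper */
            return NOTIFY_OK;
    }

    int main(void)
    {
            printf("offline cpu0 -> %d\n", cpu_down_prepare(0));    /* -22 (-EINVAL) */
            printf("offline cpu2 -> %d\n", cpu_down_prepare(2));    /* 0, allowed */
            return 0;
    }
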
169 | static int __init init_tick_nohz_extended(void) | 189 | static int __init init_tick_nohz_extended(void) |
170 | { | 190 | { |
171 | cpumask_var_t online_nohz; | 191 | cpumask_var_t online_nohz; |
172 | int cpu; | 192 | int cpu; |
173 | 193 | ||
174 | if (!have_nohz_extended_mask) | 194 | if (!have_nohz_extended_mask) |
175 | return 0; | 195 | return 0; |
176 | 196 | ||
197 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | ||
198 | |||
177 | if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { | 199 | if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { |
178 | pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); | 200 | pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); |
179 | return -ENOMEM; | 201 | return -ENOMEM; |
180 | } | 202 | } |
181 | 203 | ||
182 | /* | 204 | /* |
183 | * CPUs can probably not be concurrently offlined at initcall time. | 205 | * CPUs can probably not be concurrently offlined at initcall time. |
184 | * But we are paranoid, aren't we? | 206 | * But we are paranoid, aren't we? |
185 | */ | 207 | */ |
186 | get_online_cpus(); | 208 | get_online_cpus(); |
187 | 209 | ||
188 | /* Ensure we keep a CPU outside the dynticks range for timekeeping */ | 210 | /* Ensure we keep a CPU outside the dynticks range for timekeeping */ |
189 | cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); | 211 | cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); |
190 | if (cpumask_equal(online_nohz, cpu_online_mask)) { | 212 | if (cpumask_equal(online_nohz, cpu_online_mask)) { |
191 | cpu = cpumask_any(cpu_online_mask); | ||
192 | pr_warning("NO_HZ: Must keep at least one online CPU " | 213 | pr_warning("NO_HZ: Must keep at least one online CPU " |
193 | "out of nohz_extended range\n"); | 214 | "out of nohz_extended range\n"); |
215 | /* | ||
216 | * We know the current CPU doesn't have its tick stopped. | ||
217 | * Let's use it for the timekeeping duty. | ||
218 | */ | ||
219 | preempt_disable(); | ||
220 | cpu = smp_processor_id(); | ||
194 | pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); | 221 | pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); |
195 | cpumask_clear_cpu(cpu, nohz_extended_mask); | 222 | cpumask_clear_cpu(cpu, nohz_extended_mask); |
223 | preempt_enable(); | ||
196 | } | 224 | } |
197 | put_online_cpus(); | 225 | put_online_cpus(); |
198 | free_cpumask_var(online_nohz); | 226 | free_cpumask_var(online_nohz); |
199 | 227 | ||
200 | return 0; | 228 | return 0; |
201 | } | 229 | } |
202 | core_initcall(init_tick_nohz_extended); | 230 | core_initcall(init_tick_nohz_extended); |
203 | #else | 231 | #else |
204 | #define have_nohz_extended_mask (0) | 232 | #define have_nohz_extended_mask (0) |
205 | #endif | 233 | #endif |
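
init_tick_nohz_extended() enforces the boot-time invariant that at least one online CPU stays outside the dynticks range: if the online mask is fully covered by nohz_extended_mask, the CPU running the initcall (pinned via preempt_disable()) is cleared from the range and keeps its tick. When CONFIG_NO_HZ_EXTENDED is off, have_nohz_extended_mask is defined to 0 just above, so all of the new checks compile away. A small bitmask sketch of the sanity check, with toy masks in place of cpumask_var_t:

    #include <stdio.h>

    int main(void)
    {
            unsigned long cpu_online_mask    = 0x0f;  /* CPUs 0-3 online */
            unsigned long nohz_extended_mask = 0x0f;  /* ...and all requested as full dynticks */
            int this_cpu = 0;                         /* CPU running the initcall */
            unsigned long online_nohz;

            /* cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask) */
            online_nohz = cpu_online_mask & nohz_extended_mask;

            /* cpumask_equal(): no CPU would be left to carry the timekeeping duty */
            if (online_nohz == cpu_online_mask) {
                    nohz_extended_mask &= ~(1UL << this_cpu); /* cpumask_clear_cpu() */
                    printf("cleared cpu%d, nohz_extended_mask = 0x%lx\n",
                           this_cpu, nohz_extended_mask);
            }
            return 0;
    }
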
206 | 234 | ||
207 | /* | 235 | /* |
208 | * NOHZ - aka dynamic tick functionality | 236 | * NOHZ - aka dynamic tick functionality |
209 | */ | 237 | */ |
210 | #ifdef CONFIG_NO_HZ | 238 | #ifdef CONFIG_NO_HZ |
211 | /* | 239 | /* |
212 | * NO HZ enabled ? | 240 | * NO HZ enabled ? |
213 | */ | 241 | */ |
214 | int tick_nohz_enabled __read_mostly = 1; | 242 | int tick_nohz_enabled __read_mostly = 1; |
215 | 243 | ||
216 | /* | 244 | /* |
217 | * Enable / Disable tickless mode | 245 | * Enable / Disable tickless mode |
218 | */ | 246 | */ |
219 | static int __init setup_tick_nohz(char *str) | 247 | static int __init setup_tick_nohz(char *str) |
220 | { | 248 | { |
221 | if (!strcmp(str, "off")) | 249 | if (!strcmp(str, "off")) |
222 | tick_nohz_enabled = 0; | 250 | tick_nohz_enabled = 0; |
223 | else if (!strcmp(str, "on")) | 251 | else if (!strcmp(str, "on")) |
224 | tick_nohz_enabled = 1; | 252 | tick_nohz_enabled = 1; |
225 | else | 253 | else |
226 | return 0; | 254 | return 0; |
227 | return 1; | 255 | return 1; |
228 | } | 256 | } |
229 | 257 | ||
230 | __setup("nohz=", setup_tick_nohz); | 258 | __setup("nohz=", setup_tick_nohz); |
231 | 259 | ||
232 | /** | 260 | /** |
233 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted | 261 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted |
234 | * | 262 | * |
235 | * Called from interrupt entry when the CPU was idle | 263 | * Called from interrupt entry when the CPU was idle |
236 | * | 264 | * |
237 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies | 265 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies |
238 | * must be updated. Otherwise an interrupt handler could use a stale jiffy | 266 | * must be updated. Otherwise an interrupt handler could use a stale jiffy |
239 | * value. We do this unconditionally on any cpu, as we don't know whether the | 267 | * value. We do this unconditionally on any cpu, as we don't know whether the |
240 | * cpu which has the update task assigned is in a long sleep. | 268 | * cpu which has the update task assigned is in a long sleep. |
241 | */ | 269 | */ |
242 | static void tick_nohz_update_jiffies(ktime_t now) | 270 | static void tick_nohz_update_jiffies(ktime_t now) |
243 | { | 271 | { |
244 | int cpu = smp_processor_id(); | 272 | int cpu = smp_processor_id(); |
245 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 273 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
246 | unsigned long flags; | 274 | unsigned long flags; |
247 | 275 | ||
248 | ts->idle_waketime = now; | 276 | ts->idle_waketime = now; |
249 | 277 | ||
250 | local_irq_save(flags); | 278 | local_irq_save(flags); |
251 | tick_do_update_jiffies64(now); | 279 | tick_do_update_jiffies64(now); |
252 | local_irq_restore(flags); | 280 | local_irq_restore(flags); |
253 | 281 | ||
254 | touch_softlockup_watchdog(); | 282 | touch_softlockup_watchdog(); |
255 | } | 283 | } |
256 | 284 | ||
257 | /* | 285 | /* |
258 | * Updates the per cpu time idle statistics counters | 286 | * Updates the per cpu time idle statistics counters |
259 | */ | 287 | */ |
260 | static void | 288 | static void |
261 | update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time) | 289 | update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time) |
262 | { | 290 | { |
263 | ktime_t delta; | 291 | ktime_t delta; |
264 | 292 | ||
265 | if (ts->idle_active) { | 293 | if (ts->idle_active) { |
266 | delta = ktime_sub(now, ts->idle_entrytime); | 294 | delta = ktime_sub(now, ts->idle_entrytime); |
267 | if (nr_iowait_cpu(cpu) > 0) | 295 | if (nr_iowait_cpu(cpu) > 0) |
268 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); | 296 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); |
269 | else | 297 | else |
270 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | 298 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
271 | ts->idle_entrytime = now; | 299 | ts->idle_entrytime = now; |
272 | } | 300 | } |
273 | 301 | ||
274 | if (last_update_time) | 302 | if (last_update_time) |
275 | *last_update_time = ktime_to_us(now); | 303 | *last_update_time = ktime_to_us(now); |
276 | 304 | ||
277 | } | 305 | } |
278 | 306 | ||
279 | static void tick_nohz_stop_idle(int cpu, ktime_t now) | 307 | static void tick_nohz_stop_idle(int cpu, ktime_t now) |
280 | { | 308 | { |
281 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 309 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
282 | 310 | ||
283 | update_ts_time_stats(cpu, ts, now, NULL); | 311 | update_ts_time_stats(cpu, ts, now, NULL); |
284 | ts->idle_active = 0; | 312 | ts->idle_active = 0; |
285 | 313 | ||
286 | sched_clock_idle_wakeup_event(0); | 314 | sched_clock_idle_wakeup_event(0); |
287 | } | 315 | } |
288 | 316 | ||
289 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | 317 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) |
290 | { | 318 | { |
291 | ktime_t now = ktime_get(); | 319 | ktime_t now = ktime_get(); |
292 | 320 | ||
293 | ts->idle_entrytime = now; | 321 | ts->idle_entrytime = now; |
294 | ts->idle_active = 1; | 322 | ts->idle_active = 1; |
295 | sched_clock_idle_sleep_event(); | 323 | sched_clock_idle_sleep_event(); |
296 | return now; | 324 | return now; |
297 | } | 325 | } |
298 | 326 | ||
299 | /** | 327 | /** |
300 | * get_cpu_idle_time_us - get the total idle time of a cpu | 328 | * get_cpu_idle_time_us - get the total idle time of a cpu |
301 | * @cpu: CPU number to query | 329 | * @cpu: CPU number to query |
302 | * @last_update_time: variable to store update time in. Do not update | 330 | * @last_update_time: variable to store update time in. Do not update |
303 | * counters if NULL. | 331 | * counters if NULL. |
304 | * | 332 | * |
305 | * Return the cumulative idle time (since boot) for a given | 333 | * Return the cumulative idle time (since boot) for a given |
306 | * CPU, in microseconds. | 334 | * CPU, in microseconds. |
307 | * | 335 | * |
308 | * This time is measured via accounting rather than sampling, | 336 | * This time is measured via accounting rather than sampling, |
309 | * and is as accurate as ktime_get() is. | 337 | * and is as accurate as ktime_get() is. |
310 | * | 338 | * |
311 | * This function returns -1 if NOHZ is not enabled. | 339 | * This function returns -1 if NOHZ is not enabled. |
312 | */ | 340 | */ |
313 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | 341 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) |
314 | { | 342 | { |
315 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 343 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
316 | ktime_t now, idle; | 344 | ktime_t now, idle; |
317 | 345 | ||
318 | if (!tick_nohz_enabled) | 346 | if (!tick_nohz_enabled) |
319 | return -1; | 347 | return -1; |
320 | 348 | ||
321 | now = ktime_get(); | 349 | now = ktime_get(); |
322 | if (last_update_time) { | 350 | if (last_update_time) { |
323 | update_ts_time_stats(cpu, ts, now, last_update_time); | 351 | update_ts_time_stats(cpu, ts, now, last_update_time); |
324 | idle = ts->idle_sleeptime; | 352 | idle = ts->idle_sleeptime; |
325 | } else { | 353 | } else { |
326 | if (ts->idle_active && !nr_iowait_cpu(cpu)) { | 354 | if (ts->idle_active && !nr_iowait_cpu(cpu)) { |
327 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | 355 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); |
328 | 356 | ||
329 | idle = ktime_add(ts->idle_sleeptime, delta); | 357 | idle = ktime_add(ts->idle_sleeptime, delta); |
330 | } else { | 358 | } else { |
331 | idle = ts->idle_sleeptime; | 359 | idle = ts->idle_sleeptime; |
332 | } | 360 | } |
333 | } | 361 | } |
334 | 362 | ||
335 | return ktime_to_us(idle); | 363 | return ktime_to_us(idle); |
336 | 364 | ||
337 | } | 365 | } |
338 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); | 366 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); |
339 | 367 | ||
340 | /** | 368 | /** |
341 | * get_cpu_iowait_time_us - get the total iowait time of a cpu | 369 | * get_cpu_iowait_time_us - get the total iowait time of a cpu |
342 | * @cpu: CPU number to query | 370 | * @cpu: CPU number to query |
343 | * @last_update_time: variable to store update time in. Do not update | 371 | * @last_update_time: variable to store update time in. Do not update |
344 | * counters if NULL. | 372 | * counters if NULL. |
345 | * | 373 | * |
346 | * Return the cumulative iowait time (since boot) for a given | 374 | * Return the cumulative iowait time (since boot) for a given |
347 | * CPU, in microseconds. | 375 | * CPU, in microseconds. |
348 | * | 376 | * |
349 | * This time is measured via accounting rather than sampling, | 377 | * This time is measured via accounting rather than sampling, |
350 | * and is as accurate as ktime_get() is. | 378 | * and is as accurate as ktime_get() is. |
351 | * | 379 | * |
352 | * This function returns -1 if NOHZ is not enabled. | 380 | * This function returns -1 if NOHZ is not enabled. |
353 | */ | 381 | */ |
354 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | 382 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) |
355 | { | 383 | { |
356 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 384 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
357 | ktime_t now, iowait; | 385 | ktime_t now, iowait; |
358 | 386 | ||
359 | if (!tick_nohz_enabled) | 387 | if (!tick_nohz_enabled) |
360 | return -1; | 388 | return -1; |
361 | 389 | ||
362 | now = ktime_get(); | 390 | now = ktime_get(); |
363 | if (last_update_time) { | 391 | if (last_update_time) { |
364 | update_ts_time_stats(cpu, ts, now, last_update_time); | 392 | update_ts_time_stats(cpu, ts, now, last_update_time); |
365 | iowait = ts->iowait_sleeptime; | 393 | iowait = ts->iowait_sleeptime; |
366 | } else { | 394 | } else { |
367 | if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { | 395 | if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { |
368 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | 396 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); |
369 | 397 | ||
370 | iowait = ktime_add(ts->iowait_sleeptime, delta); | 398 | iowait = ktime_add(ts->iowait_sleeptime, delta); |
371 | } else { | 399 | } else { |
372 | iowait = ts->iowait_sleeptime; | 400 | iowait = ts->iowait_sleeptime; |
373 | } | 401 | } |
374 | } | 402 | } |
375 | 403 | ||
376 | return ktime_to_us(iowait); | 404 | return ktime_to_us(iowait); |
377 | } | 405 | } |
378 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | 406 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); |
379 | 407 | ||
380 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | 408 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, |
381 | ktime_t now, int cpu) | 409 | ktime_t now, int cpu) |
382 | { | 410 | { |
383 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; | 411 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; |
384 | ktime_t last_update, expires, ret = { .tv64 = 0 }; | 412 | ktime_t last_update, expires, ret = { .tv64 = 0 }; |
385 | unsigned long rcu_delta_jiffies; | 413 | unsigned long rcu_delta_jiffies; |
386 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 414 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
387 | u64 time_delta; | 415 | u64 time_delta; |
388 | 416 | ||
389 | /* Read jiffies and the time when jiffies were updated last */ | 417 | /* Read jiffies and the time when jiffies were updated last */ |
390 | do { | 418 | do { |
391 | seq = read_seqbegin(&jiffies_lock); | 419 | seq = read_seqbegin(&jiffies_lock); |
392 | last_update = last_jiffies_update; | 420 | last_update = last_jiffies_update; |
393 | last_jiffies = jiffies; | 421 | last_jiffies = jiffies; |
394 | time_delta = timekeeping_max_deferment(); | 422 | time_delta = timekeeping_max_deferment(); |
395 | } while (read_seqretry(&jiffies_lock, seq)); | 423 | } while (read_seqretry(&jiffies_lock, seq)); |
396 | 424 | ||
397 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || | 425 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || |
398 | arch_needs_cpu(cpu) || irq_work_needs_cpu()) { | 426 | arch_needs_cpu(cpu) || irq_work_needs_cpu()) { |
399 | next_jiffies = last_jiffies + 1; | 427 | next_jiffies = last_jiffies + 1; |
400 | delta_jiffies = 1; | 428 | delta_jiffies = 1; |
401 | } else { | 429 | } else { |
402 | /* Get the next timer wheel timer */ | 430 | /* Get the next timer wheel timer */ |
403 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 431 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
404 | delta_jiffies = next_jiffies - last_jiffies; | 432 | delta_jiffies = next_jiffies - last_jiffies; |
405 | if (rcu_delta_jiffies < delta_jiffies) { | 433 | if (rcu_delta_jiffies < delta_jiffies) { |
406 | next_jiffies = last_jiffies + rcu_delta_jiffies; | 434 | next_jiffies = last_jiffies + rcu_delta_jiffies; |
407 | delta_jiffies = rcu_delta_jiffies; | 435 | delta_jiffies = rcu_delta_jiffies; |
408 | } | 436 | } |
409 | } | 437 | } |
410 | /* | 438 | /* |
411 | * Do not stop the tick, if we are only one off | 439 | * Do not stop the tick, if we are only one off |
412 | * or if the cpu is required for rcu | 440 | * or if the cpu is required for rcu |
413 | */ | 441 | */ |
414 | if (!ts->tick_stopped && delta_jiffies == 1) | 442 | if (!ts->tick_stopped && delta_jiffies == 1) |
415 | goto out; | 443 | goto out; |
416 | 444 | ||
417 | /* Schedule the tick, if we are at least one jiffie off */ | 445 | /* Schedule the tick, if we are at least one jiffie off */ |
418 | if ((long)delta_jiffies >= 1) { | 446 | if ((long)delta_jiffies >= 1) { |
419 | 447 | ||
420 | /* | 448 | /* |
421 | * If this cpu is the one which updates jiffies, then | 449 | * If this cpu is the one which updates jiffies, then |
422 | * give up the assignment and let it be taken by the | 450 | * give up the assignment and let it be taken by the |
423 | * cpu which runs the tick timer next, which might be | 451 | * cpu which runs the tick timer next, which might be |
424 | * this cpu as well. If we don't drop this here the | 452 | * this cpu as well. If we don't drop this here the |
425 | * jiffies might be stale and do_timer() never | 453 | * jiffies might be stale and do_timer() never |
426 | * invoked. Keep track of the fact that it was the one | 454 | * invoked. Keep track of the fact that it was the one |
427 | * which had the do_timer() duty last. If this cpu is | 455 | * which had the do_timer() duty last. If this cpu is |
428 | * the one which had the do_timer() duty last, we | 456 | * the one which had the do_timer() duty last, we |
429 | * limit the sleep time to the timekeeping | 457 | * limit the sleep time to the timekeeping |
430 | * max_deferment value which we retrieved | 458 | * max_deferment value which we retrieved |
431 | * above. Otherwise we can sleep as long as we want. | 459 | * above. Otherwise we can sleep as long as we want. |
432 | */ | 460 | */ |
433 | if (cpu == tick_do_timer_cpu) { | 461 | if (cpu == tick_do_timer_cpu) { |
434 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 462 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
435 | ts->do_timer_last = 1; | 463 | ts->do_timer_last = 1; |
436 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | 464 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { |
437 | time_delta = KTIME_MAX; | 465 | time_delta = KTIME_MAX; |
438 | ts->do_timer_last = 0; | 466 | ts->do_timer_last = 0; |
439 | } else if (!ts->do_timer_last) { | 467 | } else if (!ts->do_timer_last) { |
440 | time_delta = KTIME_MAX; | 468 | time_delta = KTIME_MAX; |
441 | } | 469 | } |
442 | 470 | ||
443 | /* | 471 | /* |
444 | * calculate the expiry time for the next timer wheel | 472 | * calculate the expiry time for the next timer wheel |
445 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals | 473 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals |
446 | * that there is no timer pending or at least extremely | 474 | * that there is no timer pending or at least extremely |
447 | * far into the future (12 days for HZ=1000). In this | 475 | * far into the future (12 days for HZ=1000). In this |
448 | * case we set the expiry to the end of time. | 476 | * case we set the expiry to the end of time. |
449 | */ | 477 | */ |
450 | if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { | 478 | if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { |
451 | /* | 479 | /* |
452 | * Calculate the time delta for the next timer event. | 480 | * Calculate the time delta for the next timer event. |
453 | * If the time delta exceeds the maximum time delta | 481 | * If the time delta exceeds the maximum time delta |
454 | * permitted by the current clocksource then adjust | 482 | * permitted by the current clocksource then adjust |
455 | * the time delta accordingly to ensure the | 483 | * the time delta accordingly to ensure the |
456 | * clocksource does not wrap. | 484 | * clocksource does not wrap. |
457 | */ | 485 | */ |
458 | time_delta = min_t(u64, time_delta, | 486 | time_delta = min_t(u64, time_delta, |
459 | tick_period.tv64 * delta_jiffies); | 487 | tick_period.tv64 * delta_jiffies); |
460 | } | 488 | } |
461 | 489 | ||
462 | if (time_delta < KTIME_MAX) | 490 | if (time_delta < KTIME_MAX) |
463 | expires = ktime_add_ns(last_update, time_delta); | 491 | expires = ktime_add_ns(last_update, time_delta); |
464 | else | 492 | else |
465 | expires.tv64 = KTIME_MAX; | 493 | expires.tv64 = KTIME_MAX; |
466 | 494 | ||
467 | /* Skip reprogram of event if it's not changed */ | 495 | /* Skip reprogram of event if it's not changed */ |
468 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 496 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
469 | goto out; | 497 | goto out; |
470 | 498 | ||
471 | ret = expires; | 499 | ret = expires; |
472 | 500 | ||
473 | /* | 501 | /* |
474 | * nohz_stop_sched_tick can be called several times before | 502 | * nohz_stop_sched_tick can be called several times before |
475 | * the nohz_restart_sched_tick is called. This happens when | 503 | * the nohz_restart_sched_tick is called. This happens when |
476 | * interrupts arrive which do not cause a reschedule. In the | 504 | * interrupts arrive which do not cause a reschedule. In the |
477 | * first call we save the current tick time, so we can restart | 505 | * first call we save the current tick time, so we can restart |
478 | * the scheduler tick in nohz_restart_sched_tick. | 506 | * the scheduler tick in nohz_restart_sched_tick. |
479 | */ | 507 | */ |
480 | if (!ts->tick_stopped) { | 508 | if (!ts->tick_stopped) { |
481 | nohz_balance_enter_idle(cpu); | 509 | nohz_balance_enter_idle(cpu); |
482 | calc_load_enter_idle(); | 510 | calc_load_enter_idle(); |
483 | 511 | ||
484 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 512 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); |
485 | ts->tick_stopped = 1; | 513 | ts->tick_stopped = 1; |
486 | } | 514 | } |
487 | 515 | ||
488 | /* | 516 | /* |
489 | * If the expiration time == KTIME_MAX, then | 517 | * If the expiration time == KTIME_MAX, then |
490 | * in this case we simply stop the tick timer. | 518 | * in this case we simply stop the tick timer. |
491 | */ | 519 | */ |
492 | if (unlikely(expires.tv64 == KTIME_MAX)) { | 520 | if (unlikely(expires.tv64 == KTIME_MAX)) { |
493 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 521 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
494 | hrtimer_cancel(&ts->sched_timer); | 522 | hrtimer_cancel(&ts->sched_timer); |
495 | goto out; | 523 | goto out; |
496 | } | 524 | } |
497 | 525 | ||
498 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 526 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
499 | hrtimer_start(&ts->sched_timer, expires, | 527 | hrtimer_start(&ts->sched_timer, expires, |
500 | HRTIMER_MODE_ABS_PINNED); | 528 | HRTIMER_MODE_ABS_PINNED); |
501 | /* Check, if the timer was already in the past */ | 529 | /* Check, if the timer was already in the past */ |
502 | if (hrtimer_active(&ts->sched_timer)) | 530 | if (hrtimer_active(&ts->sched_timer)) |
503 | goto out; | 531 | goto out; |
504 | } else if (!tick_program_event(expires, 0)) | 532 | } else if (!tick_program_event(expires, 0)) |
505 | goto out; | 533 | goto out; |
506 | /* | 534 | /* |
507 | * We are past the event already. So we crossed a | 535 | * We are past the event already. So we crossed a |
508 | * jiffie boundary. Update jiffies and raise the | 536 | * jiffie boundary. Update jiffies and raise the |
509 | * softirq. | 537 | * softirq. |
510 | */ | 538 | */ |
511 | tick_do_update_jiffies64(ktime_get()); | 539 | tick_do_update_jiffies64(ktime_get()); |
512 | } | 540 | } |
513 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 541 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
514 | out: | 542 | out: |
515 | ts->next_jiffies = next_jiffies; | 543 | ts->next_jiffies = next_jiffies; |
516 | ts->last_jiffies = last_jiffies; | 544 | ts->last_jiffies = last_jiffies; |
517 | ts->sleep_length = ktime_sub(dev->next_event, now); | 545 | ts->sleep_length = ktime_sub(dev->next_event, now); |
518 | 546 | ||
519 | return ret; | 547 | return ret; |
520 | } | 548 | } |
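
A detail of the (unchanged) function above that interacts with this commit: the CPU that last held the do_timer duty bounds its idle sleep by timekeeping_max_deferment(), while other CPUs may sleep until their next timer or indefinitely (KTIME_MAX). The bound itself is just a min() over nanoseconds; a worked example with assumed values:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t min_u64(uint64_t a, uint64_t b)
    {
            return a < b ? a : b;
    }

    int main(void)
    {
            const uint64_t NSEC_PER_SEC = 1000000000ULL;
            uint64_t tick_period   = NSEC_PER_SEC / 1000;   /* assume HZ=1000 */
            uint64_t delta_jiffies = 5000;                  /* next timer wheel timer in 5 s */
            uint64_t max_deferment = 2 * NSEC_PER_SEC;      /* made-up clocksource limit */

            /* the last do_timer holder clamps its sleep to the timekeeping deferment */
            uint64_t time_delta = min_u64(max_deferment, tick_period * delta_jiffies);

            printf("sleep bounded to %llu ms\n",
                   (unsigned long long)(time_delta / 1000000ULL));
            return 0;
    }
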
521 | 549 | ||
522 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | 550 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) |
523 | { | 551 | { |
524 | /* | 552 | /* |
525 | * If this cpu is offline and it is the one which updates | 553 | * If this cpu is offline and it is the one which updates |
526 | * jiffies, then give up the assignment and let it be taken by | 554 | * jiffies, then give up the assignment and let it be taken by |
527 | * the cpu which runs the tick timer next. If we don't drop | 555 | * the cpu which runs the tick timer next. If we don't drop |
528 | * this here the jiffies might be stale and do_timer() never | 556 | * this here the jiffies might be stale and do_timer() never |
529 | * invoked. | 557 | * invoked. |
530 | */ | 558 | */ |
531 | if (unlikely(!cpu_online(cpu))) { | 559 | if (unlikely(!cpu_online(cpu))) { |
532 | if (cpu == tick_do_timer_cpu) | 560 | if (cpu == tick_do_timer_cpu) |
533 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 561 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
534 | } | 562 | } |
535 | 563 | ||
536 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 564 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
537 | return false; | 565 | return false; |
538 | 566 | ||
539 | if (need_resched()) | 567 | if (need_resched()) |
540 | return false; | 568 | return false; |
541 | 569 | ||
542 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | 570 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { |
543 | static int ratelimit; | 571 | static int ratelimit; |
544 | 572 | ||
545 | if (ratelimit < 10 && | 573 | if (ratelimit < 10 && |
546 | (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | 574 | (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { |
547 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | 575 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", |
548 | (unsigned int) local_softirq_pending()); | 576 | (unsigned int) local_softirq_pending()); |
549 | ratelimit++; | 577 | ratelimit++; |
550 | } | 578 | } |
551 | return false; | 579 | return false; |
580 | } | ||
581 | |||
582 | if (have_nohz_extended_mask) { | ||
583 | /* | ||
584 | * Keep the tick alive to guarantee timekeeping progression | ||
585 | * if there are full dynticks CPUs around | ||
586 | */ | ||
587 | if (tick_do_timer_cpu == cpu) | ||
588 | return false; | ||
589 | /* | ||
590 | * Boot safety: make sure the timekeeping duty has been | ||
591 | * assigned before entering dyntick-idle mode, | ||
592 | */ | ||
593 | if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) | ||
594 | return false; | ||
552 | } | 595 | } |
553 | 596 | ||
554 | return true; | 597 | return true; |
555 | } | 598 | } |
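
The new tail of can_stop_idle_tick() is the other half of the scheme: while a nohz_extended mask exists, the CPU owning tick_do_timer_cpu refuses to enter dyntick-idle, and every CPU refuses while the duty is still unassigned at early boot. A minimal model of the two added conditions (globals and sentinel are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    #define TICK_DO_TIMER_NONE (-1)         /* made-up stand-in */

    static bool have_nohz_extended_mask = true;

    /* the two checks appended to can_stop_idle_tick() by this commit */
    static bool timekeeping_allows_tick_stop(int tick_do_timer_cpu, int cpu)
    {
            if (!have_nohz_extended_mask)
                    return true;
            if (tick_do_timer_cpu == cpu)                   /* keep the timekeeper ticking */
                    return false;
            if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)    /* boot safety: duty unassigned */
                    return false;
            return true;
    }

    int main(void)
    {
            printf("duty on cpu0, cpu0: %d\n", timekeeping_allows_tick_stop(0, 0));
            printf("duty on cpu0, cpu1: %d\n", timekeeping_allows_tick_stop(0, 1));
            printf("duty unassigned, cpu1: %d\n",
                   timekeeping_allows_tick_stop(TICK_DO_TIMER_NONE, 1));
            return 0;
    }
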
556 | 599 | ||
557 | static void __tick_nohz_idle_enter(struct tick_sched *ts) | 600 | static void __tick_nohz_idle_enter(struct tick_sched *ts) |
558 | { | 601 | { |
559 | ktime_t now, expires; | 602 | ktime_t now, expires; |
560 | int cpu = smp_processor_id(); | 603 | int cpu = smp_processor_id(); |
561 | 604 | ||
562 | now = tick_nohz_start_idle(cpu, ts); | 605 | now = tick_nohz_start_idle(cpu, ts); |
563 | 606 | ||
564 | if (can_stop_idle_tick(cpu, ts)) { | 607 | if (can_stop_idle_tick(cpu, ts)) { |
565 | int was_stopped = ts->tick_stopped; | 608 | int was_stopped = ts->tick_stopped; |
566 | 609 | ||
567 | ts->idle_calls++; | 610 | ts->idle_calls++; |
568 | 611 | ||
569 | expires = tick_nohz_stop_sched_tick(ts, now, cpu); | 612 | expires = tick_nohz_stop_sched_tick(ts, now, cpu); |
570 | if (expires.tv64 > 0LL) { | 613 | if (expires.tv64 > 0LL) { |
571 | ts->idle_sleeps++; | 614 | ts->idle_sleeps++; |
572 | ts->idle_expires = expires; | 615 | ts->idle_expires = expires; |
573 | } | 616 | } |
574 | 617 | ||
575 | if (!was_stopped && ts->tick_stopped) | 618 | if (!was_stopped && ts->tick_stopped) |
576 | ts->idle_jiffies = ts->last_jiffies; | 619 | ts->idle_jiffies = ts->last_jiffies; |
577 | } | 620 | } |
578 | } | 621 | } |
579 | 622 | ||
580 | /** | 623 | /** |
581 | * tick_nohz_idle_enter - stop the idle tick from the idle task | 624 | * tick_nohz_idle_enter - stop the idle tick from the idle task |
582 | * | 625 | * |
583 | * When the next event is more than a tick into the future, stop the idle tick | 626 | * When the next event is more than a tick into the future, stop the idle tick |
584 | * Called when we start the idle loop. | 627 | * Called when we start the idle loop. |
585 | * | 628 | * |
586 | * The arch is responsible for calling: | 629 | * The arch is responsible for calling: |
587 | * | 630 | * |
588 | * - rcu_idle_enter() after its last use of RCU before the CPU is put | 631 | * - rcu_idle_enter() after its last use of RCU before the CPU is put |
589 | * to sleep. | 632 | * to sleep. |
590 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. | 633 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. |
591 | */ | 634 | */ |
592 | void tick_nohz_idle_enter(void) | 635 | void tick_nohz_idle_enter(void) |
593 | { | 636 | { |
594 | struct tick_sched *ts; | 637 | struct tick_sched *ts; |
595 | 638 | ||
596 | WARN_ON_ONCE(irqs_disabled()); | 639 | WARN_ON_ONCE(irqs_disabled()); |
597 | 640 | ||
598 | /* | 641 | /* |
599 | * Update the idle state in the scheduler domain hierarchy | 642 | * Update the idle state in the scheduler domain hierarchy |
600 | * when tick_nohz_stop_sched_tick() is called from the idle loop. | 643 | * when tick_nohz_stop_sched_tick() is called from the idle loop. |
601 | * State will be updated to busy during the first busy tick after | 644 | * State will be updated to busy during the first busy tick after |
602 | * exiting idle. | 645 | * exiting idle. |
603 | */ | 646 | */ |
604 | set_cpu_sd_state_idle(); | 647 | set_cpu_sd_state_idle(); |
605 | 648 | ||
606 | local_irq_disable(); | 649 | local_irq_disable(); |
607 | 650 | ||
608 | ts = &__get_cpu_var(tick_cpu_sched); | 651 | ts = &__get_cpu_var(tick_cpu_sched); |
609 | /* | 652 | /* |
610 | * set ts->inidle unconditionally. even if the system did not | 653 | * set ts->inidle unconditionally. even if the system did not |
611 | * switch to nohz mode the cpu frequency governors rely on the | 654 | * switch to nohz mode the cpu frequency governors rely on the |
612 | * update of the idle time accounting in tick_nohz_start_idle(). | 655 | * update of the idle time accounting in tick_nohz_start_idle(). |
613 | */ | 656 | */ |
614 | ts->inidle = 1; | 657 | ts->inidle = 1; |
615 | __tick_nohz_idle_enter(ts); | 658 | __tick_nohz_idle_enter(ts); |
616 | 659 | ||
617 | local_irq_enable(); | 660 | local_irq_enable(); |
618 | } | 661 | } |
619 | EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); | 662 | EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); |
620 | 663 | ||
621 | /** | 664 | /** |
622 | * tick_nohz_irq_exit - update next tick event from interrupt exit | 665 | * tick_nohz_irq_exit - update next tick event from interrupt exit |
623 | * | 666 | * |
624 | * When an interrupt fires while we are idle and it doesn't cause | 667 | * When an interrupt fires while we are idle and it doesn't cause |
625 | * a reschedule, it may still add, modify or delete a timer, enqueue | 668 | * a reschedule, it may still add, modify or delete a timer, enqueue |
626 | * an RCU callback, etc... | 669 | * an RCU callback, etc... |
627 | * So we need to re-calculate and reprogram the next tick event. | 670 | * So we need to re-calculate and reprogram the next tick event. |
628 | */ | 671 | */ |
629 | void tick_nohz_irq_exit(void) | 672 | void tick_nohz_irq_exit(void) |
630 | { | 673 | { |
631 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 674 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
632 | 675 | ||
633 | if (!ts->inidle) | 676 | if (!ts->inidle) |
634 | return; | 677 | return; |
635 | 678 | ||
636 | /* Cancel the timer because the CPU has already woken up from the C-states */ | 679 | /* Cancel the timer because the CPU has already woken up from the C-states */ |
637 | menu_hrtimer_cancel(); | 680 | menu_hrtimer_cancel(); |
638 | __tick_nohz_idle_enter(ts); | 681 | __tick_nohz_idle_enter(ts); |
639 | } | 682 | } |
640 | 683 | ||
641 | /** | 684 | /** |
642 | * tick_nohz_get_sleep_length - return the length of the current sleep | 685 | * tick_nohz_get_sleep_length - return the length of the current sleep |
643 | * | 686 | * |
644 | * Called from power state control code with interrupts disabled | 687 | * Called from power state control code with interrupts disabled |
645 | */ | 688 | */ |
646 | ktime_t tick_nohz_get_sleep_length(void) | 689 | ktime_t tick_nohz_get_sleep_length(void) |
647 | { | 690 | { |
648 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 691 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
649 | 692 | ||
650 | return ts->sleep_length; | 693 | return ts->sleep_length; |
651 | } | 694 | } |
652 | 695 | ||
653 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | 696 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) |
654 | { | 697 | { |
655 | hrtimer_cancel(&ts->sched_timer); | 698 | hrtimer_cancel(&ts->sched_timer); |
656 | hrtimer_set_expires(&ts->sched_timer, ts->last_tick); | 699 | hrtimer_set_expires(&ts->sched_timer, ts->last_tick); |
657 | 700 | ||
658 | while (1) { | 701 | while (1) { |
659 | /* Forward the time to expire in the future */ | 702 | /* Forward the time to expire in the future */ |
660 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 703 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
661 | 704 | ||
662 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 705 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
663 | hrtimer_start_expires(&ts->sched_timer, | 706 | hrtimer_start_expires(&ts->sched_timer, |
664 | HRTIMER_MODE_ABS_PINNED); | 707 | HRTIMER_MODE_ABS_PINNED); |
665 | /* Check if the timer was already in the past */ | 708 | /* Check if the timer was already in the past */ |
666 | if (hrtimer_active(&ts->sched_timer)) | 709 | if (hrtimer_active(&ts->sched_timer)) |
667 | break; | 710 | break; |
668 | } else { | 711 | } else { |
669 | if (!tick_program_event( | 712 | if (!tick_program_event( |
670 | hrtimer_get_expires(&ts->sched_timer), 0)) | 713 | hrtimer_get_expires(&ts->sched_timer), 0)) |
671 | break; | 714 | break; |
672 | } | 715 | } |
673 | /* Reread time and update jiffies */ | 716 | /* Reread time and update jiffies */ |
674 | now = ktime_get(); | 717 | now = ktime_get(); |
675 | tick_do_update_jiffies64(now); | 718 | tick_do_update_jiffies64(now); |
676 | } | 719 | } |
677 | } | 720 | } |
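tick_nohz_restart() keeps forwarding the expiry in whole tick periods and retries the programming until the expiry lands in the future, rereading the time between attempts. A rough standalone sketch of that forward-and-retry pattern, using plain nanosecond arithmetic instead of the hrtimer API (period and time values are made up):

#include <stdint.h>
#include <stdio.h>

#define TICK_PERIOD_NS 1000000ULL	/* assume HZ=1000, i.e. a 1 ms tick */

/* Forward 'expires' past 'now' in whole periods, in the spirit of hrtimer_forward(). */
static uint64_t forward(uint64_t expires, uint64_t now, uint64_t period)
{
	if (expires > now)
		return expires;
	return expires + ((now - expires) / period + 1) * period;
}

/* Pretend device programming fails (non-zero) if the expiry is already in the past. */
static int program_event(uint64_t expires, uint64_t now)
{
	return expires > now ? 0 : -1;
}

int main(void)
{
	uint64_t now = 10500000;	/* "current" time in ns */
	uint64_t expires = 7000000;	/* last tick, well in the past */

	while (1) {
		expires = forward(expires, now, TICK_PERIOD_NS);
		if (!program_event(expires, now))
			break;
		/* Programming failed: time moved on, reread it and retry. */
		now += TICK_PERIOD_NS;
	}
	printf("tick restarted, next expiry at %llu ns\n",
	       (unsigned long long)expires);
	return 0;
}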
678 | 721 | ||
679 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | 722 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) |
680 | { | 723 | { |
681 | /* Update jiffies first */ | 724 | /* Update jiffies first */ |
682 | tick_do_update_jiffies64(now); | 725 | tick_do_update_jiffies64(now); |
683 | update_cpu_load_nohz(); | 726 | update_cpu_load_nohz(); |
684 | 727 | ||
685 | calc_load_exit_idle(); | 728 | calc_load_exit_idle(); |
686 | touch_softlockup_watchdog(); | 729 | touch_softlockup_watchdog(); |
687 | /* | 730 | /* |
688 | * Cancel the scheduled timer and restore the tick | 731 | * Cancel the scheduled timer and restore the tick |
689 | */ | 732 | */ |
690 | ts->tick_stopped = 0; | 733 | ts->tick_stopped = 0; |
691 | ts->idle_exittime = now; | 734 | ts->idle_exittime = now; |
692 | 735 | ||
693 | tick_nohz_restart(ts, now); | 736 | tick_nohz_restart(ts, now); |
694 | } | 737 | } |
695 | 738 | ||
696 | static void tick_nohz_account_idle_ticks(struct tick_sched *ts) | 739 | static void tick_nohz_account_idle_ticks(struct tick_sched *ts) |
697 | { | 740 | { |
698 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | 741 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE |
699 | unsigned long ticks; | 742 | unsigned long ticks; |
700 | 743 | ||
701 | if (vtime_accounting_enabled()) | 744 | if (vtime_accounting_enabled()) |
702 | return; | 745 | return; |
703 | /* | 746 | /* |
704 | * We stopped the tick in idle. update_process_times() would miss the | 747 | * We stopped the tick in idle. update_process_times() would miss the |
705 | * time we slept, since it only accounts a single tick. | 748 | * time we slept, since it only accounts a single tick. |
706 | * Enforce that this time is accounted to idle! | 749 | * Enforce that this time is accounted to idle! |
707 | */ | 750 | */ |
708 | ticks = jiffies - ts->idle_jiffies; | 751 | ticks = jiffies - ts->idle_jiffies; |
709 | /* | 752 | /* |
710 | * We might be one off. Do not randomly account a huge number of ticks! | 753 | * We might be one off. Do not randomly account a huge number of ticks! |
711 | */ | 754 | */ |
712 | if (ticks && ticks < LONG_MAX) | 755 | if (ticks && ticks < LONG_MAX) |
713 | account_idle_ticks(ticks); | 756 | account_idle_ticks(ticks); |
714 | #endif | 757 | #endif |
715 | } | 758 | } |
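The delta above works because jiffies arithmetic is unsigned and modular: jiffies - ts->idle_jiffies yields the right number of ticks even if the counter wrapped while we slept, and the "ticks < LONG_MAX" test discards the absurd value that shows up if idle_jiffies happens to be one ahead of jiffies. A standalone illustration with invented counter values:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	/* Invented tick counters around an unsigned wrap. */
	unsigned long idle_jiffies = ULONG_MAX - 2;	/* value when the tick was stopped */
	unsigned long jiffies = 5;			/* value at idle exit, after the wrap */

	/* Modular unsigned subtraction: 8 ticks spent idle despite the wrap. */
	unsigned long ticks = jiffies - idle_jiffies;

	/* If idle_jiffies were one ahead, this would be a huge bogus number. */
	if (ticks && ticks < LONG_MAX)
		printf("account %lu idle ticks\n", ticks);
	else
		printf("implausible delta (%lu), skip accounting\n", ticks);
	return 0;
}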
716 | 759 | ||
717 | /** | 760 | /** |
718 | * tick_nohz_idle_exit - restart the idle tick from the idle task | 761 | * tick_nohz_idle_exit - restart the idle tick from the idle task |
719 | * | 762 | * |
720 | * Restart the idle tick when the CPU is woken up from idle. | 763 | * Restart the idle tick when the CPU is woken up from idle. |
721 | * This also exits the RCU extended quiescent state. The CPU | 764 | * This also exits the RCU extended quiescent state. The CPU |
722 | * can use RCU again after this function is called. | 765 | * can use RCU again after this function is called. |
723 | */ | 766 | */ |
724 | void tick_nohz_idle_exit(void) | 767 | void tick_nohz_idle_exit(void) |
725 | { | 768 | { |
726 | int cpu = smp_processor_id(); | 769 | int cpu = smp_processor_id(); |
727 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 770 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
728 | ktime_t now; | 771 | ktime_t now; |
729 | 772 | ||
730 | local_irq_disable(); | 773 | local_irq_disable(); |
731 | 774 | ||
732 | WARN_ON_ONCE(!ts->inidle); | 775 | WARN_ON_ONCE(!ts->inidle); |
733 | 776 | ||
734 | ts->inidle = 0; | 777 | ts->inidle = 0; |
735 | 778 | ||
736 | /* Cancel the timer because the CPU has already woken up from the C-states */ | 779 | /* Cancel the timer because the CPU has already woken up from the C-states */ |
737 | menu_hrtimer_cancel(); | 780 | menu_hrtimer_cancel(); |
738 | if (ts->idle_active || ts->tick_stopped) | 781 | if (ts->idle_active || ts->tick_stopped) |
739 | now = ktime_get(); | 782 | now = ktime_get(); |
740 | 783 | ||
741 | if (ts->idle_active) | 784 | if (ts->idle_active) |
742 | tick_nohz_stop_idle(cpu, now); | 785 | tick_nohz_stop_idle(cpu, now); |
743 | 786 | ||
744 | if (ts->tick_stopped) { | 787 | if (ts->tick_stopped) { |
745 | tick_nohz_restart_sched_tick(ts, now); | 788 | tick_nohz_restart_sched_tick(ts, now); |
746 | tick_nohz_account_idle_ticks(ts); | 789 | tick_nohz_account_idle_ticks(ts); |
747 | } | 790 | } |
748 | 791 | ||
749 | local_irq_enable(); | 792 | local_irq_enable(); |
750 | } | 793 | } |
751 | EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); | 794 | EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); |
752 | 795 | ||
753 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | 796 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) |
754 | { | 797 | { |
755 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 798 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
756 | return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); | 799 | return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); |
757 | } | 800 | } |
758 | 801 | ||
759 | /* | 802 | /* |
760 | * The nohz low res interrupt handler | 803 | * The nohz low res interrupt handler |
761 | */ | 804 | */ |
762 | static void tick_nohz_handler(struct clock_event_device *dev) | 805 | static void tick_nohz_handler(struct clock_event_device *dev) |
763 | { | 806 | { |
764 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 807 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
765 | struct pt_regs *regs = get_irq_regs(); | 808 | struct pt_regs *regs = get_irq_regs(); |
766 | ktime_t now = ktime_get(); | 809 | ktime_t now = ktime_get(); |
767 | 810 | ||
768 | dev->next_event.tv64 = KTIME_MAX; | 811 | dev->next_event.tv64 = KTIME_MAX; |
769 | 812 | ||
770 | tick_sched_do_timer(now); | 813 | tick_sched_do_timer(now); |
771 | tick_sched_handle(ts, regs); | 814 | tick_sched_handle(ts, regs); |
772 | 815 | ||
773 | while (tick_nohz_reprogram(ts, now)) { | 816 | while (tick_nohz_reprogram(ts, now)) { |
774 | now = ktime_get(); | 817 | now = ktime_get(); |
775 | tick_do_update_jiffies64(now); | 818 | tick_do_update_jiffies64(now); |
776 | } | 819 | } |
777 | } | 820 | } |
778 | 821 | ||
779 | /** | 822 | /** |
780 | * tick_nohz_switch_to_nohz - switch to nohz mode | 823 | * tick_nohz_switch_to_nohz - switch to nohz mode |
781 | */ | 824 | */ |
782 | static void tick_nohz_switch_to_nohz(void) | 825 | static void tick_nohz_switch_to_nohz(void) |
783 | { | 826 | { |
784 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 827 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
785 | ktime_t next; | 828 | ktime_t next; |
786 | 829 | ||
787 | if (!tick_nohz_enabled) | 830 | if (!tick_nohz_enabled) |
788 | return; | 831 | return; |
789 | 832 | ||
790 | local_irq_disable(); | 833 | local_irq_disable(); |
791 | if (tick_switch_to_oneshot(tick_nohz_handler)) { | 834 | if (tick_switch_to_oneshot(tick_nohz_handler)) { |
792 | local_irq_enable(); | 835 | local_irq_enable(); |
793 | return; | 836 | return; |
794 | } | 837 | } |
795 | 838 | ||
796 | ts->nohz_mode = NOHZ_MODE_LOWRES; | 839 | ts->nohz_mode = NOHZ_MODE_LOWRES; |
797 | 840 | ||
798 | /* | 841 | /* |
799 | * Recycle the hrtimer in ts, so we can share the | 842 | * Recycle the hrtimer in ts, so we can share the |
800 | * hrtimer_forward with the highres code. | 843 | * hrtimer_forward with the highres code. |
801 | */ | 844 | */ |
802 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 845 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
803 | /* Get the next period */ | 846 | /* Get the next period */ |
804 | next = tick_init_jiffy_update(); | 847 | next = tick_init_jiffy_update(); |
805 | 848 | ||
806 | for (;;) { | 849 | for (;;) { |
807 | hrtimer_set_expires(&ts->sched_timer, next); | 850 | hrtimer_set_expires(&ts->sched_timer, next); |
808 | if (!tick_program_event(next, 0)) | 851 | if (!tick_program_event(next, 0)) |
809 | break; | 852 | break; |
810 | next = ktime_add(next, tick_period); | 853 | next = ktime_add(next, tick_period); |
811 | } | 854 | } |
812 | local_irq_enable(); | 855 | local_irq_enable(); |
813 | } | 856 | } |
814 | 857 | ||
815 | /* | 858 | /* |
816 | * When NOHZ is enabled and the tick is stopped, we need to kick the | 859 | * When NOHZ is enabled and the tick is stopped, we need to kick the |
817 | * tick timer from irq_enter() so that the jiffies update is kept | 860 | * tick timer from irq_enter() so that the jiffies update is kept |
818 | * alive during long running softirqs. That's ugly as hell, but | 861 | * alive during long running softirqs. That's ugly as hell, but |
819 | * correctness is key even if we need to fix the offending softirq in | 862 | * correctness is key even if we need to fix the offending softirq in |
820 | * the first place. | 863 | * the first place. |
821 | * | 864 | * |
822 | * Note that this is different from tick_nohz_restart. We just kick the | 865 | * Note that this is different from tick_nohz_restart. We just kick the |
823 | * timer and do not touch the other magic bits which need to be done | 866 | * timer and do not touch the other magic bits which need to be done |
824 | * when idle is left. | 867 | * when idle is left. |
825 | */ | 868 | */ |
826 | static void tick_nohz_kick_tick(int cpu, ktime_t now) | 869 | static void tick_nohz_kick_tick(int cpu, ktime_t now) |
827 | { | 870 | { |
828 | #if 0 | 871 | #if 0 |
829 | /* Switch back to 2.6.27 behaviour */ | 872 | /* Switch back to 2.6.27 behaviour */ |
830 | 873 | ||
831 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 874 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
832 | ktime_t delta; | 875 | ktime_t delta; |
833 | 876 | ||
834 | /* | 877 | /* |
835 | * Do not touch the tick device when the next expiry is either | 878 | * Do not touch the tick device when the next expiry is either |
836 | * already reached or less than or equal to one tick period away. | 879 | * already reached or less than or equal to one tick period away. |
837 | */ | 880 | */ |
838 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); | 881 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); |
839 | if (delta.tv64 <= tick_period.tv64) | 882 | if (delta.tv64 <= tick_period.tv64) |
840 | return; | 883 | return; |
841 | 884 | ||
842 | tick_nohz_restart(ts, now); | 885 | tick_nohz_restart(ts, now); |
843 | #endif | 886 | #endif |
844 | } | 887 | } |
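Even though the body above is compiled out, the check it would perform is worth spelling out: only kick the tick when the already-programmed expiry is more than one tick period away; if it is due within a tick anyway, reprogramming the device buys nothing. A tiny standalone version of that test with invented nanosecond values:

#include <stdint.h>
#include <stdio.h>

#define TICK_PERIOD_NS 1000000LL	/* assume a 1 ms tick */

int main(void)
{
	int64_t now = 42000000;		/* invented current time, ns */
	int64_t next_expiry = 42700000;	/* programmed expiry, 0.7 ms away */

	int64_t delta = next_expiry - now;

	if (delta <= TICK_PERIOD_NS)
		printf("expiry only %lld ns away: leave the tick device alone\n",
		       (long long)delta);
	else
		printf("expiry %lld ns away: restart/kick the tick\n",
		       (long long)delta);
	return 0;
}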
845 | 888 | ||
846 | static inline void tick_check_nohz(int cpu) | 889 | static inline void tick_check_nohz(int cpu) |
847 | { | 890 | { |
848 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 891 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
849 | ktime_t now; | 892 | ktime_t now; |
850 | 893 | ||
851 | if (!ts->idle_active && !ts->tick_stopped) | 894 | if (!ts->idle_active && !ts->tick_stopped) |
852 | return; | 895 | return; |
853 | now = ktime_get(); | 896 | now = ktime_get(); |
854 | if (ts->idle_active) | 897 | if (ts->idle_active) |
855 | tick_nohz_stop_idle(cpu, now); | 898 | tick_nohz_stop_idle(cpu, now); |
856 | if (ts->tick_stopped) { | 899 | if (ts->tick_stopped) { |
857 | tick_nohz_update_jiffies(now); | 900 | tick_nohz_update_jiffies(now); |
858 | tick_nohz_kick_tick(cpu, now); | 901 | tick_nohz_kick_tick(cpu, now); |
859 | } | 902 | } |
860 | } | 903 | } |
861 | 904 | ||
862 | #else | 905 | #else |
863 | 906 | ||
864 | static inline void tick_nohz_switch_to_nohz(void) { } | 907 | static inline void tick_nohz_switch_to_nohz(void) { } |
865 | static inline void tick_check_nohz(int cpu) { } | 908 | static inline void tick_check_nohz(int cpu) { } |
866 | 909 | ||
867 | #endif /* NO_HZ */ | 910 | #endif /* NO_HZ */ |
868 | 911 | ||
869 | /* | 912 | /* |
870 | * Called from irq_enter to notify about the possible interruption of idle() | 913 | * Called from irq_enter to notify about the possible interruption of idle() |
871 | */ | 914 | */ |
872 | void tick_check_idle(int cpu) | 915 | void tick_check_idle(int cpu) |
873 | { | 916 | { |
874 | tick_check_oneshot_broadcast(cpu); | 917 | tick_check_oneshot_broadcast(cpu); |
875 | tick_check_nohz(cpu); | 918 | tick_check_nohz(cpu); |
876 | } | 919 | } |
877 | 920 | ||
878 | /* | 921 | /* |
879 | * High resolution timer specific code | 922 | * High resolution timer specific code |
880 | */ | 923 | */ |
881 | #ifdef CONFIG_HIGH_RES_TIMERS | 924 | #ifdef CONFIG_HIGH_RES_TIMERS |
882 | /* | 925 | /* |
883 | * We rearm the timer until we get disabled by the idle code. | 926 | * We rearm the timer until we get disabled by the idle code. |
884 | * Called with interrupts disabled. | 927 | * Called with interrupts disabled. |
885 | */ | 928 | */ |
886 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | 929 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) |
887 | { | 930 | { |
888 | struct tick_sched *ts = | 931 | struct tick_sched *ts = |
889 | container_of(timer, struct tick_sched, sched_timer); | 932 | container_of(timer, struct tick_sched, sched_timer); |
890 | struct pt_regs *regs = get_irq_regs(); | 933 | struct pt_regs *regs = get_irq_regs(); |
891 | ktime_t now = ktime_get(); | 934 | ktime_t now = ktime_get(); |
892 | 935 | ||
893 | tick_sched_do_timer(now); | 936 | tick_sched_do_timer(now); |
894 | 937 | ||
895 | /* | 938 | /* |
896 | * Do not call this when we are not in irq context and have | 939 | * Do not call this when we are not in irq context and have |
897 | * no valid regs pointer. | 940 | * no valid regs pointer. |
898 | */ | 941 | */ |
899 | if (regs) | 942 | if (regs) |
900 | tick_sched_handle(ts, regs); | 943 | tick_sched_handle(ts, regs); |
901 | 944 | ||
902 | hrtimer_forward(timer, now, tick_period); | 945 | hrtimer_forward(timer, now, tick_period); |
903 | 946 | ||
904 | return HRTIMER_RESTART; | 947 | return HRTIMER_RESTART; |
905 | } | 948 | } |
906 | 949 | ||
907 | static int sched_skew_tick; | 950 | static int sched_skew_tick; |
908 | 951 | ||
909 | static int __init skew_tick(char *str) | 952 | static int __init skew_tick(char *str) |
910 | { | 953 | { |
911 | get_option(&str, &sched_skew_tick); | 954 | get_option(&str, &sched_skew_tick); |
912 | 955 | ||
913 | return 0; | 956 | return 0; |
914 | } | 957 | } |
915 | early_param("skew_tick", skew_tick); | 958 | early_param("skew_tick", skew_tick); |
916 | 959 | ||
917 | /** | 960 | /** |
918 | * tick_setup_sched_timer - setup the tick emulation timer | 961 | * tick_setup_sched_timer - setup the tick emulation timer |
919 | */ | 962 | */ |
920 | void tick_setup_sched_timer(void) | 963 | void tick_setup_sched_timer(void) |
921 | { | 964 | { |
922 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 965 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
923 | ktime_t now = ktime_get(); | 966 | ktime_t now = ktime_get(); |
924 | 967 | ||
925 | /* | 968 | /* |
926 | * Emulate tick processing via per-CPU hrtimers: | 969 | * Emulate tick processing via per-CPU hrtimers: |
927 | */ | 970 | */ |
928 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 971 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
929 | ts->sched_timer.function = tick_sched_timer; | 972 | ts->sched_timer.function = tick_sched_timer; |
930 | 973 | ||
931 | /* Get the next period (per cpu) */ | 974 | /* Get the next period (per cpu) */ |
932 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 975 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
933 | 976 | ||
934 | /* Offset the tick to avert jiffies_lock contention. */ | 977 | /* Offset the tick to avert jiffies_lock contention. */ |
935 | if (sched_skew_tick) { | 978 | if (sched_skew_tick) { |
936 | u64 offset = ktime_to_ns(tick_period) >> 1; | 979 | u64 offset = ktime_to_ns(tick_period) >> 1; |
937 | do_div(offset, num_possible_cpus()); | 980 | do_div(offset, num_possible_cpus()); |
938 | offset *= smp_processor_id(); | 981 | offset *= smp_processor_id(); |
939 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | 982 | hrtimer_add_expires_ns(&ts->sched_timer, offset); |
940 | } | 983 | } |
941 | 984 | ||
942 | for (;;) { | 985 | for (;;) { |
943 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 986 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
944 | hrtimer_start_expires(&ts->sched_timer, | 987 | hrtimer_start_expires(&ts->sched_timer, |
945 | HRTIMER_MODE_ABS_PINNED); | 988 | HRTIMER_MODE_ABS_PINNED); |
946 | /* Check if the timer was already in the past */ | 989 | /* Check if the timer was already in the past */ |
947 | if (hrtimer_active(&ts->sched_timer)) | 990 | if (hrtimer_active(&ts->sched_timer)) |
948 | break; | 991 | break; |
949 | now = ktime_get(); | 992 | now = ktime_get(); |
950 | } | 993 | } |
951 | 994 | ||
952 | #ifdef CONFIG_NO_HZ | 995 | #ifdef CONFIG_NO_HZ |
953 | if (tick_nohz_enabled) | 996 | if (tick_nohz_enabled) |
954 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 997 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
955 | #endif | 998 | #endif |
956 | } | 999 | } |
957 | #endif /* HIGH_RES_TIMERS */ | 1000 | #endif /* HIGH_RES_TIMERS */ |
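For the skew_tick= boot parameter handled in tick_setup_sched_timer() above, the per-CPU offset is cpu_id * (tick_period / 2) / num_possible_cpus(), which spreads the ticks across half a period so the CPUs do not pile up on jiffies_lock. Assuming HZ=1000 (a 1 ms period) and four possible CPUs, that gives offsets of 0, 125, 250 and 375 microseconds; a quick standalone check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tick_period_ns = 1000000;	/* HZ=1000 assumed */
	unsigned int num_cpus = 4;		/* stand-in for num_possible_cpus() */

	uint64_t step = (tick_period_ns >> 1) / num_cpus;	/* half a period, split evenly */

	for (unsigned int cpu = 0; cpu < num_cpus; cpu++)
		printf("cpu %u: tick skewed by %llu ns\n",
		       cpu, (unsigned long long)(step * cpu));
	return 0;
}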
958 | 1001 | ||
959 | #if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS | 1002 | #if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS |
960 | void tick_cancel_sched_timer(int cpu) | 1003 | void tick_cancel_sched_timer(int cpu) |
961 | { | 1004 | { |
962 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 1005 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
963 | 1006 | ||
964 | # ifdef CONFIG_HIGH_RES_TIMERS | 1007 | # ifdef CONFIG_HIGH_RES_TIMERS |
965 | if (ts->sched_timer.base) | 1008 | if (ts->sched_timer.base) |
966 | hrtimer_cancel(&ts->sched_timer); | 1009 | hrtimer_cancel(&ts->sched_timer); |
967 | # endif | 1010 | # endif |
968 | 1011 | ||
969 | ts->nohz_mode = NOHZ_MODE_INACTIVE; | 1012 | ts->nohz_mode = NOHZ_MODE_INACTIVE; |
970 | } | 1013 | } |
971 | #endif | 1014 | #endif |
972 | 1015 | ||
973 | /** | 1016 | /** |
974 | * Async notification about clocksource changes | 1017 | * Async notification about clocksource changes |
975 | */ | 1018 | */ |
976 | void tick_clock_notify(void) | 1019 | void tick_clock_notify(void) |
977 | { | 1020 | { |
978 | int cpu; | 1021 | int cpu; |
979 | 1022 | ||
980 | for_each_possible_cpu(cpu) | 1023 | for_each_possible_cpu(cpu) |
981 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); | 1024 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); |
982 | } | 1025 | } |
983 | 1026 | ||
984 | /* | 1027 | /* |
985 | * Async notification about clock event changes | 1028 | * Async notification about clock event changes |
986 | */ | 1029 | */ |
987 | void tick_oneshot_notify(void) | 1030 | void tick_oneshot_notify(void) |
988 | { | 1031 | { |
989 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 1032 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
990 | 1033 | ||
991 | set_bit(0, &ts->check_clocks); | 1034 | set_bit(0, &ts->check_clocks); |
992 | } | 1035 | } |
993 | 1036 | ||
994 | /** | 1037 | /** |
995 | * Check if a change happened which makes oneshot possible. | 1038 | * Check if a change happened which makes oneshot possible. |
996 | * | 1039 | * |
997 | * Called cyclically from the hrtimer softirq (driven by the timer | 1040 | * Called cyclically from the hrtimer softirq (driven by the timer |
998 | * softirq). allow_nohz signals that we can switch into low-res nohz | 1041 | * softirq). allow_nohz signals that we can switch into low-res nohz |
999 | * mode, because high resolution timers are disabled (either at | 1042 | * mode, because high resolution timers are disabled (either at |
1000 | * compile time or at runtime). | 1043 | * compile time or at runtime). |
1001 | */ | 1044 | */ |
1002 | int tick_check_oneshot_change(int allow_nohz) | 1045 | int tick_check_oneshot_change(int allow_nohz) |
1003 | { | 1046 | { |
1004 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 1047 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
1005 | 1048 | ||
1006 | if (!test_and_clear_bit(0, &ts->check_clocks)) | 1049 | if (!test_and_clear_bit(0, &ts->check_clocks)) |
1007 | return 0; | 1050 | return 0; |
1008 | 1051 | ||
1009 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) | 1052 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) |
1010 | return 0; | 1053 | return 0; |
1011 | 1054 | ||
1012 | if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) | 1055 | if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) |
1013 | return 0; | 1056 | return 0; |
1014 | 1057 | ||
1015 | if (!allow_nohz) | 1058 | if (!allow_nohz) |
1016 | return 1; | 1059 | return 1; |
1017 | 1060 | ||
1018 | tick_nohz_switch_to_nohz(); | 1061 | tick_nohz_switch_to_nohz(); |
1019 | return 0; | 1062 | return 0; |
1020 | } | 1063 | } |
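The decision flow of tick_check_oneshot_change() compresses to: nothing pending, nohz already active, or the clocksource/clockevent not oneshot-capable means there is nothing to do; otherwise the highres caller (allow_nohz == 0) gets a 1 so it performs the switch itself, while the low-res path switches to nohz here and returns 0. A standalone restatement of that flow with the per-CPU state replaced by plain booleans (all names and values invented for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Invented stand-ins for the per-cpu tick_sched state and hardware checks. */
static bool check_pending = true;	/* like test_and_clear_bit(0, &ts->check_clocks) */
static bool nohz_active;		/* like ts->nohz_mode != NOHZ_MODE_INACTIVE */
static bool oneshot_capable = true;	/* timekeeping valid for hres + oneshot device */

static int check_oneshot_change(int allow_nohz)
{
	if (!check_pending)
		return 0;
	check_pending = false;		/* the pending bit is consumed */

	if (nohz_active || !oneshot_capable)
		return 0;

	if (!allow_nohz)
		return 1;		/* highres caller performs the switch itself */

	printf("switching to low-res nohz mode\n");
	return 0;
}

int main(void)
{
	printf("highres caller -> %d\n", check_oneshot_change(0));
	check_pending = true;		/* pretend another clock change was signalled */
	printf("low-res caller -> %d\n", check_oneshot_change(1));
	return 0;
}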