Commit cb41a29076e9f95547da46578d5c8804f7b8845d

Authored by Frederic Weisbecker
Parent: 0637e02939

nohz: Add basic tracing

It's not obvious why the full dynticks subsystem
sometimes fails to stop the tick: whether this is due to
kthreads, posix timers, perf events, etc.

These new tracepoints are here to help the user diagnose
such failures and test this feature.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>

Showing 2 changed files with 36 additions and 4 deletions
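
To see why the tick was (or was not) stopped on a CONFIG_NO_HZ_FULL kernel, the
simplest route is ftrace: with debugfs mounted in the usual place, writing 1 to
/sys/kernel/debug/tracing/events/timer/tick_stop/enable and reading trace_pipe
shows the success flag and reason string recorded by the new event. For
in-kernel consumers, the sketch below is a minimal, hypothetical module (not
part of this commit) that attaches a probe to the same tracepoint; it assumes
CONFIG_NO_HZ_FULL is enabled and relies on the register_trace_tick_stop() /
unregister_trace_tick_stop() helpers generated by the TRACE_EVENT() macro in
the header changed below.

	#include <linux/module.h>
	#include <trace/events/timer.h>

	/*
	 * Probe signature: the void *data cookie passed at registration time,
	 * followed by the TP_PROTO(int success, char *error_msg) arguments.
	 */
	static void probe_tick_stop(void *data, int success, char *error_msg)
	{
		pr_info("tick_stop: success=%d msg=%s", success, error_msg);
	}

	static int __init tick_stop_probe_init(void)
	{
		return register_trace_tick_stop(probe_tick_stop, NULL);
	}

	static void __exit tick_stop_probe_exit(void)
	{
		unregister_trace_tick_stop(probe_tick_stop, NULL);
		/* Wait for in-flight probe calls to finish before unload. */
		tracepoint_synchronize_unregister();
	}

	module_init(tick_stop_probe_init);
	module_exit(tick_stop_probe_exit);
	MODULE_LICENSE("GPL");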

include/trace/events/timer.h
1 #undef TRACE_SYSTEM 1 #undef TRACE_SYSTEM
2 #define TRACE_SYSTEM timer 2 #define TRACE_SYSTEM timer
3 3
4 #if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) 4 #if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ)
5 #define _TRACE_TIMER_H 5 #define _TRACE_TIMER_H
6 6
7 #include <linux/tracepoint.h> 7 #include <linux/tracepoint.h>
8 #include <linux/hrtimer.h> 8 #include <linux/hrtimer.h>
9 #include <linux/timer.h> 9 #include <linux/timer.h>
10 10
11 DECLARE_EVENT_CLASS(timer_class, 11 DECLARE_EVENT_CLASS(timer_class,
12 12
13 TP_PROTO(struct timer_list *timer), 13 TP_PROTO(struct timer_list *timer),
14 14
15 TP_ARGS(timer), 15 TP_ARGS(timer),
16 16
17 TP_STRUCT__entry( 17 TP_STRUCT__entry(
18 __field( void *, timer ) 18 __field( void *, timer )
19 ), 19 ),
20 20
21 TP_fast_assign( 21 TP_fast_assign(
22 __entry->timer = timer; 22 __entry->timer = timer;
23 ), 23 ),
24 24
25 TP_printk("timer=%p", __entry->timer) 25 TP_printk("timer=%p", __entry->timer)
26 ); 26 );
27 27
28 /** 28 /**
29 * timer_init - called when the timer is initialized 29 * timer_init - called when the timer is initialized
30 * @timer: pointer to struct timer_list 30 * @timer: pointer to struct timer_list
31 */ 31 */
32 DEFINE_EVENT(timer_class, timer_init, 32 DEFINE_EVENT(timer_class, timer_init,
33 33
34 TP_PROTO(struct timer_list *timer), 34 TP_PROTO(struct timer_list *timer),
35 35
36 TP_ARGS(timer) 36 TP_ARGS(timer)
37 ); 37 );
38 38
39 /** 39 /**
40 * timer_start - called when the timer is started 40 * timer_start - called when the timer is started
41 * @timer: pointer to struct timer_list 41 * @timer: pointer to struct timer_list
42 * @expires: the timers expiry time 42 * @expires: the timers expiry time
43 */ 43 */
44 TRACE_EVENT(timer_start, 44 TRACE_EVENT(timer_start,
45 45
46 TP_PROTO(struct timer_list *timer, unsigned long expires), 46 TP_PROTO(struct timer_list *timer, unsigned long expires),
47 47
48 TP_ARGS(timer, expires), 48 TP_ARGS(timer, expires),
49 49
50 TP_STRUCT__entry( 50 TP_STRUCT__entry(
51 __field( void *, timer ) 51 __field( void *, timer )
52 __field( void *, function ) 52 __field( void *, function )
53 __field( unsigned long, expires ) 53 __field( unsigned long, expires )
54 __field( unsigned long, now ) 54 __field( unsigned long, now )
55 ), 55 ),
56 56
57 TP_fast_assign( 57 TP_fast_assign(
58 __entry->timer = timer; 58 __entry->timer = timer;
59 __entry->function = timer->function; 59 __entry->function = timer->function;
60 __entry->expires = expires; 60 __entry->expires = expires;
61 __entry->now = jiffies; 61 __entry->now = jiffies;
62 ), 62 ),
63 63
64 TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", 64 TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]",
65 __entry->timer, __entry->function, __entry->expires, 65 __entry->timer, __entry->function, __entry->expires,
66 (long)__entry->expires - __entry->now) 66 (long)__entry->expires - __entry->now)
67 ); 67 );
68 68
69 /** 69 /**
70 * timer_expire_entry - called immediately before the timer callback 70 * timer_expire_entry - called immediately before the timer callback
71 * @timer: pointer to struct timer_list 71 * @timer: pointer to struct timer_list
72 * 72 *
73 * Allows to determine the timer latency. 73 * Allows to determine the timer latency.
74 */ 74 */
75 TRACE_EVENT(timer_expire_entry, 75 TRACE_EVENT(timer_expire_entry,
76 76
77 TP_PROTO(struct timer_list *timer), 77 TP_PROTO(struct timer_list *timer),
78 78
79 TP_ARGS(timer), 79 TP_ARGS(timer),
80 80
81 TP_STRUCT__entry( 81 TP_STRUCT__entry(
82 __field( void *, timer ) 82 __field( void *, timer )
83 __field( unsigned long, now ) 83 __field( unsigned long, now )
84 __field( void *, function) 84 __field( void *, function)
85 ), 85 ),
86 86
87 TP_fast_assign( 87 TP_fast_assign(
88 __entry->timer = timer; 88 __entry->timer = timer;
89 __entry->now = jiffies; 89 __entry->now = jiffies;
90 __entry->function = timer->function; 90 __entry->function = timer->function;
91 ), 91 ),
92 92
93 TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now) 93 TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now)
94 ); 94 );
95 95
96 /** 96 /**
97 * timer_expire_exit - called immediately after the timer callback returns 97 * timer_expire_exit - called immediately after the timer callback returns
98 * @timer: pointer to struct timer_list 98 * @timer: pointer to struct timer_list
99 * 99 *
100 * When used in combination with the timer_expire_entry tracepoint we can 100 * When used in combination with the timer_expire_entry tracepoint we can
101 * determine the runtime of the timer callback function. 101 * determine the runtime of the timer callback function.
102 * 102 *
103 * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might 103 * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
104 * be invalid. We solely track the pointer. 104 * be invalid. We solely track the pointer.
105 */ 105 */
106 DEFINE_EVENT(timer_class, timer_expire_exit, 106 DEFINE_EVENT(timer_class, timer_expire_exit,
107 107
108 TP_PROTO(struct timer_list *timer), 108 TP_PROTO(struct timer_list *timer),
109 109
110 TP_ARGS(timer) 110 TP_ARGS(timer)
111 ); 111 );
112 112
113 /** 113 /**
114 * timer_cancel - called when the timer is canceled 114 * timer_cancel - called when the timer is canceled
115 * @timer: pointer to struct timer_list 115 * @timer: pointer to struct timer_list
116 */ 116 */
117 DEFINE_EVENT(timer_class, timer_cancel, 117 DEFINE_EVENT(timer_class, timer_cancel,
118 118
119 TP_PROTO(struct timer_list *timer), 119 TP_PROTO(struct timer_list *timer),
120 120
121 TP_ARGS(timer) 121 TP_ARGS(timer)
122 ); 122 );
123 123
124 /** 124 /**
125 * hrtimer_init - called when the hrtimer is initialized 125 * hrtimer_init - called when the hrtimer is initialized
126 * @timer: pointer to struct hrtimer 126 * @timer: pointer to struct hrtimer
127 * @clockid: the hrtimers clock 127 * @clockid: the hrtimers clock
128 * @mode: the hrtimers mode 128 * @mode: the hrtimers mode
129 */ 129 */
130 TRACE_EVENT(hrtimer_init, 130 TRACE_EVENT(hrtimer_init,
131 131
132 TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, 132 TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid,
133 enum hrtimer_mode mode), 133 enum hrtimer_mode mode),
134 134
135 TP_ARGS(hrtimer, clockid, mode), 135 TP_ARGS(hrtimer, clockid, mode),
136 136
137 TP_STRUCT__entry( 137 TP_STRUCT__entry(
138 __field( void *, hrtimer ) 138 __field( void *, hrtimer )
139 __field( clockid_t, clockid ) 139 __field( clockid_t, clockid )
140 __field( enum hrtimer_mode, mode ) 140 __field( enum hrtimer_mode, mode )
141 ), 141 ),
142 142
143 TP_fast_assign( 143 TP_fast_assign(
144 __entry->hrtimer = hrtimer; 144 __entry->hrtimer = hrtimer;
145 __entry->clockid = clockid; 145 __entry->clockid = clockid;
146 __entry->mode = mode; 146 __entry->mode = mode;
147 ), 147 ),
148 148
149 TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, 149 TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
150 __entry->clockid == CLOCK_REALTIME ? 150 __entry->clockid == CLOCK_REALTIME ?
151 "CLOCK_REALTIME" : "CLOCK_MONOTONIC", 151 "CLOCK_REALTIME" : "CLOCK_MONOTONIC",
152 __entry->mode == HRTIMER_MODE_ABS ? 152 __entry->mode == HRTIMER_MODE_ABS ?
153 "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") 153 "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL")
154 ); 154 );
155 155
156 /** 156 /**
157 * hrtimer_start - called when the hrtimer is started 157 * hrtimer_start - called when the hrtimer is started
158 * @timer: pointer to struct hrtimer 158 * @timer: pointer to struct hrtimer
159 */ 159 */
160 TRACE_EVENT(hrtimer_start, 160 TRACE_EVENT(hrtimer_start,
161 161
162 TP_PROTO(struct hrtimer *hrtimer), 162 TP_PROTO(struct hrtimer *hrtimer),
163 163
164 TP_ARGS(hrtimer), 164 TP_ARGS(hrtimer),
165 165
166 TP_STRUCT__entry( 166 TP_STRUCT__entry(
167 __field( void *, hrtimer ) 167 __field( void *, hrtimer )
168 __field( void *, function ) 168 __field( void *, function )
169 __field( s64, expires ) 169 __field( s64, expires )
170 __field( s64, softexpires ) 170 __field( s64, softexpires )
171 ), 171 ),
172 172
173 TP_fast_assign( 173 TP_fast_assign(
174 __entry->hrtimer = hrtimer; 174 __entry->hrtimer = hrtimer;
175 __entry->function = hrtimer->function; 175 __entry->function = hrtimer->function;
176 __entry->expires = hrtimer_get_expires(hrtimer).tv64; 176 __entry->expires = hrtimer_get_expires(hrtimer).tv64;
177 __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; 177 __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64;
178 ), 178 ),
179 179
180 TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", 180 TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu",
181 __entry->hrtimer, __entry->function, 181 __entry->hrtimer, __entry->function,
182 (unsigned long long)ktime_to_ns((ktime_t) { 182 (unsigned long long)ktime_to_ns((ktime_t) {
183 .tv64 = __entry->expires }), 183 .tv64 = __entry->expires }),
184 (unsigned long long)ktime_to_ns((ktime_t) { 184 (unsigned long long)ktime_to_ns((ktime_t) {
185 .tv64 = __entry->softexpires })) 185 .tv64 = __entry->softexpires }))
186 ); 186 );
187 187
188 /** 188 /**
189 * htimmer_expire_entry - called immediately before the hrtimer callback 189 * htimmer_expire_entry - called immediately before the hrtimer callback
190 * @timer: pointer to struct hrtimer 190 * @timer: pointer to struct hrtimer
191 * @now: pointer to variable which contains current time of the 191 * @now: pointer to variable which contains current time of the
192 * timers base. 192 * timers base.
193 * 193 *
194 * Allows to determine the timer latency. 194 * Allows to determine the timer latency.
195 */ 195 */
196 TRACE_EVENT(hrtimer_expire_entry, 196 TRACE_EVENT(hrtimer_expire_entry,
197 197
198 TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), 198 TP_PROTO(struct hrtimer *hrtimer, ktime_t *now),
199 199
200 TP_ARGS(hrtimer, now), 200 TP_ARGS(hrtimer, now),
201 201
202 TP_STRUCT__entry( 202 TP_STRUCT__entry(
203 __field( void *, hrtimer ) 203 __field( void *, hrtimer )
204 __field( s64, now ) 204 __field( s64, now )
205 __field( void *, function) 205 __field( void *, function)
206 ), 206 ),
207 207
208 TP_fast_assign( 208 TP_fast_assign(
209 __entry->hrtimer = hrtimer; 209 __entry->hrtimer = hrtimer;
210 __entry->now = now->tv64; 210 __entry->now = now->tv64;
211 __entry->function = hrtimer->function; 211 __entry->function = hrtimer->function;
212 ), 212 ),
213 213
214 TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, 214 TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function,
215 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) 215 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
216 ); 216 );
217 217
218 DECLARE_EVENT_CLASS(hrtimer_class, 218 DECLARE_EVENT_CLASS(hrtimer_class,
219 219
220 TP_PROTO(struct hrtimer *hrtimer), 220 TP_PROTO(struct hrtimer *hrtimer),
221 221
222 TP_ARGS(hrtimer), 222 TP_ARGS(hrtimer),
223 223
224 TP_STRUCT__entry( 224 TP_STRUCT__entry(
225 __field( void *, hrtimer ) 225 __field( void *, hrtimer )
226 ), 226 ),
227 227
228 TP_fast_assign( 228 TP_fast_assign(
229 __entry->hrtimer = hrtimer; 229 __entry->hrtimer = hrtimer;
230 ), 230 ),
231 231
232 TP_printk("hrtimer=%p", __entry->hrtimer) 232 TP_printk("hrtimer=%p", __entry->hrtimer)
233 ); 233 );
234 234
235 /** 235 /**
236 * hrtimer_expire_exit - called immediately after the hrtimer callback returns 236 * hrtimer_expire_exit - called immediately after the hrtimer callback returns
237 * @timer: pointer to struct hrtimer 237 * @timer: pointer to struct hrtimer
238 * 238 *
239 * When used in combination with the hrtimer_expire_entry tracepoint we can 239 * When used in combination with the hrtimer_expire_entry tracepoint we can
240 * determine the runtime of the callback function. 240 * determine the runtime of the callback function.
241 */ 241 */
242 DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit, 242 DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
243 243
244 TP_PROTO(struct hrtimer *hrtimer), 244 TP_PROTO(struct hrtimer *hrtimer),
245 245
246 TP_ARGS(hrtimer) 246 TP_ARGS(hrtimer)
247 ); 247 );
248 248
249 /** 249 /**
250 * hrtimer_cancel - called when the hrtimer is canceled 250 * hrtimer_cancel - called when the hrtimer is canceled
251 * @hrtimer: pointer to struct hrtimer 251 * @hrtimer: pointer to struct hrtimer
252 */ 252 */
253 DEFINE_EVENT(hrtimer_class, hrtimer_cancel, 253 DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
254 254
255 TP_PROTO(struct hrtimer *hrtimer), 255 TP_PROTO(struct hrtimer *hrtimer),
256 256
257 TP_ARGS(hrtimer) 257 TP_ARGS(hrtimer)
258 ); 258 );
259 259
260 /** 260 /**
261 * itimer_state - called when itimer is started or canceled 261 * itimer_state - called when itimer is started or canceled
262 * @which: name of the interval timer 262 * @which: name of the interval timer
263 * @value: the itimers value, itimer is canceled if value->it_value is 263 * @value: the itimers value, itimer is canceled if value->it_value is
264 * zero, otherwise it is started 264 * zero, otherwise it is started
265 * @expires: the itimers expiry time 265 * @expires: the itimers expiry time
266 */ 266 */
267 TRACE_EVENT(itimer_state, 267 TRACE_EVENT(itimer_state,
268 268
269 TP_PROTO(int which, const struct itimerval *const value, 269 TP_PROTO(int which, const struct itimerval *const value,
270 cputime_t expires), 270 cputime_t expires),
271 271
272 TP_ARGS(which, value, expires), 272 TP_ARGS(which, value, expires),
273 273
274 TP_STRUCT__entry( 274 TP_STRUCT__entry(
275 __field( int, which ) 275 __field( int, which )
276 __field( cputime_t, expires ) 276 __field( cputime_t, expires )
277 __field( long, value_sec ) 277 __field( long, value_sec )
278 __field( long, value_usec ) 278 __field( long, value_usec )
279 __field( long, interval_sec ) 279 __field( long, interval_sec )
280 __field( long, interval_usec ) 280 __field( long, interval_usec )
281 ), 281 ),
282 282
283 TP_fast_assign( 283 TP_fast_assign(
284 __entry->which = which; 284 __entry->which = which;
285 __entry->expires = expires; 285 __entry->expires = expires;
286 __entry->value_sec = value->it_value.tv_sec; 286 __entry->value_sec = value->it_value.tv_sec;
287 __entry->value_usec = value->it_value.tv_usec; 287 __entry->value_usec = value->it_value.tv_usec;
288 __entry->interval_sec = value->it_interval.tv_sec; 288 __entry->interval_sec = value->it_interval.tv_sec;
289 __entry->interval_usec = value->it_interval.tv_usec; 289 __entry->interval_usec = value->it_interval.tv_usec;
290 ), 290 ),
291 291
292 TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld", 292 TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld",
293 __entry->which, (unsigned long long)__entry->expires, 293 __entry->which, (unsigned long long)__entry->expires,
294 __entry->value_sec, __entry->value_usec, 294 __entry->value_sec, __entry->value_usec,
295 __entry->interval_sec, __entry->interval_usec) 295 __entry->interval_sec, __entry->interval_usec)
296 ); 296 );
297 297
298 /** 298 /**
299 * itimer_expire - called when itimer expires 299 * itimer_expire - called when itimer expires
300 * @which: type of the interval timer 300 * @which: type of the interval timer
301 * @pid: pid of the process which owns the timer 301 * @pid: pid of the process which owns the timer
302 * @now: current time, used to calculate the latency of itimer 302 * @now: current time, used to calculate the latency of itimer
303 */ 303 */
304 TRACE_EVENT(itimer_expire, 304 TRACE_EVENT(itimer_expire,
305 305
306 TP_PROTO(int which, struct pid *pid, cputime_t now), 306 TP_PROTO(int which, struct pid *pid, cputime_t now),
307 307
308 TP_ARGS(which, pid, now), 308 TP_ARGS(which, pid, now),
309 309
310 TP_STRUCT__entry( 310 TP_STRUCT__entry(
311 __field( int , which ) 311 __field( int , which )
312 __field( pid_t, pid ) 312 __field( pid_t, pid )
313 __field( cputime_t, now ) 313 __field( cputime_t, now )
314 ), 314 ),
315 315
316 TP_fast_assign( 316 TP_fast_assign(
317 __entry->which = which; 317 __entry->which = which;
318 __entry->now = now; 318 __entry->now = now;
319 __entry->pid = pid_nr(pid); 319 __entry->pid = pid_nr(pid);
320 ), 320 ),
321 321
322 TP_printk("which=%d pid=%d now=%llu", __entry->which, 322 TP_printk("which=%d pid=%d now=%llu", __entry->which,
323 (int) __entry->pid, (unsigned long long)__entry->now) 323 (int) __entry->pid, (unsigned long long)__entry->now)
324 ); 324 );
325 325
326 #ifdef CONFIG_NO_HZ_FULL
327 TRACE_EVENT(tick_stop,
328
329 TP_PROTO(int success, char *error_msg),
330
331 TP_ARGS(success, error_msg),
332
333 TP_STRUCT__entry(
334 __field( int , success )
335 __string( msg, error_msg )
336 ),
337
338 TP_fast_assign(
339 __entry->success = success;
340 __assign_str(msg, error_msg);
341 ),
342
343 TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
344 );
345 #endif
346
326 #endif /* _TRACE_TIMER_H */ 347 #endif /* _TRACE_TIMER_H */
327 348
328 /* This part must be outside protection */ 349 /* This part must be outside protection */
329 #include <trace/define_trace.h> 350 #include <trace/define_trace.h>
330 351
kernel/time/tick-sched.c
1 /* 1 /*
2 * linux/kernel/time/tick-sched.c 2 * linux/kernel/time/tick-sched.c
3 * 3 *
4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> 4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar 5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner 6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
7 * 7 *
8 * No idle tick implementation for low and high resolution timers 8 * No idle tick implementation for low and high resolution timers
9 * 9 *
10 * Started by: Thomas Gleixner and Ingo Molnar 10 * Started by: Thomas Gleixner and Ingo Molnar
11 * 11 *
12 * Distribute under GPLv2. 12 * Distribute under GPLv2.
13 */ 13 */
14 #include <linux/cpu.h> 14 #include <linux/cpu.h>
15 #include <linux/err.h> 15 #include <linux/err.h>
16 #include <linux/hrtimer.h> 16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h> 17 #include <linux/interrupt.h>
18 #include <linux/kernel_stat.h> 18 #include <linux/kernel_stat.h>
19 #include <linux/percpu.h> 19 #include <linux/percpu.h>
20 #include <linux/profile.h> 20 #include <linux/profile.h>
21 #include <linux/sched.h> 21 #include <linux/sched.h>
22 #include <linux/module.h> 22 #include <linux/module.h>
23 #include <linux/irq_work.h> 23 #include <linux/irq_work.h>
24 #include <linux/posix-timers.h> 24 #include <linux/posix-timers.h>
25 #include <linux/perf_event.h> 25 #include <linux/perf_event.h>
26 26
27 #include <asm/irq_regs.h> 27 #include <asm/irq_regs.h>
28 28
29 #include "tick-internal.h" 29 #include "tick-internal.h"
30 30
31 #include <trace/events/timer.h>
32
31 /* 33 /*
32 * Per cpu nohz control structure 34 * Per cpu nohz control structure
33 */ 35 */
34 DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); 36 DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
35 37
36 /* 38 /*
37 * The time, when the last jiffy update happened. Protected by jiffies_lock. 39 * The time, when the last jiffy update happened. Protected by jiffies_lock.
38 */ 40 */
39 static ktime_t last_jiffies_update; 41 static ktime_t last_jiffies_update;
40 42
41 struct tick_sched *tick_get_tick_sched(int cpu) 43 struct tick_sched *tick_get_tick_sched(int cpu)
42 { 44 {
43 return &per_cpu(tick_cpu_sched, cpu); 45 return &per_cpu(tick_cpu_sched, cpu);
44 } 46 }
45 47
46 /* 48 /*
47 * Must be called with interrupts disabled ! 49 * Must be called with interrupts disabled !
48 */ 50 */
49 static void tick_do_update_jiffies64(ktime_t now) 51 static void tick_do_update_jiffies64(ktime_t now)
50 { 52 {
51 unsigned long ticks = 0; 53 unsigned long ticks = 0;
52 ktime_t delta; 54 ktime_t delta;
53 55
54 /* 56 /*
55 * Do a quick check without holding jiffies_lock: 57 * Do a quick check without holding jiffies_lock:
56 */ 58 */
57 delta = ktime_sub(now, last_jiffies_update); 59 delta = ktime_sub(now, last_jiffies_update);
58 if (delta.tv64 < tick_period.tv64) 60 if (delta.tv64 < tick_period.tv64)
59 return; 61 return;
60 62
61 /* Reevalute with jiffies_lock held */ 63 /* Reevalute with jiffies_lock held */
62 write_seqlock(&jiffies_lock); 64 write_seqlock(&jiffies_lock);
63 65
64 delta = ktime_sub(now, last_jiffies_update); 66 delta = ktime_sub(now, last_jiffies_update);
65 if (delta.tv64 >= tick_period.tv64) { 67 if (delta.tv64 >= tick_period.tv64) {
66 68
67 delta = ktime_sub(delta, tick_period); 69 delta = ktime_sub(delta, tick_period);
68 last_jiffies_update = ktime_add(last_jiffies_update, 70 last_jiffies_update = ktime_add(last_jiffies_update,
69 tick_period); 71 tick_period);
70 72
71 /* Slow path for long timeouts */ 73 /* Slow path for long timeouts */
72 if (unlikely(delta.tv64 >= tick_period.tv64)) { 74 if (unlikely(delta.tv64 >= tick_period.tv64)) {
73 s64 incr = ktime_to_ns(tick_period); 75 s64 incr = ktime_to_ns(tick_period);
74 76
75 ticks = ktime_divns(delta, incr); 77 ticks = ktime_divns(delta, incr);
76 78
77 last_jiffies_update = ktime_add_ns(last_jiffies_update, 79 last_jiffies_update = ktime_add_ns(last_jiffies_update,
78 incr * ticks); 80 incr * ticks);
79 } 81 }
80 do_timer(++ticks); 82 do_timer(++ticks);
81 83
82 /* Keep the tick_next_period variable up to date */ 84 /* Keep the tick_next_period variable up to date */
83 tick_next_period = ktime_add(last_jiffies_update, tick_period); 85 tick_next_period = ktime_add(last_jiffies_update, tick_period);
84 } 86 }
85 write_sequnlock(&jiffies_lock); 87 write_sequnlock(&jiffies_lock);
86 } 88 }
87 89
88 /* 90 /*
89 * Initialize and return retrieve the jiffies update. 91 * Initialize and return retrieve the jiffies update.
90 */ 92 */
91 static ktime_t tick_init_jiffy_update(void) 93 static ktime_t tick_init_jiffy_update(void)
92 { 94 {
93 ktime_t period; 95 ktime_t period;
94 96
95 write_seqlock(&jiffies_lock); 97 write_seqlock(&jiffies_lock);
96 /* Did we start the jiffies update yet ? */ 98 /* Did we start the jiffies update yet ? */
97 if (last_jiffies_update.tv64 == 0) 99 if (last_jiffies_update.tv64 == 0)
98 last_jiffies_update = tick_next_period; 100 last_jiffies_update = tick_next_period;
99 period = last_jiffies_update; 101 period = last_jiffies_update;
100 write_sequnlock(&jiffies_lock); 102 write_sequnlock(&jiffies_lock);
101 return period; 103 return period;
102 } 104 }
103 105
104 106
105 static void tick_sched_do_timer(ktime_t now) 107 static void tick_sched_do_timer(ktime_t now)
106 { 108 {
107 int cpu = smp_processor_id(); 109 int cpu = smp_processor_id();
108 110
109 #ifdef CONFIG_NO_HZ_COMMON 111 #ifdef CONFIG_NO_HZ_COMMON
110 /* 112 /*
111 * Check if the do_timer duty was dropped. We don't care about 113 * Check if the do_timer duty was dropped. We don't care about
112 * concurrency: This happens only when the cpu in charge went 114 * concurrency: This happens only when the cpu in charge went
113 * into a long sleep. If two cpus happen to assign themself to 115 * into a long sleep. If two cpus happen to assign themself to
114 * this duty, then the jiffies update is still serialized by 116 * this duty, then the jiffies update is still serialized by
115 * jiffies_lock. 117 * jiffies_lock.
116 */ 118 */
117 if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) 119 if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
118 && !tick_nohz_full_cpu(cpu)) 120 && !tick_nohz_full_cpu(cpu))
119 tick_do_timer_cpu = cpu; 121 tick_do_timer_cpu = cpu;
120 #endif 122 #endif
121 123
122 /* Check, if the jiffies need an update */ 124 /* Check, if the jiffies need an update */
123 if (tick_do_timer_cpu == cpu) 125 if (tick_do_timer_cpu == cpu)
124 tick_do_update_jiffies64(now); 126 tick_do_update_jiffies64(now);
125 } 127 }
126 128
127 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) 129 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
128 { 130 {
129 #ifdef CONFIG_NO_HZ_COMMON 131 #ifdef CONFIG_NO_HZ_COMMON
130 /* 132 /*
131 * When we are idle and the tick is stopped, we have to touch 133 * When we are idle and the tick is stopped, we have to touch
132 * the watchdog as we might not schedule for a really long 134 * the watchdog as we might not schedule for a really long
133 * time. This happens on complete idle SMP systems while 135 * time. This happens on complete idle SMP systems while
134 * waiting on the login prompt. We also increment the "start of 136 * waiting on the login prompt. We also increment the "start of
135 * idle" jiffy stamp so the idle accounting adjustment we do 137 * idle" jiffy stamp so the idle accounting adjustment we do
136 * when we go busy again does not account too much ticks. 138 * when we go busy again does not account too much ticks.
137 */ 139 */
138 if (ts->tick_stopped) { 140 if (ts->tick_stopped) {
139 touch_softlockup_watchdog(); 141 touch_softlockup_watchdog();
140 if (is_idle_task(current)) 142 if (is_idle_task(current))
141 ts->idle_jiffies++; 143 ts->idle_jiffies++;
142 } 144 }
143 #endif 145 #endif
144 update_process_times(user_mode(regs)); 146 update_process_times(user_mode(regs));
145 profile_tick(CPU_PROFILING); 147 profile_tick(CPU_PROFILING);
146 } 148 }
147 149
148 #ifdef CONFIG_NO_HZ_FULL 150 #ifdef CONFIG_NO_HZ_FULL
149 static cpumask_var_t nohz_full_mask; 151 static cpumask_var_t nohz_full_mask;
150 bool have_nohz_full_mask; 152 bool have_nohz_full_mask;
151 153
152 static bool can_stop_full_tick(void) 154 static bool can_stop_full_tick(void)
153 { 155 {
154 WARN_ON_ONCE(!irqs_disabled()); 156 WARN_ON_ONCE(!irqs_disabled());
155 157
156 if (!sched_can_stop_tick()) 158 if (!sched_can_stop_tick()) {
159 trace_tick_stop(0, "more than 1 task in runqueue\n");
157 return false; 160 return false;
161 }
158 162
159 if (!posix_cpu_timers_can_stop_tick(current)) 163 if (!posix_cpu_timers_can_stop_tick(current)) {
164 trace_tick_stop(0, "posix timers running\n");
160 return false; 165 return false;
166 }
161 167
162 if (!perf_event_can_stop_tick()) 168 if (!perf_event_can_stop_tick()) {
169 trace_tick_stop(0, "perf events running\n");
163 return false; 170 return false;
171 }
164 172
165 /* sched_clock_tick() needs us? */ 173 /* sched_clock_tick() needs us? */
166 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 174 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
167 /* 175 /*
168 * TODO: kick full dynticks CPUs when 176 * TODO: kick full dynticks CPUs when
169 * sched_clock_stable is set. 177 * sched_clock_stable is set.
170 */ 178 */
171 if (!sched_clock_stable) 179 if (!sched_clock_stable) {
180 trace_tick_stop(0, "unstable sched clock\n");
172 return false; 181 return false;
182 }
173 #endif 183 #endif
174 184
175 return true; 185 return true;
176 } 186 }
177 187
178 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); 188 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
179 189
180 /* 190 /*
181 * Re-evaluate the need for the tick on the current CPU 191 * Re-evaluate the need for the tick on the current CPU
182 * and restart it if necessary. 192 * and restart it if necessary.
183 */ 193 */
184 void tick_nohz_full_check(void) 194 void tick_nohz_full_check(void)
185 { 195 {
186 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 196 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
187 197
188 if (tick_nohz_full_cpu(smp_processor_id())) { 198 if (tick_nohz_full_cpu(smp_processor_id())) {
189 if (ts->tick_stopped && !is_idle_task(current)) { 199 if (ts->tick_stopped && !is_idle_task(current)) {
190 if (!can_stop_full_tick()) 200 if (!can_stop_full_tick())
191 tick_nohz_restart_sched_tick(ts, ktime_get()); 201 tick_nohz_restart_sched_tick(ts, ktime_get());
192 } 202 }
193 } 203 }
194 } 204 }
195 205
196 static void nohz_full_kick_work_func(struct irq_work *work) 206 static void nohz_full_kick_work_func(struct irq_work *work)
197 { 207 {
198 tick_nohz_full_check(); 208 tick_nohz_full_check();
199 } 209 }
200 210
201 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { 211 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
202 .func = nohz_full_kick_work_func, 212 .func = nohz_full_kick_work_func,
203 }; 213 };
204 214
205 /* 215 /*
206 * Kick the current CPU if it's full dynticks in order to force it to 216 * Kick the current CPU if it's full dynticks in order to force it to
207 * re-evaluate its dependency on the tick and restart it if necessary. 217 * re-evaluate its dependency on the tick and restart it if necessary.
208 */ 218 */
209 void tick_nohz_full_kick(void) 219 void tick_nohz_full_kick(void)
210 { 220 {
211 if (tick_nohz_full_cpu(smp_processor_id())) 221 if (tick_nohz_full_cpu(smp_processor_id()))
212 irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); 222 irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
213 } 223 }
214 224
215 static void nohz_full_kick_ipi(void *info) 225 static void nohz_full_kick_ipi(void *info)
216 { 226 {
217 tick_nohz_full_check(); 227 tick_nohz_full_check();
218 } 228 }
219 229
220 /* 230 /*
221 * Kick all full dynticks CPUs in order to force these to re-evaluate 231 * Kick all full dynticks CPUs in order to force these to re-evaluate
222 * their dependency on the tick and restart it if necessary. 232 * their dependency on the tick and restart it if necessary.
223 */ 233 */
224 void tick_nohz_full_kick_all(void) 234 void tick_nohz_full_kick_all(void)
225 { 235 {
226 if (!have_nohz_full_mask) 236 if (!have_nohz_full_mask)
227 return; 237 return;
228 238
229 preempt_disable(); 239 preempt_disable();
230 smp_call_function_many(nohz_full_mask, 240 smp_call_function_many(nohz_full_mask,
231 nohz_full_kick_ipi, NULL, false); 241 nohz_full_kick_ipi, NULL, false);
232 preempt_enable(); 242 preempt_enable();
233 } 243 }
234 244
235 /* 245 /*
236 * Re-evaluate the need for the tick as we switch the current task. 246 * Re-evaluate the need for the tick as we switch the current task.
237 * It might need the tick due to per task/process properties: 247 * It might need the tick due to per task/process properties:
238 * perf events, posix cpu timers, ... 248 * perf events, posix cpu timers, ...
239 */ 249 */
240 void tick_nohz_task_switch(struct task_struct *tsk) 250 void tick_nohz_task_switch(struct task_struct *tsk)
241 { 251 {
242 unsigned long flags; 252 unsigned long flags;
243 253
244 if (!tick_nohz_full_cpu(smp_processor_id())) 254 if (!tick_nohz_full_cpu(smp_processor_id()))
245 return; 255 return;
246 256
247 local_irq_save(flags); 257 local_irq_save(flags);
248 258
249 if (tick_nohz_tick_stopped() && !can_stop_full_tick()) 259 if (tick_nohz_tick_stopped() && !can_stop_full_tick())
250 tick_nohz_full_kick(); 260 tick_nohz_full_kick();
251 261
252 local_irq_restore(flags); 262 local_irq_restore(flags);
253 } 263 }
254 264
255 int tick_nohz_full_cpu(int cpu) 265 int tick_nohz_full_cpu(int cpu)
256 { 266 {
257 if (!have_nohz_full_mask) 267 if (!have_nohz_full_mask)
258 return 0; 268 return 0;
259 269
260 return cpumask_test_cpu(cpu, nohz_full_mask); 270 return cpumask_test_cpu(cpu, nohz_full_mask);
261 } 271 }
262 272
263 /* Parse the boot-time nohz CPU list from the kernel parameters. */ 273 /* Parse the boot-time nohz CPU list from the kernel parameters. */
264 static int __init tick_nohz_full_setup(char *str) 274 static int __init tick_nohz_full_setup(char *str)
265 { 275 {
266 int cpu; 276 int cpu;
267 277
268 alloc_bootmem_cpumask_var(&nohz_full_mask); 278 alloc_bootmem_cpumask_var(&nohz_full_mask);
269 if (cpulist_parse(str, nohz_full_mask) < 0) { 279 if (cpulist_parse(str, nohz_full_mask) < 0) {
270 pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); 280 pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
271 return 1; 281 return 1;
272 } 282 }
273 283
274 cpu = smp_processor_id(); 284 cpu = smp_processor_id();
275 if (cpumask_test_cpu(cpu, nohz_full_mask)) { 285 if (cpumask_test_cpu(cpu, nohz_full_mask)) {
276 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); 286 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
277 cpumask_clear_cpu(cpu, nohz_full_mask); 287 cpumask_clear_cpu(cpu, nohz_full_mask);
278 } 288 }
279 have_nohz_full_mask = true; 289 have_nohz_full_mask = true;
280 290
281 return 1; 291 return 1;
282 } 292 }
283 __setup("nohz_full=", tick_nohz_full_setup); 293 __setup("nohz_full=", tick_nohz_full_setup);
284 294
285 static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, 295 static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
286 unsigned long action, 296 unsigned long action,
287 void *hcpu) 297 void *hcpu)
288 { 298 {
289 unsigned int cpu = (unsigned long)hcpu; 299 unsigned int cpu = (unsigned long)hcpu;
290 300
291 switch (action & ~CPU_TASKS_FROZEN) { 301 switch (action & ~CPU_TASKS_FROZEN) {
292 case CPU_DOWN_PREPARE: 302 case CPU_DOWN_PREPARE:
293 /* 303 /*
294 * If we handle the timekeeping duty for full dynticks CPUs, 304 * If we handle the timekeeping duty for full dynticks CPUs,
295 * we can't safely shutdown that CPU. 305 * we can't safely shutdown that CPU.
296 */ 306 */
297 if (have_nohz_full_mask && tick_do_timer_cpu == cpu) 307 if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
298 return -EINVAL; 308 return -EINVAL;
299 break; 309 break;
300 } 310 }
301 return NOTIFY_OK; 311 return NOTIFY_OK;
302 } 312 }
303 313
304 /* 314 /*
305 * Worst case string length in chunks of CPU range seems 2 steps 315 * Worst case string length in chunks of CPU range seems 2 steps
306 * separations: 0,2,4,6,... 316 * separations: 0,2,4,6,...
307 * This is NR_CPUS + sizeof('\0') 317 * This is NR_CPUS + sizeof('\0')
308 */ 318 */
309 static char __initdata nohz_full_buf[NR_CPUS + 1]; 319 static char __initdata nohz_full_buf[NR_CPUS + 1];
310 320
311 static int tick_nohz_init_all(void) 321 static int tick_nohz_init_all(void)
312 { 322 {
313 int err = -1; 323 int err = -1;
314 324
315 #ifdef CONFIG_NO_HZ_FULL_ALL 325 #ifdef CONFIG_NO_HZ_FULL_ALL
316 if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { 326 if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
317 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); 327 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
318 return err; 328 return err;
319 } 329 }
320 err = 0; 330 err = 0;
321 cpumask_setall(nohz_full_mask); 331 cpumask_setall(nohz_full_mask);
322 cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); 332 cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
323 have_nohz_full_mask = true; 333 have_nohz_full_mask = true;
324 #endif 334 #endif
325 return err; 335 return err;
326 } 336 }
327 337
328 void __init tick_nohz_init(void) 338 void __init tick_nohz_init(void)
329 { 339 {
330 int cpu; 340 int cpu;
331 341
332 if (!have_nohz_full_mask) { 342 if (!have_nohz_full_mask) {
333 if (tick_nohz_init_all() < 0) 343 if (tick_nohz_init_all() < 0)
334 return; 344 return;
335 } 345 }
336 346
337 cpu_notifier(tick_nohz_cpu_down_callback, 0); 347 cpu_notifier(tick_nohz_cpu_down_callback, 0);
338 348
339 /* Make sure full dynticks CPU are also RCU nocbs */ 349 /* Make sure full dynticks CPU are also RCU nocbs */
340 for_each_cpu(cpu, nohz_full_mask) { 350 for_each_cpu(cpu, nohz_full_mask) {
341 if (!rcu_is_nocb_cpu(cpu)) { 351 if (!rcu_is_nocb_cpu(cpu)) {
342 pr_warning("NO_HZ: CPU %d is not RCU nocb: " 352 pr_warning("NO_HZ: CPU %d is not RCU nocb: "
343 "cleared from nohz_full range", cpu); 353 "cleared from nohz_full range", cpu);
344 cpumask_clear_cpu(cpu, nohz_full_mask); 354 cpumask_clear_cpu(cpu, nohz_full_mask);
345 } 355 }
346 } 356 }
347 357
348 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); 358 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
349 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); 359 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
350 } 360 }
351 #else 361 #else
352 #define have_nohz_full_mask (0) 362 #define have_nohz_full_mask (0)
353 #endif 363 #endif
354 364
355 /* 365 /*
356 * NOHZ - aka dynamic tick functionality 366 * NOHZ - aka dynamic tick functionality
357 */ 367 */
358 #ifdef CONFIG_NO_HZ_COMMON 368 #ifdef CONFIG_NO_HZ_COMMON
359 /* 369 /*
360 * NO HZ enabled ? 370 * NO HZ enabled ?
361 */ 371 */
362 int tick_nohz_enabled __read_mostly = 1; 372 int tick_nohz_enabled __read_mostly = 1;
363 373
364 /* 374 /*
365 * Enable / Disable tickless mode 375 * Enable / Disable tickless mode
366 */ 376 */
367 static int __init setup_tick_nohz(char *str) 377 static int __init setup_tick_nohz(char *str)
368 { 378 {
369 if (!strcmp(str, "off")) 379 if (!strcmp(str, "off"))
370 tick_nohz_enabled = 0; 380 tick_nohz_enabled = 0;
371 else if (!strcmp(str, "on")) 381 else if (!strcmp(str, "on"))
372 tick_nohz_enabled = 1; 382 tick_nohz_enabled = 1;
373 else 383 else
374 return 0; 384 return 0;
375 return 1; 385 return 1;
376 } 386 }
377 387
378 __setup("nohz=", setup_tick_nohz); 388 __setup("nohz=", setup_tick_nohz);
379 389
380 /** 390 /**
381 * tick_nohz_update_jiffies - update jiffies when idle was interrupted 391 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
382 * 392 *
383 * Called from interrupt entry when the CPU was idle 393 * Called from interrupt entry when the CPU was idle
384 * 394 *
385 * In case the sched_tick was stopped on this CPU, we have to check if jiffies 395 * In case the sched_tick was stopped on this CPU, we have to check if jiffies
386 * must be updated. Otherwise an interrupt handler could use a stale jiffy 396 * must be updated. Otherwise an interrupt handler could use a stale jiffy
387 * value. We do this unconditionally on any cpu, as we don't know whether the 397 * value. We do this unconditionally on any cpu, as we don't know whether the
388 * cpu, which has the update task assigned is in a long sleep. 398 * cpu, which has the update task assigned is in a long sleep.
389 */ 399 */
390 static void tick_nohz_update_jiffies(ktime_t now) 400 static void tick_nohz_update_jiffies(ktime_t now)
391 { 401 {
392 int cpu = smp_processor_id(); 402 int cpu = smp_processor_id();
393 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 403 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
394 unsigned long flags; 404 unsigned long flags;
395 405
396 ts->idle_waketime = now; 406 ts->idle_waketime = now;
397 407
398 local_irq_save(flags); 408 local_irq_save(flags);
399 tick_do_update_jiffies64(now); 409 tick_do_update_jiffies64(now);
400 local_irq_restore(flags); 410 local_irq_restore(flags);
401 411
402 touch_softlockup_watchdog(); 412 touch_softlockup_watchdog();
403 } 413 }
404 414
405 /* 415 /*
406 * Updates the per cpu time idle statistics counters 416 * Updates the per cpu time idle statistics counters
407 */ 417 */
408 static void 418 static void
409 update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time) 419 update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
410 { 420 {
411 ktime_t delta; 421 ktime_t delta;
412 422
413 if (ts->idle_active) { 423 if (ts->idle_active) {
414 delta = ktime_sub(now, ts->idle_entrytime); 424 delta = ktime_sub(now, ts->idle_entrytime);
415 if (nr_iowait_cpu(cpu) > 0) 425 if (nr_iowait_cpu(cpu) > 0)
416 ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); 426 ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
417 else 427 else
418 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); 428 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
419 ts->idle_entrytime = now; 429 ts->idle_entrytime = now;
420 } 430 }
421 431
422 if (last_update_time) 432 if (last_update_time)
423 *last_update_time = ktime_to_us(now); 433 *last_update_time = ktime_to_us(now);
424 434
425 } 435 }
426 436
427 static void tick_nohz_stop_idle(int cpu, ktime_t now) 437 static void tick_nohz_stop_idle(int cpu, ktime_t now)
428 { 438 {
429 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 439 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
430 440
431 update_ts_time_stats(cpu, ts, now, NULL); 441 update_ts_time_stats(cpu, ts, now, NULL);
432 ts->idle_active = 0; 442 ts->idle_active = 0;
433 443
434 sched_clock_idle_wakeup_event(0); 444 sched_clock_idle_wakeup_event(0);
435 } 445 }
436 446
437 static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) 447 static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
438 { 448 {
439 ktime_t now = ktime_get(); 449 ktime_t now = ktime_get();
440 450
441 ts->idle_entrytime = now; 451 ts->idle_entrytime = now;
442 ts->idle_active = 1; 452 ts->idle_active = 1;
443 sched_clock_idle_sleep_event(); 453 sched_clock_idle_sleep_event();
444 return now; 454 return now;
445 } 455 }
446 456
447 /** 457 /**
448 * get_cpu_idle_time_us - get the total idle time of a cpu 458 * get_cpu_idle_time_us - get the total idle time of a cpu
449 * @cpu: CPU number to query 459 * @cpu: CPU number to query
450 * @last_update_time: variable to store update time in. Do not update 460 * @last_update_time: variable to store update time in. Do not update
451 * counters if NULL. 461 * counters if NULL.
452 * 462 *
453 * Return the cummulative idle time (since boot) for a given 463 * Return the cummulative idle time (since boot) for a given
454 * CPU, in microseconds. 464 * CPU, in microseconds.
455 * 465 *
456 * This time is measured via accounting rather than sampling, 466 * This time is measured via accounting rather than sampling,
457 * and is as accurate as ktime_get() is. 467 * and is as accurate as ktime_get() is.
458 * 468 *
459 * This function returns -1 if NOHZ is not enabled. 469 * This function returns -1 if NOHZ is not enabled.
460 */ 470 */
461 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) 471 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
462 { 472 {
463 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 473 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
464 ktime_t now, idle; 474 ktime_t now, idle;
465 475
466 if (!tick_nohz_enabled) 476 if (!tick_nohz_enabled)
467 return -1; 477 return -1;
468 478
469 now = ktime_get(); 479 now = ktime_get();
470 if (last_update_time) { 480 if (last_update_time) {
471 update_ts_time_stats(cpu, ts, now, last_update_time); 481 update_ts_time_stats(cpu, ts, now, last_update_time);
472 idle = ts->idle_sleeptime; 482 idle = ts->idle_sleeptime;
473 } else { 483 } else {
474 if (ts->idle_active && !nr_iowait_cpu(cpu)) { 484 if (ts->idle_active && !nr_iowait_cpu(cpu)) {
475 ktime_t delta = ktime_sub(now, ts->idle_entrytime); 485 ktime_t delta = ktime_sub(now, ts->idle_entrytime);
476 486
477 idle = ktime_add(ts->idle_sleeptime, delta); 487 idle = ktime_add(ts->idle_sleeptime, delta);
478 } else { 488 } else {
479 idle = ts->idle_sleeptime; 489 idle = ts->idle_sleeptime;
480 } 490 }
481 } 491 }
482 492
483 return ktime_to_us(idle); 493 return ktime_to_us(idle);
484 494
485 } 495 }
486 EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); 496 EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
487 497
488 /** 498 /**
489 * get_cpu_iowait_time_us - get the total iowait time of a cpu 499 * get_cpu_iowait_time_us - get the total iowait time of a cpu
490 * @cpu: CPU number to query 500 * @cpu: CPU number to query
491 * @last_update_time: variable to store update time in. Do not update 501 * @last_update_time: variable to store update time in. Do not update
492 * counters if NULL. 502 * counters if NULL.
493 * 503 *
494 * Return the cummulative iowait time (since boot) for a given 504 * Return the cummulative iowait time (since boot) for a given
495 * CPU, in microseconds. 505 * CPU, in microseconds.
496 * 506 *
497 * This time is measured via accounting rather than sampling, 507 * This time is measured via accounting rather than sampling,
498 * and is as accurate as ktime_get() is. 508 * and is as accurate as ktime_get() is.
499 * 509 *
500 * This function returns -1 if NOHZ is not enabled. 510 * This function returns -1 if NOHZ is not enabled.
501 */ 511 */
502 u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) 512 u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
503 { 513 {
504 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 514 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
505 ktime_t now, iowait; 515 ktime_t now, iowait;
506 516
507 if (!tick_nohz_enabled) 517 if (!tick_nohz_enabled)
508 return -1; 518 return -1;
509 519
510 now = ktime_get(); 520 now = ktime_get();
511 if (last_update_time) { 521 if (last_update_time) {
512 update_ts_time_stats(cpu, ts, now, last_update_time); 522 update_ts_time_stats(cpu, ts, now, last_update_time);
513 iowait = ts->iowait_sleeptime; 523 iowait = ts->iowait_sleeptime;
514 } else { 524 } else {
515 if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { 525 if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
516 ktime_t delta = ktime_sub(now, ts->idle_entrytime); 526 ktime_t delta = ktime_sub(now, ts->idle_entrytime);
517 527
518 iowait = ktime_add(ts->iowait_sleeptime, delta); 528 iowait = ktime_add(ts->iowait_sleeptime, delta);
519 } else { 529 } else {
520 iowait = ts->iowait_sleeptime; 530 iowait = ts->iowait_sleeptime;
521 } 531 }
522 } 532 }
523 533
524 return ktime_to_us(iowait); 534 return ktime_to_us(iowait);
525 } 535 }
526 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); 536 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
527 537
528 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 538 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
529 ktime_t now, int cpu) 539 ktime_t now, int cpu)
530 { 540 {
531 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; 541 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
532 ktime_t last_update, expires, ret = { .tv64 = 0 }; 542 ktime_t last_update, expires, ret = { .tv64 = 0 };
533 unsigned long rcu_delta_jiffies; 543 unsigned long rcu_delta_jiffies;
534 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 544 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
535 u64 time_delta; 545 u64 time_delta;
536 546
537 /* Read jiffies and the time when jiffies were updated last */ 547 /* Read jiffies and the time when jiffies were updated last */
538 do { 548 do {
539 seq = read_seqbegin(&jiffies_lock); 549 seq = read_seqbegin(&jiffies_lock);
540 last_update = last_jiffies_update; 550 last_update = last_jiffies_update;
541 last_jiffies = jiffies; 551 last_jiffies = jiffies;
542 time_delta = timekeeping_max_deferment(); 552 time_delta = timekeeping_max_deferment();
543 } while (read_seqretry(&jiffies_lock, seq)); 553 } while (read_seqretry(&jiffies_lock, seq));
544 554
545 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || 555 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
546 arch_needs_cpu(cpu) || irq_work_needs_cpu()) { 556 arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
547 next_jiffies = last_jiffies + 1; 557 next_jiffies = last_jiffies + 1;
548 delta_jiffies = 1; 558 delta_jiffies = 1;
549 } else { 559 } else {
550 /* Get the next timer wheel timer */ 560 /* Get the next timer wheel timer */
551 next_jiffies = get_next_timer_interrupt(last_jiffies); 561 next_jiffies = get_next_timer_interrupt(last_jiffies);
552 delta_jiffies = next_jiffies - last_jiffies; 562 delta_jiffies = next_jiffies - last_jiffies;
553 if (rcu_delta_jiffies < delta_jiffies) { 563 if (rcu_delta_jiffies < delta_jiffies) {
554 next_jiffies = last_jiffies + rcu_delta_jiffies; 564 next_jiffies = last_jiffies + rcu_delta_jiffies;
555 delta_jiffies = rcu_delta_jiffies; 565 delta_jiffies = rcu_delta_jiffies;
556 } 566 }
557 } 567 }
558 /* 568 /*
559 * Do not stop the tick, if we are only one off 569 * Do not stop the tick, if we are only one off
560 * or if the cpu is required for rcu 570 * or if the cpu is required for rcu
561 */ 571 */
562 if (!ts->tick_stopped && delta_jiffies == 1) 572 if (!ts->tick_stopped && delta_jiffies == 1)
563 goto out; 573 goto out;
564 574
565 /* Schedule the tick, if we are at least one jiffie off */ 575 /* Schedule the tick, if we are at least one jiffie off */
566 if ((long)delta_jiffies >= 1) { 576 if ((long)delta_jiffies >= 1) {
567 577
568 /* 578 /*
569 * If this cpu is the one which updates jiffies, then 579 * If this cpu is the one which updates jiffies, then
570 * give up the assignment and let it be taken by the 580 * give up the assignment and let it be taken by the
571 * cpu which runs the tick timer next, which might be 581 * cpu which runs the tick timer next, which might be
572 * this cpu as well. If we don't drop this here the 582 * this cpu as well. If we don't drop this here the
573 * jiffies might be stale and do_timer() never 583 * jiffies might be stale and do_timer() never
574 * invoked. Keep track of the fact that it was the one 584 * invoked. Keep track of the fact that it was the one
575 * which had the do_timer() duty last. If this cpu is 585 * which had the do_timer() duty last. If this cpu is
576 * the one which had the do_timer() duty last, we 586 * the one which had the do_timer() duty last, we
577 * limit the sleep time to the timekeeping 587 * limit the sleep time to the timekeeping
578 * max_deferement value which we retrieved 588 * max_deferement value which we retrieved
579 * above. Otherwise we can sleep as long as we want. 589 * above. Otherwise we can sleep as long as we want.
580 */ 590 */
581 if (cpu == tick_do_timer_cpu) { 591 if (cpu == tick_do_timer_cpu) {
582 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 592 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
583 ts->do_timer_last = 1; 593 ts->do_timer_last = 1;
584 } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { 594 } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
585 time_delta = KTIME_MAX; 595 time_delta = KTIME_MAX;
586 ts->do_timer_last = 0; 596 ts->do_timer_last = 0;
587 } else if (!ts->do_timer_last) { 597 } else if (!ts->do_timer_last) {
588 time_delta = KTIME_MAX; 598 time_delta = KTIME_MAX;
589 } 599 }
590 600
591 /* 601 /*
592 * calculate the expiry time for the next timer wheel 602 * calculate the expiry time for the next timer wheel
593 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals 603 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
594 * that there is no timer pending or at least extremely 604 * that there is no timer pending or at least extremely
595 * far into the future (12 days for HZ=1000). In this 605 * far into the future (12 days for HZ=1000). In this
596 * case we set the expiry to the end of time. 606 * case we set the expiry to the end of time.
597 */ 607 */
598 if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { 608 if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
599 /* 609 /*
600 * Calculate the time delta for the next timer event. 610 * Calculate the time delta for the next timer event.
601 * If the time delta exceeds the maximum time delta 611 * If the time delta exceeds the maximum time delta
602 * permitted by the current clocksource then adjust 612 * permitted by the current clocksource then adjust
603 * the time delta accordingly to ensure the 613 * the time delta accordingly to ensure the
604 * clocksource does not wrap. 614 * clocksource does not wrap.
605 */ 615 */
606 time_delta = min_t(u64, time_delta, 616 time_delta = min_t(u64, time_delta,
607 tick_period.tv64 * delta_jiffies); 617 tick_period.tv64 * delta_jiffies);
608 } 618 }
609 619
610 if (time_delta < KTIME_MAX) 620 if (time_delta < KTIME_MAX)
611 expires = ktime_add_ns(last_update, time_delta); 621 expires = ktime_add_ns(last_update, time_delta);
612 else 622 else
613 expires.tv64 = KTIME_MAX; 623 expires.tv64 = KTIME_MAX;
614 624
615 /* Skip reprogram of event if its not changed */ 625 /* Skip reprogram of event if its not changed */
616 if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 626 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
617 goto out; 627 goto out;
618 628
619 ret = expires; 629 ret = expires;
620 630
621 /* 631 /*
622 * nohz_stop_sched_tick can be called several times before 632 * nohz_stop_sched_tick can be called several times before
623 * the nohz_restart_sched_tick is called. This happens when 633 * the nohz_restart_sched_tick is called. This happens when
624 * interrupts arrive which do not cause a reschedule. In the 634 * interrupts arrive which do not cause a reschedule. In the
625 * first call we save the current tick time, so we can restart 635 * first call we save the current tick time, so we can restart
626 * the scheduler tick in nohz_restart_sched_tick. 636 * the scheduler tick in nohz_restart_sched_tick.
627 */ 637 */
628 if (!ts->tick_stopped) { 638 if (!ts->tick_stopped) {
629 nohz_balance_enter_idle(cpu); 639 nohz_balance_enter_idle(cpu);
630 calc_load_enter_idle(); 640 calc_load_enter_idle();
631 641
632 ts->last_tick = hrtimer_get_expires(&ts->sched_timer); 642 ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
633 ts->tick_stopped = 1; 643 ts->tick_stopped = 1;
644 trace_tick_stop(1, " ");
634 } 645 }
635 646
636 /* 647 /*
637 * If the expiration time == KTIME_MAX, then 648 * If the expiration time == KTIME_MAX, then
638 * in this case we simply stop the tick timer. 649 * in this case we simply stop the tick timer.
639 */ 650 */
640 if (unlikely(expires.tv64 == KTIME_MAX)) { 651 if (unlikely(expires.tv64 == KTIME_MAX)) {
641 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 652 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
642 hrtimer_cancel(&ts->sched_timer); 653 hrtimer_cancel(&ts->sched_timer);
643 goto out; 654 goto out;
644 } 655 }
645 656
646 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 657 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
647 hrtimer_start(&ts->sched_timer, expires, 658 hrtimer_start(&ts->sched_timer, expires,
648 HRTIMER_MODE_ABS_PINNED); 659 HRTIMER_MODE_ABS_PINNED);
649 /* Check, if the timer was already in the past */ 660 /* Check, if the timer was already in the past */
650 if (hrtimer_active(&ts->sched_timer)) 661 if (hrtimer_active(&ts->sched_timer))
651 goto out; 662 goto out;
652 } else if (!tick_program_event(expires, 0)) 663 } else if (!tick_program_event(expires, 0))
653 goto out; 664 goto out;
654 /* 665 /*
655 * We are past the event already. So we crossed a 666 * We are past the event already. So we crossed a
656 * jiffie boundary. Update jiffies and raise the 667 * jiffie boundary. Update jiffies and raise the
657 * softirq. 668 * softirq.
658 */ 669 */
659 tick_do_update_jiffies64(ktime_get()); 670 tick_do_update_jiffies64(ktime_get());
660 } 671 }
661 raise_softirq_irqoff(TIMER_SOFTIRQ); 672 raise_softirq_irqoff(TIMER_SOFTIRQ);
662 out: 673 out:
663 ts->next_jiffies = next_jiffies; 674 ts->next_jiffies = next_jiffies;
664 ts->last_jiffies = last_jiffies; 675 ts->last_jiffies = last_jiffies;
665 ts->sleep_length = ktime_sub(dev->next_event, now); 676 ts->sleep_length = ktime_sub(dev->next_event, now);
666 677
667 return ret; 678 return ret;
668 } 679 }
669 680
670 static void tick_nohz_full_stop_tick(struct tick_sched *ts) 681 static void tick_nohz_full_stop_tick(struct tick_sched *ts)
671 { 682 {
672 #ifdef CONFIG_NO_HZ_FULL 683 #ifdef CONFIG_NO_HZ_FULL
673 int cpu = smp_processor_id(); 684 int cpu = smp_processor_id();
674 685
675 if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) 686 if (!tick_nohz_full_cpu(cpu) || is_idle_task(current))
676 return; 687 return;
677 688
678 if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) 689 if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
679 return; 690 return;
680 691
681 if (!can_stop_full_tick()) 692 if (!can_stop_full_tick())
682 return; 693 return;
683 694
684 tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); 695 tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
685 #endif 696 #endif
686 } 697 }
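Since the point of these tracepoints is to let users diagnose why the full dynticks tick keeps running, they are meant to be enabled and read from userspace at run time. A minimal sketch, assuming tracefs is mounted under /sys/kernel/debug/tracing and the event lands in the timer trace system as events/timer/tick_stop (both paths are assumptions):

#include <stdio.h>
#include <string.h>

/* Enable the (assumed) tick_stop event and print matching trace lines. */
static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[512];
	FILE *trace;

	if (write_str("/sys/kernel/debug/tracing/events/timer/tick_stop/enable", "1"))
		return 1;

	trace = fopen("/sys/kernel/debug/tracing/trace", "r");
	if (!trace)
		return 1;
	while (fgets(line, sizeof(line), trace))
		if (strstr(line, "tick_stop"))
			fputs(line, stdout);
	fclose(trace);
	return 0;
}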
687 698
688 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) 699 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
689 { 700 {
690 /* 701 /*
691 * If this cpu is offline and it is the one which updates 702 * If this cpu is offline and it is the one which updates
692 * jiffies, then give up the assignment and let it be taken by 703 * jiffies, then give up the assignment and let it be taken by
693 * the cpu which runs the tick timer next. If we don't drop 704 * the cpu which runs the tick timer next. If we don't drop
694 * this here the jiffies might be stale and do_timer() never 705 * this here the jiffies might be stale and do_timer() never
695 * invoked. 706 * invoked.
696 */ 707 */
697 if (unlikely(!cpu_online(cpu))) { 708 if (unlikely(!cpu_online(cpu))) {
698 if (cpu == tick_do_timer_cpu) 709 if (cpu == tick_do_timer_cpu)
699 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 710 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
700 } 711 }
701 712
702 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 713 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
703 return false; 714 return false;
704 715
705 if (need_resched()) 716 if (need_resched())
706 return false; 717 return false;
707 718
708 if (unlikely(local_softirq_pending() && cpu_online(cpu))) { 719 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
709 static int ratelimit; 720 static int ratelimit;
710 721
711 if (ratelimit < 10 && 722 if (ratelimit < 10 &&
712 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { 723 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
713 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", 724 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
714 (unsigned int) local_softirq_pending()); 725 (unsigned int) local_softirq_pending());
715 ratelimit++; 726 ratelimit++;
716 } 727 }
717 return false; 728 return false;
718 } 729 }
719 730
720 if (have_nohz_full_mask) { 731 if (have_nohz_full_mask) {
721 /* 732 /*
722 * Keep the tick alive to guarantee timekeeping progression 733 * Keep the tick alive to guarantee timekeeping progression
723 * if there are full dynticks CPUs around 734 * if there are full dynticks CPUs around
724 */ 735 */
725 if (tick_do_timer_cpu == cpu) 736 if (tick_do_timer_cpu == cpu)
726 return false; 737 return false;
727 /* 738 /*
728 * Boot safety: make sure the timekeeping duty has been 739 * Boot safety: make sure the timekeeping duty has been
729 * assigned before entering dyntick-idle mode, 740 * assigned before entering dyntick-idle mode,
730 */ 741 */
731 if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) 742 if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
732 return false; 743 return false;
733 } 744 }
734 745
735 return true; 746 return true;
736 } 747 }
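The early returns above are exactly the kind of "why didn't the tick stop?" decisions this commit wants to make visible. A hedged sketch of how such a bail-out path could be annotated with the new tracepoint; the helper name, the reason strings and the trace_tick_stop(0, ...) convention are illustrative, not part of this patch:

/* Hypothetical traced variant of a stop-tick check: record the reason
 * the tick was kept running so it shows up in the trace buffer. */
static bool can_stop_idle_tick_traced(int cpu, struct tick_sched *ts)
{
	if (need_resched()) {
		trace_tick_stop(0, "need_resched()");
		return false;
	}

	if (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK) {
		trace_tick_stop(0, "softirq pending");
		return false;
	}

	return true;
}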
737 748
738 static void __tick_nohz_idle_enter(struct tick_sched *ts) 749 static void __tick_nohz_idle_enter(struct tick_sched *ts)
739 { 750 {
740 ktime_t now, expires; 751 ktime_t now, expires;
741 int cpu = smp_processor_id(); 752 int cpu = smp_processor_id();
742 753
743 now = tick_nohz_start_idle(cpu, ts); 754 now = tick_nohz_start_idle(cpu, ts);
744 755
745 if (can_stop_idle_tick(cpu, ts)) { 756 if (can_stop_idle_tick(cpu, ts)) {
746 int was_stopped = ts->tick_stopped; 757 int was_stopped = ts->tick_stopped;
747 758
748 ts->idle_calls++; 759 ts->idle_calls++;
749 760
750 expires = tick_nohz_stop_sched_tick(ts, now, cpu); 761 expires = tick_nohz_stop_sched_tick(ts, now, cpu);
751 if (expires.tv64 > 0LL) { 762 if (expires.tv64 > 0LL) {
752 ts->idle_sleeps++; 763 ts->idle_sleeps++;
753 ts->idle_expires = expires; 764 ts->idle_expires = expires;
754 } 765 }
755 766
756 if (!was_stopped && ts->tick_stopped) 767 if (!was_stopped && ts->tick_stopped)
757 ts->idle_jiffies = ts->last_jiffies; 768 ts->idle_jiffies = ts->last_jiffies;
758 } 769 }
759 } 770 }
760 771
761 /** 772 /**
762 * tick_nohz_idle_enter - stop the idle tick from the idle task 773 * tick_nohz_idle_enter - stop the idle tick from the idle task
763 * 774 *
764 * When the next event is more than a tick into the future, stop the idle tick. 775 * When the next event is more than a tick into the future, stop the idle tick.
765 * Called when we start the idle loop. 776 * Called when we start the idle loop.
766 * 777 *
767 * The arch is responsible for calling: 778 * The arch is responsible for calling:
768 * 779 *
769 * - rcu_idle_enter() after its last use of RCU before the CPU is put 780 * - rcu_idle_enter() after its last use of RCU before the CPU is put
770 * to sleep. 781 * to sleep.
771 * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. 782 * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
772 */ 783 */
773 void tick_nohz_idle_enter(void) 784 void tick_nohz_idle_enter(void)
774 { 785 {
775 struct tick_sched *ts; 786 struct tick_sched *ts;
776 787
777 WARN_ON_ONCE(irqs_disabled()); 788 WARN_ON_ONCE(irqs_disabled());
778 789
779 /* 790 /*
780 * Update the idle state in the scheduler domain hierarchy 791 * Update the idle state in the scheduler domain hierarchy
781 * when tick_nohz_stop_sched_tick() is called from the idle loop. 792 * when tick_nohz_stop_sched_tick() is called from the idle loop.
782 * State will be updated to busy during the first busy tick after 793 * State will be updated to busy during the first busy tick after
783 * exiting idle. 794 * exiting idle.
784 */ 795 */
785 set_cpu_sd_state_idle(); 796 set_cpu_sd_state_idle();
786 797
787 local_irq_disable(); 798 local_irq_disable();
788 799
789 ts = &__get_cpu_var(tick_cpu_sched); 800 ts = &__get_cpu_var(tick_cpu_sched);
790 /* 801 /*
791 * Set ts->inidle unconditionally. Even if the system did not 802 * Set ts->inidle unconditionally. Even if the system did not
792 * switch to nohz mode, the cpu frequency governors rely on the 803 * switch to nohz mode, the cpu frequency governors rely on the
793 * update of the idle time accounting in tick_nohz_start_idle(). 804 * update of the idle time accounting in tick_nohz_start_idle().
794 */ 805 */
795 ts->inidle = 1; 806 ts->inidle = 1;
796 __tick_nohz_idle_enter(ts); 807 __tick_nohz_idle_enter(ts);
797 808
798 local_irq_enable(); 809 local_irq_enable();
799 } 810 }
800 EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); 811 EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
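The kernel-doc above leaves the RCU idle transitions to the architecture. A sketch of the ordering an arch idle loop is expected to follow; apart from the tick and RCU hooks, the names (arch_cpu_idle(), the loop structure) are illustrative, not a quote of any particular architecture:

/* Sketch of an arch idle loop honouring the documented ordering:
 * stop the tick, enter RCU idle around the actual sleep, then restore. */
static void cpu_idle_loop_sketch(void)
{
	while (1) {
		tick_nohz_idle_enter();		/* may stop the tick */

		while (!need_resched()) {
			rcu_idle_enter();	/* after the last use of RCU */
			arch_cpu_idle();	/* e.g. wfi/hlt, woken by an irq */
			rcu_idle_exit();	/* before the first use of RCU */
		}

		tick_nohz_idle_exit();		/* restart the tick, account idle */
		schedule();
	}
}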
801 812
802 /** 813 /**
803 * tick_nohz_irq_exit - update next tick event from interrupt exit 814 * tick_nohz_irq_exit - update next tick event from interrupt exit
804 * 815 *
805 * When an interrupt fires while we are idle and it doesn't cause 816 * When an interrupt fires while we are idle and it doesn't cause
806 * a reschedule, it may still add, modify or delete a timer, enqueue 817 * a reschedule, it may still add, modify or delete a timer, enqueue
807 * an RCU callback, etc... 818 * an RCU callback, etc...
808 * So we need to re-calculate and reprogram the next tick event. 819 * So we need to re-calculate and reprogram the next tick event.
809 */ 820 */
810 void tick_nohz_irq_exit(void) 821 void tick_nohz_irq_exit(void)
811 { 822 {
812 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 823 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
813 824
814 if (ts->inidle) { 825 if (ts->inidle) {
815 /* Cancel the timer because the CPU has already woken up from the C-states */ 826 /* Cancel the timer because the CPU has already woken up from the C-states */
816 menu_hrtimer_cancel(); 827 menu_hrtimer_cancel();
817 __tick_nohz_idle_enter(ts); 828 __tick_nohz_idle_enter(ts);
818 } else { 829 } else {
819 tick_nohz_full_stop_tick(ts); 830 tick_nohz_full_stop_tick(ts);
820 } 831 }
821 } 832 }
822 833
823 /** 834 /**
824 * tick_nohz_get_sleep_length - return the length of the current sleep 835 * tick_nohz_get_sleep_length - return the length of the current sleep
825 * 836 *
826 * Called from power state control code with interrupts disabled 837 * Called from power state control code with interrupts disabled
827 */ 838 */
828 ktime_t tick_nohz_get_sleep_length(void) 839 ktime_t tick_nohz_get_sleep_length(void)
829 { 840 {
830 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 841 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
831 842
832 return ts->sleep_length; 843 return ts->sleep_length;
833 } 844 }
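The sleep length computed in tick_nohz_stop_sched_tick() is what the cpuidle governors consume through this accessor. A hedged sketch of governor-side use, bounding the C-state choice by the time until the next tick event; the state table and helper below are assumptions, not the real cpuidle API:

/* Illustrative state table and selection: never pick a state whose
 * target residency exceeds the expected sleep length. */
struct idle_state_sketch {
	const char	*name;
	s64		target_residency_us;
};

static int pick_state_sketch(const struct idle_state_sketch *states, int nr)
{
	s64 sleep_us = ktime_to_us(tick_nohz_get_sleep_length());
	int i, best = 0;

	for (i = 0; i < nr; i++)
		if (states[i].target_residency_us <= sleep_us)
			best = i;

	return best;
}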
834 845
835 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) 846 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
836 { 847 {
837 hrtimer_cancel(&ts->sched_timer); 848 hrtimer_cancel(&ts->sched_timer);
838 hrtimer_set_expires(&ts->sched_timer, ts->last_tick); 849 hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
839 850
840 while (1) { 851 while (1) {
841 /* Forward the time to expire in the future */ 852 /* Forward the time to expire in the future */
842 hrtimer_forward(&ts->sched_timer, now, tick_period); 853 hrtimer_forward(&ts->sched_timer, now, tick_period);
843 854
844 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 855 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
845 hrtimer_start_expires(&ts->sched_timer, 856 hrtimer_start_expires(&ts->sched_timer,
846 HRTIMER_MODE_ABS_PINNED); 857 HRTIMER_MODE_ABS_PINNED);
847 /* Check if the timer was already in the past */ 858 /* Check if the timer was already in the past */
848 if (hrtimer_active(&ts->sched_timer)) 859 if (hrtimer_active(&ts->sched_timer))
849 break; 860 break;
850 } else { 861 } else {
851 if (!tick_program_event( 862 if (!tick_program_event(
852 hrtimer_get_expires(&ts->sched_timer), 0)) 863 hrtimer_get_expires(&ts->sched_timer), 0))
853 break; 864 break;
854 } 865 }
855 /* Reread time and update jiffies */ 866 /* Reread time and update jiffies */
856 now = ktime_get(); 867 now = ktime_get();
857 tick_do_update_jiffies64(now); 868 tick_do_update_jiffies64(now);
858 } 869 }
859 } 870 }
860 871
861 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) 872 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
862 { 873 {
863 /* Update jiffies first */ 874 /* Update jiffies first */
864 tick_do_update_jiffies64(now); 875 tick_do_update_jiffies64(now);
865 update_cpu_load_nohz(); 876 update_cpu_load_nohz();
866 877
867 calc_load_exit_idle(); 878 calc_load_exit_idle();
868 touch_softlockup_watchdog(); 879 touch_softlockup_watchdog();
869 /* 880 /*
870 * Cancel the scheduled timer and restore the tick 881 * Cancel the scheduled timer and restore the tick
871 */ 882 */
872 ts->tick_stopped = 0; 883 ts->tick_stopped = 0;
873 ts->idle_exittime = now; 884 ts->idle_exittime = now;
874 885
875 tick_nohz_restart(ts, now); 886 tick_nohz_restart(ts, now);
876 } 887 }
877 888
878 static void tick_nohz_account_idle_ticks(struct tick_sched *ts) 889 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
879 { 890 {
880 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 891 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
881 unsigned long ticks; 892 unsigned long ticks;
882 893
883 if (vtime_accounting_enabled()) 894 if (vtime_accounting_enabled())
884 return; 895 return;
885 /* 896 /*
886 * We stopped the tick in idle. update_process_times() would miss the 897 * We stopped the tick in idle. update_process_times() would miss the
887 * time we slept, since it only does a single tick of 898 * time we slept, since it only does a single tick of
888 * accounting. Make sure this time is accounted as idle! 899 * accounting. Make sure this time is accounted as idle!
889 */ 900 */
890 ticks = jiffies - ts->idle_jiffies; 901 ticks = jiffies - ts->idle_jiffies;
891 /* 902 /*
892 * We might be one off. Do not randomly account a huge number of ticks! 903 * We might be one off. Do not randomly account a huge number of ticks!
893 */ 904 */
894 if (ticks && ticks < LONG_MAX) 905 if (ticks && ticks < LONG_MAX)
895 account_idle_ticks(ticks); 906 account_idle_ticks(ticks);
896 #endif 907 #endif
897 } 908 }
898 909
899 /** 910 /**
900 * tick_nohz_idle_exit - restart the idle tick from the idle task 911 * tick_nohz_idle_exit - restart the idle tick from the idle task
901 * 912 *
902 * Restart the idle tick when the CPU is woken up from idle. 913 * Restart the idle tick when the CPU is woken up from idle.
903 * This also exits the RCU extended quiescent state. The CPU 914 * This also exits the RCU extended quiescent state. The CPU
904 * can use RCU again after this function is called. 915 * can use RCU again after this function is called.
905 */ 916 */
906 void tick_nohz_idle_exit(void) 917 void tick_nohz_idle_exit(void)
907 { 918 {
908 int cpu = smp_processor_id(); 919 int cpu = smp_processor_id();
909 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 920 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
910 ktime_t now; 921 ktime_t now;
911 922
912 local_irq_disable(); 923 local_irq_disable();
913 924
914 WARN_ON_ONCE(!ts->inidle); 925 WARN_ON_ONCE(!ts->inidle);
915 926
916 ts->inidle = 0; 927 ts->inidle = 0;
917 928
918 /* Cancel the timer because the CPU has already woken up from the C-states */ 929 /* Cancel the timer because the CPU has already woken up from the C-states */
919 menu_hrtimer_cancel(); 930 menu_hrtimer_cancel();
920 if (ts->idle_active || ts->tick_stopped) 931 if (ts->idle_active || ts->tick_stopped)
921 now = ktime_get(); 932 now = ktime_get();
922 933
923 if (ts->idle_active) 934 if (ts->idle_active)
924 tick_nohz_stop_idle(cpu, now); 935 tick_nohz_stop_idle(cpu, now);
925 936
926 if (ts->tick_stopped) { 937 if (ts->tick_stopped) {
927 tick_nohz_restart_sched_tick(ts, now); 938 tick_nohz_restart_sched_tick(ts, now);
928 tick_nohz_account_idle_ticks(ts); 939 tick_nohz_account_idle_ticks(ts);
929 } 940 }
930 941
931 local_irq_enable(); 942 local_irq_enable();
932 } 943 }
933 EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); 944 EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
934 945
935 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) 946 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
936 { 947 {
937 hrtimer_forward(&ts->sched_timer, now, tick_period); 948 hrtimer_forward(&ts->sched_timer, now, tick_period);
938 return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); 949 return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
939 } 950 }
940 951
941 /* 952 /*
942 * The nohz low res interrupt handler 953 * The nohz low res interrupt handler
943 */ 954 */
944 static void tick_nohz_handler(struct clock_event_device *dev) 955 static void tick_nohz_handler(struct clock_event_device *dev)
945 { 956 {
946 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 957 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
947 struct pt_regs *regs = get_irq_regs(); 958 struct pt_regs *regs = get_irq_regs();
948 ktime_t now = ktime_get(); 959 ktime_t now = ktime_get();
949 960
950 dev->next_event.tv64 = KTIME_MAX; 961 dev->next_event.tv64 = KTIME_MAX;
951 962
952 tick_sched_do_timer(now); 963 tick_sched_do_timer(now);
953 tick_sched_handle(ts, regs); 964 tick_sched_handle(ts, regs);
954 965
955 while (tick_nohz_reprogram(ts, now)) { 966 while (tick_nohz_reprogram(ts, now)) {
956 now = ktime_get(); 967 now = ktime_get();
957 tick_do_update_jiffies64(now); 968 tick_do_update_jiffies64(now);
958 } 969 }
959 } 970 }
960 971
961 /** 972 /**
962 * tick_nohz_switch_to_nohz - switch to nohz mode 973 * tick_nohz_switch_to_nohz - switch to nohz mode
963 */ 974 */
964 static void tick_nohz_switch_to_nohz(void) 975 static void tick_nohz_switch_to_nohz(void)
965 { 976 {
966 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 977 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
967 ktime_t next; 978 ktime_t next;
968 979
969 if (!tick_nohz_enabled) 980 if (!tick_nohz_enabled)
970 return; 981 return;
971 982
972 local_irq_disable(); 983 local_irq_disable();
973 if (tick_switch_to_oneshot(tick_nohz_handler)) { 984 if (tick_switch_to_oneshot(tick_nohz_handler)) {
974 local_irq_enable(); 985 local_irq_enable();
975 return; 986 return;
976 } 987 }
977 988
978 ts->nohz_mode = NOHZ_MODE_LOWRES; 989 ts->nohz_mode = NOHZ_MODE_LOWRES;
979 990
980 /* 991 /*
981 * Recycle the hrtimer in ts, so we can share the 992 * Recycle the hrtimer in ts, so we can share the
982 * hrtimer_forward with the highres code. 993 * hrtimer_forward with the highres code.
983 */ 994 */
984 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 995 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
985 /* Get the next period */ 996 /* Get the next period */
986 next = tick_init_jiffy_update(); 997 next = tick_init_jiffy_update();
987 998
988 for (;;) { 999 for (;;) {
989 hrtimer_set_expires(&ts->sched_timer, next); 1000 hrtimer_set_expires(&ts->sched_timer, next);
990 if (!tick_program_event(next, 0)) 1001 if (!tick_program_event(next, 0))
991 break; 1002 break;
992 next = ktime_add(next, tick_period); 1003 next = ktime_add(next, tick_period);
993 } 1004 }
994 local_irq_enable(); 1005 local_irq_enable();
995 } 1006 }
996 1007
997 /* 1008 /*
998 * When NOHZ is enabled and the tick is stopped, we need to kick the 1009 * When NOHZ is enabled and the tick is stopped, we need to kick the
999 * tick timer from irq_enter() so that the jiffies update is kept 1010 * tick timer from irq_enter() so that the jiffies update is kept
1000 * alive during long running softirqs. That's ugly as hell, but 1011 * alive during long running softirqs. That's ugly as hell, but
1001 * correctness is key even if we need to fix the offending softirq in 1012 * correctness is key even if we need to fix the offending softirq in
1002 * the first place. 1013 * the first place.
1003 * 1014 *
1004 * Note that this is different from tick_nohz_restart. We just kick the 1015 * Note that this is different from tick_nohz_restart. We just kick the
1005 * timer and do not touch the other magic bits which need to be done 1016 * timer and do not touch the other magic bits which need to be done
1006 * when idle is left. 1017 * when idle is left.
1007 */ 1018 */
1008 static void tick_nohz_kick_tick(int cpu, ktime_t now) 1019 static void tick_nohz_kick_tick(int cpu, ktime_t now)
1009 { 1020 {
1010 #if 0 1021 #if 0
1011 /* Switch back to 2.6.27 behaviour */ 1022 /* Switch back to 2.6.27 behaviour */
1012 1023
1013 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 1024 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
1014 ktime_t delta; 1025 ktime_t delta;
1015 1026
1016 /* 1027 /*
1017 * Do not touch the tick device when the next expiry has either 1028 * Do not touch the tick device when the next expiry has either
1018 * already been reached or is less than or equal to the tick period. 1029 * already been reached or is less than or equal to the tick period.
1019 */ 1030 */
1020 delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); 1031 delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
1021 if (delta.tv64 <= tick_period.tv64) 1032 if (delta.tv64 <= tick_period.tv64)
1022 return; 1033 return;
1023 1034
1024 tick_nohz_restart(ts, now); 1035 tick_nohz_restart(ts, now);
1025 #endif 1036 #endif
1026 } 1037 }
1027 1038
1028 static inline void tick_check_nohz(int cpu) 1039 static inline void tick_check_nohz(int cpu)
1029 { 1040 {
1030 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 1041 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
1031 ktime_t now; 1042 ktime_t now;
1032 1043
1033 if (!ts->idle_active && !ts->tick_stopped) 1044 if (!ts->idle_active && !ts->tick_stopped)
1034 return; 1045 return;
1035 now = ktime_get(); 1046 now = ktime_get();
1036 if (ts->idle_active) 1047 if (ts->idle_active)
1037 tick_nohz_stop_idle(cpu, now); 1048 tick_nohz_stop_idle(cpu, now);
1038 if (ts->tick_stopped) { 1049 if (ts->tick_stopped) {
1039 tick_nohz_update_jiffies(now); 1050 tick_nohz_update_jiffies(now);
1040 tick_nohz_kick_tick(cpu, now); 1051 tick_nohz_kick_tick(cpu, now);
1041 } 1052 }
1042 } 1053 }
1043 1054
1044 #else 1055 #else
1045 1056
1046 static inline void tick_nohz_switch_to_nohz(void) { } 1057 static inline void tick_nohz_switch_to_nohz(void) { }
1047 static inline void tick_check_nohz(int cpu) { } 1058 static inline void tick_check_nohz(int cpu) { }
1048 1059
1049 #endif /* CONFIG_NO_HZ_COMMON */ 1060 #endif /* CONFIG_NO_HZ_COMMON */
1050 1061
1051 /* 1062 /*
1052 * Called from irq_enter to notify about the possible interruption of idle() 1063 * Called from irq_enter to notify about the possible interruption of idle()
1053 */ 1064 */
1054 void tick_check_idle(int cpu) 1065 void tick_check_idle(int cpu)
1055 { 1066 {
1056 tick_check_oneshot_broadcast(cpu); 1067 tick_check_oneshot_broadcast(cpu);
1057 tick_check_nohz(cpu); 1068 tick_check_nohz(cpu);
1058 } 1069 }
1059 1070
1060 /* 1071 /*
1061 * High resolution timer specific code 1072 * High resolution timer specific code
1062 */ 1073 */
1063 #ifdef CONFIG_HIGH_RES_TIMERS 1074 #ifdef CONFIG_HIGH_RES_TIMERS
1064 /* 1075 /*
1065 * We rearm the timer until we get disabled by the idle code. 1076 * We rearm the timer until we get disabled by the idle code.
1066 * Called with interrupts disabled. 1077 * Called with interrupts disabled.
1067 */ 1078 */
1068 static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) 1079 static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
1069 { 1080 {
1070 struct tick_sched *ts = 1081 struct tick_sched *ts =
1071 container_of(timer, struct tick_sched, sched_timer); 1082 container_of(timer, struct tick_sched, sched_timer);
1072 struct pt_regs *regs = get_irq_regs(); 1083 struct pt_regs *regs = get_irq_regs();
1073 ktime_t now = ktime_get(); 1084 ktime_t now = ktime_get();
1074 1085
1075 tick_sched_do_timer(now); 1086 tick_sched_do_timer(now);
1076 1087
1077 /* 1088 /*
1078 * Do not call the handler when we are not in irq context 1089 * Do not call the handler when we are not in irq context
1079 * and have no valid regs pointer. 1090 * and have no valid regs pointer.
1080 */ 1091 */
1081 if (regs) 1092 if (regs)
1082 tick_sched_handle(ts, regs); 1093 tick_sched_handle(ts, regs);
1083 1094
1084 hrtimer_forward(timer, now, tick_period); 1095 hrtimer_forward(timer, now, tick_period);
1085 1096
1086 return HRTIMER_RESTART; 1097 return HRTIMER_RESTART;
1087 } 1098 }
1088 1099
1089 static int sched_skew_tick; 1100 static int sched_skew_tick;
1090 1101
1091 static int __init skew_tick(char *str) 1102 static int __init skew_tick(char *str)
1092 { 1103 {
1093 get_option(&str, &sched_skew_tick); 1104 get_option(&str, &sched_skew_tick);
1094 1105
1095 return 0; 1106 return 0;
1096 } 1107 }
1097 early_param("skew_tick", skew_tick); 1108 early_param("skew_tick", skew_tick);
1098 1109
1099 /** 1110 /**
1100 * tick_setup_sched_timer - setup the tick emulation timer 1111 * tick_setup_sched_timer - setup the tick emulation timer
1101 */ 1112 */
1102 void tick_setup_sched_timer(void) 1113 void tick_setup_sched_timer(void)
1103 { 1114 {
1104 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 1115 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
1105 ktime_t now = ktime_get(); 1116 ktime_t now = ktime_get();
1106 1117
1107 /* 1118 /*
1108 * Emulate tick processing via per-CPU hrtimers: 1119 * Emulate tick processing via per-CPU hrtimers:
1109 */ 1120 */
1110 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1121 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1111 ts->sched_timer.function = tick_sched_timer; 1122 ts->sched_timer.function = tick_sched_timer;
1112 1123
1113 /* Get the next period (per cpu) */ 1124 /* Get the next period (per cpu) */
1114 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 1125 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
1115 1126
1116 /* Offset the tick to avert jiffies_lock contention. */ 1127 /* Offset the tick to avert jiffies_lock contention. */
1117 if (sched_skew_tick) { 1128 if (sched_skew_tick) {
1118 u64 offset = ktime_to_ns(tick_period) >> 1; 1129 u64 offset = ktime_to_ns(tick_period) >> 1;
1119 do_div(offset, num_possible_cpus()); 1130 do_div(offset, num_possible_cpus());
1120 offset *= smp_processor_id(); 1131 offset *= smp_processor_id();
1121 hrtimer_add_expires_ns(&ts->sched_timer, offset); 1132 hrtimer_add_expires_ns(&ts->sched_timer, offset);
1122 } 1133 }
1123 1134
1124 for (;;) { 1135 for (;;) {
1125 hrtimer_forward(&ts->sched_timer, now, tick_period); 1136 hrtimer_forward(&ts->sched_timer, now, tick_period);
1126 hrtimer_start_expires(&ts->sched_timer, 1137 hrtimer_start_expires(&ts->sched_timer,
1127 HRTIMER_MODE_ABS_PINNED); 1138 HRTIMER_MODE_ABS_PINNED);
1128 /* Check if the timer was already in the past */ 1139 /* Check if the timer was already in the past */
1129 if (hrtimer_active(&ts->sched_timer)) 1140 if (hrtimer_active(&ts->sched_timer))
1130 break; 1141 break;
1131 now = ktime_get(); 1142 now = ktime_get();
1132 } 1143 }
1133 1144
1134 #ifdef CONFIG_NO_HZ_COMMON 1145 #ifdef CONFIG_NO_HZ_COMMON
1135 if (tick_nohz_enabled) 1146 if (tick_nohz_enabled)
1136 ts->nohz_mode = NOHZ_MODE_HIGHRES; 1147 ts->nohz_mode = NOHZ_MODE_HIGHRES;
1137 #endif 1148 #endif
1138 } 1149 }
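The skew_tick offset above spreads the per-CPU ticks across half a tick period to avert jiffies_lock contention. Worked example under assumed values (HZ=1000, i.e. a 1 ms tick period, and 4 possible CPUs): the base is 1,000,000 ns >> 1 = 500,000 ns, the per-CPU step is 500,000 / 4 = 125,000 ns, so CPU0 stays aligned and CPU1/2/3 fire 125/250/375 us later. A standalone sketch of the same arithmetic:

#include <stdio.h>
#include <stdint.h>

/* Userspace illustration of the skew computed in tick_setup_sched_timer():
 * half a tick period, divided across the possible CPUs, scaled by CPU id. */
static uint64_t skew_offset_ns(uint64_t tick_period_ns,
			       unsigned int num_possible_cpus,
			       unsigned int cpu)
{
	uint64_t offset = tick_period_ns >> 1;

	offset /= num_possible_cpus;
	return offset * cpu;
}

int main(void)
{
	unsigned int cpu;

	for (cpu = 0; cpu < 4; cpu++)	/* assumed 4 possible CPUs */
		printf("cpu%u skew = %llu ns\n", cpu,
		       (unsigned long long)skew_offset_ns(1000000, 4, cpu));
	return 0;
}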
1139 #endif /* HIGH_RES_TIMERS */ 1150 #endif /* HIGH_RES_TIMERS */
1140 1151
1141 #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS 1152 #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
1142 void tick_cancel_sched_timer(int cpu) 1153 void tick_cancel_sched_timer(int cpu)
1143 { 1154 {
1144 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 1155 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
1145 1156
1146 # ifdef CONFIG_HIGH_RES_TIMERS 1157 # ifdef CONFIG_HIGH_RES_TIMERS
1147 if (ts->sched_timer.base) 1158 if (ts->sched_timer.base)
1148 hrtimer_cancel(&ts->sched_timer); 1159 hrtimer_cancel(&ts->sched_timer);
1149 # endif 1160 # endif
1150 1161
1151 ts->nohz_mode = NOHZ_MODE_INACTIVE; 1162 ts->nohz_mode = NOHZ_MODE_INACTIVE;
1152 } 1163 }
1153 #endif 1164 #endif
1154 1165
1155 /** 1166 /**
1156 * Async notification about clocksource changes 1167 * Async notification about clocksource changes
1157 */ 1168 */
1158 void tick_clock_notify(void) 1169 void tick_clock_notify(void)
1159 { 1170 {
1160 int cpu; 1171 int cpu;
1161 1172
1162 for_each_possible_cpu(cpu) 1173 for_each_possible_cpu(cpu)
1163 set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); 1174 set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
1164 } 1175 }
1165 1176
1166 /* 1177 /*
1167 * Async notification about clock event changes 1178 * Async notification about clock event changes
1168 */ 1179 */
1169 void tick_oneshot_notify(void) 1180 void tick_oneshot_notify(void)
1170 { 1181 {
1171 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 1182 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
1172 1183
1173 set_bit(0, &ts->check_clocks); 1184 set_bit(0, &ts->check_clocks);
1174 } 1185 }
1175 1186
1176 /** 1187 /**
1177 * Check if a change happened which makes oneshot possible. 1188 * Check if a change happened which makes oneshot possible.
1178 * 1189 *
1179 * Called cyclically from the hrtimer softirq (driven by the timer 1190 * Called cyclically from the hrtimer softirq (driven by the timer
1180 * softirq). allow_nohz signals that we can switch into low-res nohz 1191 * softirq). allow_nohz signals that we can switch into low-res nohz
1181 * mode, because high resolution timers are disabled (either at compile 1192 * mode, because high resolution timers are disabled (either at compile
1182 * time or at run time). 1193 * time or at run time).
1183 */ 1194 */
1184 int tick_check_oneshot_change(int allow_nohz) 1195 int tick_check_oneshot_change(int allow_nohz)
1185 { 1196 {
1186 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 1197 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
1187 1198
1188 if (!test_and_clear_bit(0, &ts->check_clocks)) 1199 if (!test_and_clear_bit(0, &ts->check_clocks))
1189 return 0; 1200 return 0;
1190 1201
1191 if (ts->nohz_mode != NOHZ_MODE_INACTIVE) 1202 if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
1192 return 0; 1203 return 0;
1193 1204
1194 if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) 1205 if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
1195 return 0; 1206 return 0;
1196 1207
1197 if (!allow_nohz) 1208 if (!allow_nohz)
1198 return 1; 1209 return 1;
1199 1210
1200 tick_nohz_switch_to_nohz(); 1211 tick_nohz_switch_to_nohz();
1201 return 0; 1212 return 0;
1202 } 1213 }
1203 1214