Commit cb41a29076e9f95547da46578d5c8804f7b8845d
1 parent: 0637e02939
Exists in master and in 20 other branches
nohz: Add basic tracing
It is not always obvious why the full dynticks subsystem fails to stop the tick: the cause may be kthreads, posix timers, perf events, etc.

These new tracepoints help the user diagnose such failures and test the feature.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
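For readers who want to try the new tracepoint, here is a rough userspace sketch (not part of this commit). It assumes debugfs is mounted at /sys/kernel/debug, a kernel built with CONFIG_NO_HZ_FULL, and sufficient privileges; the program structure and error handling are illustrative only.

	/* Sketch: enable timer:tick_stop and stream the trace output */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	#define TRACING "/sys/kernel/debug/tracing"

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd;

		/* Enable the tick_stop tracepoint added by this commit */
		fd = open(TRACING "/events/timer/tick_stop/enable", O_WRONLY);
		if (fd < 0) {
			perror("enable tick_stop");
			return 1;
		}
		(void)write(fd, "1", 1);
		close(fd);

		/* Stream events as they arrive, e.g.
		 * "tick_stop: success=no msg=perf events running" */
		fd = open(TRACING "/trace_pipe", O_RDONLY);
		if (fd < 0) {
			perror("open trace_pipe");
			return 1;
		}
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, (size_t)n, stdout);
		close(fd);
		return 0;
	}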
Showing 2 changed files with 36 additions and 4 deletions (inline diff)
include/trace/events/timer.h
1 | #undef TRACE_SYSTEM | 1 | #undef TRACE_SYSTEM |
2 | #define TRACE_SYSTEM timer | 2 | #define TRACE_SYSTEM timer |
3 | 3 | ||
4 | #if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) | 4 | #if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) |
5 | #define _TRACE_TIMER_H | 5 | #define _TRACE_TIMER_H |
6 | 6 | ||
7 | #include <linux/tracepoint.h> | 7 | #include <linux/tracepoint.h> |
8 | #include <linux/hrtimer.h> | 8 | #include <linux/hrtimer.h> |
9 | #include <linux/timer.h> | 9 | #include <linux/timer.h> |
10 | 10 | ||
11 | DECLARE_EVENT_CLASS(timer_class, | 11 | DECLARE_EVENT_CLASS(timer_class, |
12 | 12 | ||
13 | TP_PROTO(struct timer_list *timer), | 13 | TP_PROTO(struct timer_list *timer), |
14 | 14 | ||
15 | TP_ARGS(timer), | 15 | TP_ARGS(timer), |
16 | 16 | ||
17 | TP_STRUCT__entry( | 17 | TP_STRUCT__entry( |
18 | __field( void *, timer ) | 18 | __field( void *, timer ) |
19 | ), | 19 | ), |
20 | 20 | ||
21 | TP_fast_assign( | 21 | TP_fast_assign( |
22 | __entry->timer = timer; | 22 | __entry->timer = timer; |
23 | ), | 23 | ), |
24 | 24 | ||
25 | TP_printk("timer=%p", __entry->timer) | 25 | TP_printk("timer=%p", __entry->timer) |
26 | ); | 26 | ); |
27 | 27 | ||
28 | /** | 28 | /** |
29 | * timer_init - called when the timer is initialized | 29 | * timer_init - called when the timer is initialized |
30 | * @timer: pointer to struct timer_list | 30 | * @timer: pointer to struct timer_list |
31 | */ | 31 | */ |
32 | DEFINE_EVENT(timer_class, timer_init, | 32 | DEFINE_EVENT(timer_class, timer_init, |
33 | 33 | ||
34 | TP_PROTO(struct timer_list *timer), | 34 | TP_PROTO(struct timer_list *timer), |
35 | 35 | ||
36 | TP_ARGS(timer) | 36 | TP_ARGS(timer) |
37 | ); | 37 | ); |
38 | 38 | ||
39 | /** | 39 | /** |
40 | * timer_start - called when the timer is started | 40 | * timer_start - called when the timer is started |
41 | * @timer: pointer to struct timer_list | 41 | * @timer: pointer to struct timer_list |
42 | * @expires: the timer's expiry time | 42 | * @expires: the timer's expiry time |
43 | */ | 43 | */ |
44 | TRACE_EVENT(timer_start, | 44 | TRACE_EVENT(timer_start, |
45 | 45 | ||
46 | TP_PROTO(struct timer_list *timer, unsigned long expires), | 46 | TP_PROTO(struct timer_list *timer, unsigned long expires), |
47 | 47 | ||
48 | TP_ARGS(timer, expires), | 48 | TP_ARGS(timer, expires), |
49 | 49 | ||
50 | TP_STRUCT__entry( | 50 | TP_STRUCT__entry( |
51 | __field( void *, timer ) | 51 | __field( void *, timer ) |
52 | __field( void *, function ) | 52 | __field( void *, function ) |
53 | __field( unsigned long, expires ) | 53 | __field( unsigned long, expires ) |
54 | __field( unsigned long, now ) | 54 | __field( unsigned long, now ) |
55 | ), | 55 | ), |
56 | 56 | ||
57 | TP_fast_assign( | 57 | TP_fast_assign( |
58 | __entry->timer = timer; | 58 | __entry->timer = timer; |
59 | __entry->function = timer->function; | 59 | __entry->function = timer->function; |
60 | __entry->expires = expires; | 60 | __entry->expires = expires; |
61 | __entry->now = jiffies; | 61 | __entry->now = jiffies; |
62 | ), | 62 | ), |
63 | 63 | ||
64 | TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", | 64 | TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", |
65 | __entry->timer, __entry->function, __entry->expires, | 65 | __entry->timer, __entry->function, __entry->expires, |
66 | (long)__entry->expires - __entry->now) | 66 | (long)__entry->expires - __entry->now) |
67 | ); | 67 | ); |
68 | 68 | ||
69 | /** | 69 | /** |
70 | * timer_expire_entry - called immediately before the timer callback | 70 | * timer_expire_entry - called immediately before the timer callback |
71 | * @timer: pointer to struct timer_list | 71 | * @timer: pointer to struct timer_list |
72 | * | 72 | * |
73 | * Allows one to determine the timer latency. | 73 | * Allows one to determine the timer latency. |
74 | */ | 74 | */ |
75 | TRACE_EVENT(timer_expire_entry, | 75 | TRACE_EVENT(timer_expire_entry, |
76 | 76 | ||
77 | TP_PROTO(struct timer_list *timer), | 77 | TP_PROTO(struct timer_list *timer), |
78 | 78 | ||
79 | TP_ARGS(timer), | 79 | TP_ARGS(timer), |
80 | 80 | ||
81 | TP_STRUCT__entry( | 81 | TP_STRUCT__entry( |
82 | __field( void *, timer ) | 82 | __field( void *, timer ) |
83 | __field( unsigned long, now ) | 83 | __field( unsigned long, now ) |
84 | __field( void *, function) | 84 | __field( void *, function) |
85 | ), | 85 | ), |
86 | 86 | ||
87 | TP_fast_assign( | 87 | TP_fast_assign( |
88 | __entry->timer = timer; | 88 | __entry->timer = timer; |
89 | __entry->now = jiffies; | 89 | __entry->now = jiffies; |
90 | __entry->function = timer->function; | 90 | __entry->function = timer->function; |
91 | ), | 91 | ), |
92 | 92 | ||
93 | TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function, __entry->now) | 93 | TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function, __entry->now) |
94 | ); | 94 | ); |
95 | 95 | ||
96 | /** | 96 | /** |
97 | * timer_expire_exit - called immediately after the timer callback returns | 97 | * timer_expire_exit - called immediately after the timer callback returns |
98 | * @timer: pointer to struct timer_list | 98 | * @timer: pointer to struct timer_list |
99 | * | 99 | * |
100 | * When used in combination with the timer_expire_entry tracepoint we can | 100 | * When used in combination with the timer_expire_entry tracepoint we can |
101 | * determine the runtime of the timer callback function. | 101 | * determine the runtime of the timer callback function. |
102 | * | 102 | * |
103 | * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might | 103 | * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might |
104 | * be invalid. We solely track the pointer. | 104 | * be invalid. We solely track the pointer. |
105 | */ | 105 | */ |
106 | DEFINE_EVENT(timer_class, timer_expire_exit, | 106 | DEFINE_EVENT(timer_class, timer_expire_exit, |
107 | 107 | ||
108 | TP_PROTO(struct timer_list *timer), | 108 | TP_PROTO(struct timer_list *timer), |
109 | 109 | ||
110 | TP_ARGS(timer) | 110 | TP_ARGS(timer) |
111 | ); | 111 | ); |
112 | 112 | ||
113 | /** | 113 | /** |
114 | * timer_cancel - called when the timer is canceled | 114 | * timer_cancel - called when the timer is canceled |
115 | * @timer: pointer to struct timer_list | 115 | * @timer: pointer to struct timer_list |
116 | */ | 116 | */ |
117 | DEFINE_EVENT(timer_class, timer_cancel, | 117 | DEFINE_EVENT(timer_class, timer_cancel, |
118 | 118 | ||
119 | TP_PROTO(struct timer_list *timer), | 119 | TP_PROTO(struct timer_list *timer), |
120 | 120 | ||
121 | TP_ARGS(timer) | 121 | TP_ARGS(timer) |
122 | ); | 122 | ); |
123 | 123 | ||
124 | /** | 124 | /** |
125 | * hrtimer_init - called when the hrtimer is initialized | 125 | * hrtimer_init - called when the hrtimer is initialized |
126 | * @timer: pointer to struct hrtimer | 126 | * @timer: pointer to struct hrtimer |
127 | * @clockid: the hrtimer's clock | 127 | * @clockid: the hrtimer's clock |
128 | * @mode: the hrtimer's mode | 128 | * @mode: the hrtimer's mode |
129 | */ | 129 | */ |
130 | TRACE_EVENT(hrtimer_init, | 130 | TRACE_EVENT(hrtimer_init, |
131 | 131 | ||
132 | TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, | 132 | TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, |
133 | enum hrtimer_mode mode), | 133 | enum hrtimer_mode mode), |
134 | 134 | ||
135 | TP_ARGS(hrtimer, clockid, mode), | 135 | TP_ARGS(hrtimer, clockid, mode), |
136 | 136 | ||
137 | TP_STRUCT__entry( | 137 | TP_STRUCT__entry( |
138 | __field( void *, hrtimer ) | 138 | __field( void *, hrtimer ) |
139 | __field( clockid_t, clockid ) | 139 | __field( clockid_t, clockid ) |
140 | __field( enum hrtimer_mode, mode ) | 140 | __field( enum hrtimer_mode, mode ) |
141 | ), | 141 | ), |
142 | 142 | ||
143 | TP_fast_assign( | 143 | TP_fast_assign( |
144 | __entry->hrtimer = hrtimer; | 144 | __entry->hrtimer = hrtimer; |
145 | __entry->clockid = clockid; | 145 | __entry->clockid = clockid; |
146 | __entry->mode = mode; | 146 | __entry->mode = mode; |
147 | ), | 147 | ), |
148 | 148 | ||
149 | TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, | 149 | TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, |
150 | __entry->clockid == CLOCK_REALTIME ? | 150 | __entry->clockid == CLOCK_REALTIME ? |
151 | "CLOCK_REALTIME" : "CLOCK_MONOTONIC", | 151 | "CLOCK_REALTIME" : "CLOCK_MONOTONIC", |
152 | __entry->mode == HRTIMER_MODE_ABS ? | 152 | __entry->mode == HRTIMER_MODE_ABS ? |
153 | "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") | 153 | "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") |
154 | ); | 154 | ); |
155 | 155 | ||
156 | /** | 156 | /** |
157 | * hrtimer_start - called when the hrtimer is started | 157 | * hrtimer_start - called when the hrtimer is started |
158 | * @timer: pointer to struct hrtimer | 158 | * @timer: pointer to struct hrtimer |
159 | */ | 159 | */ |
160 | TRACE_EVENT(hrtimer_start, | 160 | TRACE_EVENT(hrtimer_start, |
161 | 161 | ||
162 | TP_PROTO(struct hrtimer *hrtimer), | 162 | TP_PROTO(struct hrtimer *hrtimer), |
163 | 163 | ||
164 | TP_ARGS(hrtimer), | 164 | TP_ARGS(hrtimer), |
165 | 165 | ||
166 | TP_STRUCT__entry( | 166 | TP_STRUCT__entry( |
167 | __field( void *, hrtimer ) | 167 | __field( void *, hrtimer ) |
168 | __field( void *, function ) | 168 | __field( void *, function ) |
169 | __field( s64, expires ) | 169 | __field( s64, expires ) |
170 | __field( s64, softexpires ) | 170 | __field( s64, softexpires ) |
171 | ), | 171 | ), |
172 | 172 | ||
173 | TP_fast_assign( | 173 | TP_fast_assign( |
174 | __entry->hrtimer = hrtimer; | 174 | __entry->hrtimer = hrtimer; |
175 | __entry->function = hrtimer->function; | 175 | __entry->function = hrtimer->function; |
176 | __entry->expires = hrtimer_get_expires(hrtimer).tv64; | 176 | __entry->expires = hrtimer_get_expires(hrtimer).tv64; |
177 | __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; | 177 | __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; |
178 | ), | 178 | ), |
179 | 179 | ||
180 | TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", | 180 | TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", |
181 | __entry->hrtimer, __entry->function, | 181 | __entry->hrtimer, __entry->function, |
182 | (unsigned long long)ktime_to_ns((ktime_t) { | 182 | (unsigned long long)ktime_to_ns((ktime_t) { |
183 | .tv64 = __entry->expires }), | 183 | .tv64 = __entry->expires }), |
184 | (unsigned long long)ktime_to_ns((ktime_t) { | 184 | (unsigned long long)ktime_to_ns((ktime_t) { |
185 | .tv64 = __entry->softexpires })) | 185 | .tv64 = __entry->softexpires })) |
186 | ); | 186 | ); |
187 | 187 | ||
188 | /** | 188 | /** |
189 | * hrtimer_expire_entry - called immediately before the hrtimer callback | 189 | * hrtimer_expire_entry - called immediately before the hrtimer callback |
190 | * @timer: pointer to struct hrtimer | 190 | * @timer: pointer to struct hrtimer |
191 | * @now: pointer to variable which contains current time of the | 191 | * @now: pointer to variable which contains current time of the |
192 | * timer's base. | 192 | * timer's base. |
193 | * | 193 | * |
194 | * Allows one to determine the timer latency. | 194 | * Allows one to determine the timer latency. |
195 | */ | 195 | */ |
196 | TRACE_EVENT(hrtimer_expire_entry, | 196 | TRACE_EVENT(hrtimer_expire_entry, |
197 | 197 | ||
198 | TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), | 198 | TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), |
199 | 199 | ||
200 | TP_ARGS(hrtimer, now), | 200 | TP_ARGS(hrtimer, now), |
201 | 201 | ||
202 | TP_STRUCT__entry( | 202 | TP_STRUCT__entry( |
203 | __field( void *, hrtimer ) | 203 | __field( void *, hrtimer ) |
204 | __field( s64, now ) | 204 | __field( s64, now ) |
205 | __field( void *, function) | 205 | __field( void *, function) |
206 | ), | 206 | ), |
207 | 207 | ||
208 | TP_fast_assign( | 208 | TP_fast_assign( |
209 | __entry->hrtimer = hrtimer; | 209 | __entry->hrtimer = hrtimer; |
210 | __entry->now = now->tv64; | 210 | __entry->now = now->tv64; |
211 | __entry->function = hrtimer->function; | 211 | __entry->function = hrtimer->function; |
212 | ), | 212 | ), |
213 | 213 | ||
214 | TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, | 214 | TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, |
215 | (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) | 215 | (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) |
216 | ); | 216 | ); |
217 | 217 | ||
218 | DECLARE_EVENT_CLASS(hrtimer_class, | 218 | DECLARE_EVENT_CLASS(hrtimer_class, |
219 | 219 | ||
220 | TP_PROTO(struct hrtimer *hrtimer), | 220 | TP_PROTO(struct hrtimer *hrtimer), |
221 | 221 | ||
222 | TP_ARGS(hrtimer), | 222 | TP_ARGS(hrtimer), |
223 | 223 | ||
224 | TP_STRUCT__entry( | 224 | TP_STRUCT__entry( |
225 | __field( void *, hrtimer ) | 225 | __field( void *, hrtimer ) |
226 | ), | 226 | ), |
227 | 227 | ||
228 | TP_fast_assign( | 228 | TP_fast_assign( |
229 | __entry->hrtimer = hrtimer; | 229 | __entry->hrtimer = hrtimer; |
230 | ), | 230 | ), |
231 | 231 | ||
232 | TP_printk("hrtimer=%p", __entry->hrtimer) | 232 | TP_printk("hrtimer=%p", __entry->hrtimer) |
233 | ); | 233 | ); |
234 | 234 | ||
235 | /** | 235 | /** |
236 | * hrtimer_expire_exit - called immediately after the hrtimer callback returns | 236 | * hrtimer_expire_exit - called immediately after the hrtimer callback returns |
237 | * @timer: pointer to struct hrtimer | 237 | * @timer: pointer to struct hrtimer |
238 | * | 238 | * |
239 | * When used in combination with the hrtimer_expire_entry tracepoint we can | 239 | * When used in combination with the hrtimer_expire_entry tracepoint we can |
240 | * determine the runtime of the callback function. | 240 | * determine the runtime of the callback function. |
241 | */ | 241 | */ |
242 | DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit, | 242 | DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit, |
243 | 243 | ||
244 | TP_PROTO(struct hrtimer *hrtimer), | 244 | TP_PROTO(struct hrtimer *hrtimer), |
245 | 245 | ||
246 | TP_ARGS(hrtimer) | 246 | TP_ARGS(hrtimer) |
247 | ); | 247 | ); |
248 | 248 | ||
249 | /** | 249 | /** |
250 | * hrtimer_cancel - called when the hrtimer is canceled | 250 | * hrtimer_cancel - called when the hrtimer is canceled |
251 | * @hrtimer: pointer to struct hrtimer | 251 | * @hrtimer: pointer to struct hrtimer |
252 | */ | 252 | */ |
253 | DEFINE_EVENT(hrtimer_class, hrtimer_cancel, | 253 | DEFINE_EVENT(hrtimer_class, hrtimer_cancel, |
254 | 254 | ||
255 | TP_PROTO(struct hrtimer *hrtimer), | 255 | TP_PROTO(struct hrtimer *hrtimer), |
256 | 256 | ||
257 | TP_ARGS(hrtimer) | 257 | TP_ARGS(hrtimer) |
258 | ); | 258 | ); |
259 | 259 | ||
260 | /** | 260 | /** |
261 | * itimer_state - called when itimer is started or canceled | 261 | * itimer_state - called when itimer is started or canceled |
262 | * @which: name of the interval timer | 262 | * @which: name of the interval timer |
263 | * @value: the itimer's value; the itimer is canceled if value->it_value is | 263 | * @value: the itimer's value; the itimer is canceled if value->it_value is |
264 | * zero, otherwise it is started | 264 | * zero, otherwise it is started |
265 | * @expires: the itimer's expiry time | 265 | * @expires: the itimer's expiry time |
266 | */ | 266 | */ |
267 | TRACE_EVENT(itimer_state, | 267 | TRACE_EVENT(itimer_state, |
268 | 268 | ||
269 | TP_PROTO(int which, const struct itimerval *const value, | 269 | TP_PROTO(int which, const struct itimerval *const value, |
270 | cputime_t expires), | 270 | cputime_t expires), |
271 | 271 | ||
272 | TP_ARGS(which, value, expires), | 272 | TP_ARGS(which, value, expires), |
273 | 273 | ||
274 | TP_STRUCT__entry( | 274 | TP_STRUCT__entry( |
275 | __field( int, which ) | 275 | __field( int, which ) |
276 | __field( cputime_t, expires ) | 276 | __field( cputime_t, expires ) |
277 | __field( long, value_sec ) | 277 | __field( long, value_sec ) |
278 | __field( long, value_usec ) | 278 | __field( long, value_usec ) |
279 | __field( long, interval_sec ) | 279 | __field( long, interval_sec ) |
280 | __field( long, interval_usec ) | 280 | __field( long, interval_usec ) |
281 | ), | 281 | ), |
282 | 282 | ||
283 | TP_fast_assign( | 283 | TP_fast_assign( |
284 | __entry->which = which; | 284 | __entry->which = which; |
285 | __entry->expires = expires; | 285 | __entry->expires = expires; |
286 | __entry->value_sec = value->it_value.tv_sec; | 286 | __entry->value_sec = value->it_value.tv_sec; |
287 | __entry->value_usec = value->it_value.tv_usec; | 287 | __entry->value_usec = value->it_value.tv_usec; |
288 | __entry->interval_sec = value->it_interval.tv_sec; | 288 | __entry->interval_sec = value->it_interval.tv_sec; |
289 | __entry->interval_usec = value->it_interval.tv_usec; | 289 | __entry->interval_usec = value->it_interval.tv_usec; |
290 | ), | 290 | ), |
291 | 291 | ||
292 | TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld", | 292 | TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld", |
293 | __entry->which, (unsigned long long)__entry->expires, | 293 | __entry->which, (unsigned long long)__entry->expires, |
294 | __entry->value_sec, __entry->value_usec, | 294 | __entry->value_sec, __entry->value_usec, |
295 | __entry->interval_sec, __entry->interval_usec) | 295 | __entry->interval_sec, __entry->interval_usec) |
296 | ); | 296 | ); |
297 | 297 | ||
298 | /** | 298 | /** |
299 | * itimer_expire - called when itimer expires | 299 | * itimer_expire - called when itimer expires |
300 | * @which: type of the interval timer | 300 | * @which: type of the interval timer |
301 | * @pid: pid of the process which owns the timer | 301 | * @pid: pid of the process which owns the timer |
302 | * @now: current time, used to calculate the latency of itimer | 302 | * @now: current time, used to calculate the latency of itimer |
303 | */ | 303 | */ |
304 | TRACE_EVENT(itimer_expire, | 304 | TRACE_EVENT(itimer_expire, |
305 | 305 | ||
306 | TP_PROTO(int which, struct pid *pid, cputime_t now), | 306 | TP_PROTO(int which, struct pid *pid, cputime_t now), |
307 | 307 | ||
308 | TP_ARGS(which, pid, now), | 308 | TP_ARGS(which, pid, now), |
309 | 309 | ||
310 | TP_STRUCT__entry( | 310 | TP_STRUCT__entry( |
311 | __field( int , which ) | 311 | __field( int , which ) |
312 | __field( pid_t, pid ) | 312 | __field( pid_t, pid ) |
313 | __field( cputime_t, now ) | 313 | __field( cputime_t, now ) |
314 | ), | 314 | ), |
315 | 315 | ||
316 | TP_fast_assign( | 316 | TP_fast_assign( |
317 | __entry->which = which; | 317 | __entry->which = which; |
318 | __entry->now = now; | 318 | __entry->now = now; |
319 | __entry->pid = pid_nr(pid); | 319 | __entry->pid = pid_nr(pid); |
320 | ), | 320 | ), |
321 | 321 | ||
322 | TP_printk("which=%d pid=%d now=%llu", __entry->which, | 322 | TP_printk("which=%d pid=%d now=%llu", __entry->which, |
323 | (int) __entry->pid, (unsigned long long)__entry->now) | 323 | (int) __entry->pid, (unsigned long long)__entry->now) |
324 | ); | 324 | ); |
325 | 325 | ||
326 | #ifdef CONFIG_NO_HZ_FULL | ||
327 | TRACE_EVENT(tick_stop, | ||
328 | |||
329 | TP_PROTO(int success, char *error_msg), | ||
330 | |||
331 | TP_ARGS(success, error_msg), | ||
332 | |||
333 | TP_STRUCT__entry( | ||
334 | __field( int , success ) | ||
335 | __string( msg, error_msg ) | ||
336 | ), | ||
337 | |||
338 | TP_fast_assign( | ||
339 | __entry->success = success; | ||
340 | __assign_str(msg, error_msg); | ||
341 | ), | ||
342 | |||
343 | TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg)) | ||
344 | ); | ||
345 | #endif | ||
346 | |||
326 | #endif /* _TRACE_TIMER_H */ | 347 | #endif /* _TRACE_TIMER_H */ |
327 | 348 | ||
328 | /* This part must be outside protection */ | 349 | /* This part must be outside protection */ |
329 | #include <trace/define_trace.h> | 350 | #include <trace/define_trace.h> |
330 | 351 |
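For orientation before the second file (this note is not part of the diff): the new event stores its reason with __string()/__assign_str(), so callers can pass any human-readable message. The tracepoints in this header are instantiated once elsewhere in the tree (kernel/timer.c defined CREATE_TRACE_POINTS before including it at the time), so callers such as tick-sched.c below only need the plain include. A minimal sketch of the emission pattern, with a hypothetical function name:

	/* Sketch: how the tick-sched.c hunks below emit the event */
	#include <trace/events/timer.h>

	static bool can_stop_full_tick_sketch(void)
	{
		if (!sched_can_stop_tick()) {
			/* success=0 plus a human-readable reason string */
			trace_tick_stop(0, "more than 1 task in runqueue\n");
			return false;
		}
		return true;
	}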
kernel/time/tick-sched.c
1 | /* | 1 | /* |
2 | * linux/kernel/time/tick-sched.c | 2 | * linux/kernel/time/tick-sched.c |
3 | * | 3 | * |
4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | 5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | 6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner |
7 | * | 7 | * |
8 | * No idle tick implementation for low and high resolution timers | 8 | * No idle tick implementation for low and high resolution timers |
9 | * | 9 | * |
10 | * Started by: Thomas Gleixner and Ingo Molnar | 10 | * Started by: Thomas Gleixner and Ingo Molnar |
11 | * | 11 | * |
12 | * Distribute under GPLv2. | 12 | * Distribute under GPLv2. |
13 | */ | 13 | */ |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/kernel_stat.h> | 18 | #include <linux/kernel_stat.h> |
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/profile.h> | 20 | #include <linux/profile.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/irq_work.h> | 23 | #include <linux/irq_work.h> |
24 | #include <linux/posix-timers.h> | 24 | #include <linux/posix-timers.h> |
25 | #include <linux/perf_event.h> | 25 | #include <linux/perf_event.h> |
26 | 26 | ||
27 | #include <asm/irq_regs.h> | 27 | #include <asm/irq_regs.h> |
28 | 28 | ||
29 | #include "tick-internal.h" | 29 | #include "tick-internal.h" |
30 | 30 | ||
31 | #include <trace/events/timer.h> | ||
32 | |||
31 | /* | 33 | /* |
32 | * Per cpu nohz control structure | 34 | * Per cpu nohz control structure |
33 | */ | 35 | */ |
34 | DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | 36 | DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); |
35 | 37 | ||
36 | /* | 38 | /* |
37 | * The time, when the last jiffy update happened. Protected by jiffies_lock. | 39 | * The time, when the last jiffy update happened. Protected by jiffies_lock. |
38 | */ | 40 | */ |
39 | static ktime_t last_jiffies_update; | 41 | static ktime_t last_jiffies_update; |
40 | 42 | ||
41 | struct tick_sched *tick_get_tick_sched(int cpu) | 43 | struct tick_sched *tick_get_tick_sched(int cpu) |
42 | { | 44 | { |
43 | return &per_cpu(tick_cpu_sched, cpu); | 45 | return &per_cpu(tick_cpu_sched, cpu); |
44 | } | 46 | } |
45 | 47 | ||
46 | /* | 48 | /* |
47 | * Must be called with interrupts disabled ! | 49 | * Must be called with interrupts disabled ! |
48 | */ | 50 | */ |
49 | static void tick_do_update_jiffies64(ktime_t now) | 51 | static void tick_do_update_jiffies64(ktime_t now) |
50 | { | 52 | { |
51 | unsigned long ticks = 0; | 53 | unsigned long ticks = 0; |
52 | ktime_t delta; | 54 | ktime_t delta; |
53 | 55 | ||
54 | /* | 56 | /* |
55 | * Do a quick check without holding jiffies_lock: | 57 | * Do a quick check without holding jiffies_lock: |
56 | */ | 58 | */ |
57 | delta = ktime_sub(now, last_jiffies_update); | 59 | delta = ktime_sub(now, last_jiffies_update); |
58 | if (delta.tv64 < tick_period.tv64) | 60 | if (delta.tv64 < tick_period.tv64) |
59 | return; | 61 | return; |
60 | 62 | ||
61 | /* Reevaluate with jiffies_lock held */ | 63 | /* Reevaluate with jiffies_lock held */ |
62 | write_seqlock(&jiffies_lock); | 64 | write_seqlock(&jiffies_lock); |
63 | 65 | ||
64 | delta = ktime_sub(now, last_jiffies_update); | 66 | delta = ktime_sub(now, last_jiffies_update); |
65 | if (delta.tv64 >= tick_period.tv64) { | 67 | if (delta.tv64 >= tick_period.tv64) { |
66 | 68 | ||
67 | delta = ktime_sub(delta, tick_period); | 69 | delta = ktime_sub(delta, tick_period); |
68 | last_jiffies_update = ktime_add(last_jiffies_update, | 70 | last_jiffies_update = ktime_add(last_jiffies_update, |
69 | tick_period); | 71 | tick_period); |
70 | 72 | ||
71 | /* Slow path for long timeouts */ | 73 | /* Slow path for long timeouts */ |
72 | if (unlikely(delta.tv64 >= tick_period.tv64)) { | 74 | if (unlikely(delta.tv64 >= tick_period.tv64)) { |
73 | s64 incr = ktime_to_ns(tick_period); | 75 | s64 incr = ktime_to_ns(tick_period); |
74 | 76 | ||
75 | ticks = ktime_divns(delta, incr); | 77 | ticks = ktime_divns(delta, incr); |
76 | 78 | ||
77 | last_jiffies_update = ktime_add_ns(last_jiffies_update, | 79 | last_jiffies_update = ktime_add_ns(last_jiffies_update, |
78 | incr * ticks); | 80 | incr * ticks); |
79 | } | 81 | } |
80 | do_timer(++ticks); | 82 | do_timer(++ticks); |
81 | 83 | ||
82 | /* Keep the tick_next_period variable up to date */ | 84 | /* Keep the tick_next_period variable up to date */ |
83 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | 85 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
84 | } | 86 | } |
85 | write_sequnlock(&jiffies_lock); | 87 | write_sequnlock(&jiffies_lock); |
86 | } | 88 | } |
87 | 89 | ||
88 | /* | 90 | /* |
89 | * Initialize and return the jiffies update. | 91 | * Initialize and return the jiffies update. |
90 | */ | 92 | */ |
91 | static ktime_t tick_init_jiffy_update(void) | 93 | static ktime_t tick_init_jiffy_update(void) |
92 | { | 94 | { |
93 | ktime_t period; | 95 | ktime_t period; |
94 | 96 | ||
95 | write_seqlock(&jiffies_lock); | 97 | write_seqlock(&jiffies_lock); |
96 | /* Did we start the jiffies update yet ? */ | 98 | /* Did we start the jiffies update yet ? */ |
97 | if (last_jiffies_update.tv64 == 0) | 99 | if (last_jiffies_update.tv64 == 0) |
98 | last_jiffies_update = tick_next_period; | 100 | last_jiffies_update = tick_next_period; |
99 | period = last_jiffies_update; | 101 | period = last_jiffies_update; |
100 | write_sequnlock(&jiffies_lock); | 102 | write_sequnlock(&jiffies_lock); |
101 | return period; | 103 | return period; |
102 | } | 104 | } |
103 | 105 | ||
104 | 106 | ||
105 | static void tick_sched_do_timer(ktime_t now) | 107 | static void tick_sched_do_timer(ktime_t now) |
106 | { | 108 | { |
107 | int cpu = smp_processor_id(); | 109 | int cpu = smp_processor_id(); |
108 | 110 | ||
109 | #ifdef CONFIG_NO_HZ_COMMON | 111 | #ifdef CONFIG_NO_HZ_COMMON |
110 | /* | 112 | /* |
111 | * Check if the do_timer duty was dropped. We don't care about | 113 | * Check if the do_timer duty was dropped. We don't care about |
112 | * concurrency: This happens only when the cpu in charge went | 114 | * concurrency: This happens only when the cpu in charge went |
113 | * into a long sleep. If two cpus happen to assign themselves to | 115 | * into a long sleep. If two cpus happen to assign themselves to |
114 | * this duty, then the jiffies update is still serialized by | 116 | * this duty, then the jiffies update is still serialized by |
115 | * jiffies_lock. | 117 | * jiffies_lock. |
116 | */ | 118 | */ |
117 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) | 119 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) |
118 | && !tick_nohz_full_cpu(cpu)) | 120 | && !tick_nohz_full_cpu(cpu)) |
119 | tick_do_timer_cpu = cpu; | 121 | tick_do_timer_cpu = cpu; |
120 | #endif | 122 | #endif |
121 | 123 | ||
122 | /* Check, if the jiffies need an update */ | 124 | /* Check, if the jiffies need an update */ |
123 | if (tick_do_timer_cpu == cpu) | 125 | if (tick_do_timer_cpu == cpu) |
124 | tick_do_update_jiffies64(now); | 126 | tick_do_update_jiffies64(now); |
125 | } | 127 | } |
126 | 128 | ||
127 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | 129 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) |
128 | { | 130 | { |
129 | #ifdef CONFIG_NO_HZ_COMMON | 131 | #ifdef CONFIG_NO_HZ_COMMON |
130 | /* | 132 | /* |
131 | * When we are idle and the tick is stopped, we have to touch | 133 | * When we are idle and the tick is stopped, we have to touch |
132 | * the watchdog as we might not schedule for a really long | 134 | * the watchdog as we might not schedule for a really long |
133 | * time. This happens on complete idle SMP systems while | 135 | * time. This happens on complete idle SMP systems while |
134 | * waiting on the login prompt. We also increment the "start of | 136 | * waiting on the login prompt. We also increment the "start of |
135 | * idle" jiffy stamp so the idle accounting adjustment we do | 137 | * idle" jiffy stamp so the idle accounting adjustment we do |
136 | * when we go busy again does not account too many ticks. | 138 | * when we go busy again does not account too many ticks. |
137 | */ | 139 | */ |
138 | if (ts->tick_stopped) { | 140 | if (ts->tick_stopped) { |
139 | touch_softlockup_watchdog(); | 141 | touch_softlockup_watchdog(); |
140 | if (is_idle_task(current)) | 142 | if (is_idle_task(current)) |
141 | ts->idle_jiffies++; | 143 | ts->idle_jiffies++; |
142 | } | 144 | } |
143 | #endif | 145 | #endif |
144 | update_process_times(user_mode(regs)); | 146 | update_process_times(user_mode(regs)); |
145 | profile_tick(CPU_PROFILING); | 147 | profile_tick(CPU_PROFILING); |
146 | } | 148 | } |
147 | 149 | ||
148 | #ifdef CONFIG_NO_HZ_FULL | 150 | #ifdef CONFIG_NO_HZ_FULL |
149 | static cpumask_var_t nohz_full_mask; | 151 | static cpumask_var_t nohz_full_mask; |
150 | bool have_nohz_full_mask; | 152 | bool have_nohz_full_mask; |
151 | 153 | ||
152 | static bool can_stop_full_tick(void) | 154 | static bool can_stop_full_tick(void) |
153 | { | 155 | { |
154 | WARN_ON_ONCE(!irqs_disabled()); | 156 | WARN_ON_ONCE(!irqs_disabled()); |
155 | 157 | ||
156 | if (!sched_can_stop_tick()) | 158 | if (!sched_can_stop_tick()) { |
159 | trace_tick_stop(0, "more than 1 task in runqueue\n"); | ||
157 | return false; | 160 | return false; |
161 | } | ||
158 | 162 | ||
159 | if (!posix_cpu_timers_can_stop_tick(current)) | 163 | if (!posix_cpu_timers_can_stop_tick(current)) { |
164 | trace_tick_stop(0, "posix timers running\n"); | ||
160 | return false; | 165 | return false; |
166 | } | ||
161 | 167 | ||
162 | if (!perf_event_can_stop_tick()) | 168 | if (!perf_event_can_stop_tick()) { |
169 | trace_tick_stop(0, "perf events running\n"); | ||
163 | return false; | 170 | return false; |
171 | } | ||
164 | 172 | ||
165 | /* sched_clock_tick() needs us? */ | 173 | /* sched_clock_tick() needs us? */ |
166 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | 174 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
167 | /* | 175 | /* |
168 | * TODO: kick full dynticks CPUs when | 176 | * TODO: kick full dynticks CPUs when |
169 | * sched_clock_stable is set. | 177 | * sched_clock_stable is set. |
170 | */ | 178 | */ |
171 | if (!sched_clock_stable) | 179 | if (!sched_clock_stable) { |
180 | trace_tick_stop(0, "unstable sched clock\n"); | ||
172 | return false; | 181 | return false; |
182 | } | ||
173 | #endif | 183 | #endif |
174 | 184 | ||
175 | return true; | 185 | return true; |
176 | } | 186 | } |
177 | 187 | ||
178 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); | 188 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); |
179 | 189 | ||
180 | /* | 190 | /* |
181 | * Re-evaluate the need for the tick on the current CPU | 191 | * Re-evaluate the need for the tick on the current CPU |
182 | * and restart it if necessary. | 192 | * and restart it if necessary. |
183 | */ | 193 | */ |
184 | void tick_nohz_full_check(void) | 194 | void tick_nohz_full_check(void) |
185 | { | 195 | { |
186 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 196 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
187 | 197 | ||
188 | if (tick_nohz_full_cpu(smp_processor_id())) { | 198 | if (tick_nohz_full_cpu(smp_processor_id())) { |
189 | if (ts->tick_stopped && !is_idle_task(current)) { | 199 | if (ts->tick_stopped && !is_idle_task(current)) { |
190 | if (!can_stop_full_tick()) | 200 | if (!can_stop_full_tick()) |
191 | tick_nohz_restart_sched_tick(ts, ktime_get()); | 201 | tick_nohz_restart_sched_tick(ts, ktime_get()); |
192 | } | 202 | } |
193 | } | 203 | } |
194 | } | 204 | } |
195 | 205 | ||
196 | static void nohz_full_kick_work_func(struct irq_work *work) | 206 | static void nohz_full_kick_work_func(struct irq_work *work) |
197 | { | 207 | { |
198 | tick_nohz_full_check(); | 208 | tick_nohz_full_check(); |
199 | } | 209 | } |
200 | 210 | ||
201 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | 211 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { |
202 | .func = nohz_full_kick_work_func, | 212 | .func = nohz_full_kick_work_func, |
203 | }; | 213 | }; |
204 | 214 | ||
205 | /* | 215 | /* |
206 | * Kick the current CPU if it's full dynticks in order to force it to | 216 | * Kick the current CPU if it's full dynticks in order to force it to |
207 | * re-evaluate its dependency on the tick and restart it if necessary. | 217 | * re-evaluate its dependency on the tick and restart it if necessary. |
208 | */ | 218 | */ |
209 | void tick_nohz_full_kick(void) | 219 | void tick_nohz_full_kick(void) |
210 | { | 220 | { |
211 | if (tick_nohz_full_cpu(smp_processor_id())) | 221 | if (tick_nohz_full_cpu(smp_processor_id())) |
212 | irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); | 222 | irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); |
213 | } | 223 | } |
214 | 224 | ||
215 | static void nohz_full_kick_ipi(void *info) | 225 | static void nohz_full_kick_ipi(void *info) |
216 | { | 226 | { |
217 | tick_nohz_full_check(); | 227 | tick_nohz_full_check(); |
218 | } | 228 | } |
219 | 229 | ||
220 | /* | 230 | /* |
221 | * Kick all full dynticks CPUs in order to force these to re-evaluate | 231 | * Kick all full dynticks CPUs in order to force these to re-evaluate |
222 | * their dependency on the tick and restart it if necessary. | 232 | * their dependency on the tick and restart it if necessary. |
223 | */ | 233 | */ |
224 | void tick_nohz_full_kick_all(void) | 234 | void tick_nohz_full_kick_all(void) |
225 | { | 235 | { |
226 | if (!have_nohz_full_mask) | 236 | if (!have_nohz_full_mask) |
227 | return; | 237 | return; |
228 | 238 | ||
229 | preempt_disable(); | 239 | preempt_disable(); |
230 | smp_call_function_many(nohz_full_mask, | 240 | smp_call_function_many(nohz_full_mask, |
231 | nohz_full_kick_ipi, NULL, false); | 241 | nohz_full_kick_ipi, NULL, false); |
232 | preempt_enable(); | 242 | preempt_enable(); |
233 | } | 243 | } |
234 | 244 | ||
235 | /* | 245 | /* |
236 | * Re-evaluate the need for the tick as we switch the current task. | 246 | * Re-evaluate the need for the tick as we switch the current task. |
237 | * It might need the tick due to per task/process properties: | 247 | * It might need the tick due to per task/process properties: |
238 | * perf events, posix cpu timers, ... | 248 | * perf events, posix cpu timers, ... |
239 | */ | 249 | */ |
240 | void tick_nohz_task_switch(struct task_struct *tsk) | 250 | void tick_nohz_task_switch(struct task_struct *tsk) |
241 | { | 251 | { |
242 | unsigned long flags; | 252 | unsigned long flags; |
243 | 253 | ||
244 | if (!tick_nohz_full_cpu(smp_processor_id())) | 254 | if (!tick_nohz_full_cpu(smp_processor_id())) |
245 | return; | 255 | return; |
246 | 256 | ||
247 | local_irq_save(flags); | 257 | local_irq_save(flags); |
248 | 258 | ||
249 | if (tick_nohz_tick_stopped() && !can_stop_full_tick()) | 259 | if (tick_nohz_tick_stopped() && !can_stop_full_tick()) |
250 | tick_nohz_full_kick(); | 260 | tick_nohz_full_kick(); |
251 | 261 | ||
252 | local_irq_restore(flags); | 262 | local_irq_restore(flags); |
253 | } | 263 | } |
254 | 264 | ||
255 | int tick_nohz_full_cpu(int cpu) | 265 | int tick_nohz_full_cpu(int cpu) |
256 | { | 266 | { |
257 | if (!have_nohz_full_mask) | 267 | if (!have_nohz_full_mask) |
258 | return 0; | 268 | return 0; |
259 | 269 | ||
260 | return cpumask_test_cpu(cpu, nohz_full_mask); | 270 | return cpumask_test_cpu(cpu, nohz_full_mask); |
261 | } | 271 | } |
262 | 272 | ||
263 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ | 273 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ |
264 | static int __init tick_nohz_full_setup(char *str) | 274 | static int __init tick_nohz_full_setup(char *str) |
265 | { | 275 | { |
266 | int cpu; | 276 | int cpu; |
267 | 277 | ||
268 | alloc_bootmem_cpumask_var(&nohz_full_mask); | 278 | alloc_bootmem_cpumask_var(&nohz_full_mask); |
269 | if (cpulist_parse(str, nohz_full_mask) < 0) { | 279 | if (cpulist_parse(str, nohz_full_mask) < 0) { |
270 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); | 280 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); |
271 | return 1; | 281 | return 1; |
272 | } | 282 | } |
273 | 283 | ||
274 | cpu = smp_processor_id(); | 284 | cpu = smp_processor_id(); |
275 | if (cpumask_test_cpu(cpu, nohz_full_mask)) { | 285 | if (cpumask_test_cpu(cpu, nohz_full_mask)) { |
276 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); | 286 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); |
277 | cpumask_clear_cpu(cpu, nohz_full_mask); | 287 | cpumask_clear_cpu(cpu, nohz_full_mask); |
278 | } | 288 | } |
279 | have_nohz_full_mask = true; | 289 | have_nohz_full_mask = true; |
280 | 290 | ||
281 | return 1; | 291 | return 1; |
282 | } | 292 | } |
283 | __setup("nohz_full=", tick_nohz_full_setup); | 293 | __setup("nohz_full=", tick_nohz_full_setup); |
284 | 294 | ||
285 | static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | 295 | static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, |
286 | unsigned long action, | 296 | unsigned long action, |
287 | void *hcpu) | 297 | void *hcpu) |
288 | { | 298 | { |
289 | unsigned int cpu = (unsigned long)hcpu; | 299 | unsigned int cpu = (unsigned long)hcpu; |
290 | 300 | ||
291 | switch (action & ~CPU_TASKS_FROZEN) { | 301 | switch (action & ~CPU_TASKS_FROZEN) { |
292 | case CPU_DOWN_PREPARE: | 302 | case CPU_DOWN_PREPARE: |
293 | /* | 303 | /* |
294 | * If we handle the timekeeping duty for full dynticks CPUs, | 304 | * If we handle the timekeeping duty for full dynticks CPUs, |
295 | * we can't safely shutdown that CPU. | 305 | * we can't safely shutdown that CPU. |
296 | */ | 306 | */ |
297 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) | 307 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) |
298 | return -EINVAL; | 308 | return -EINVAL; |
299 | break; | 309 | break; |
300 | } | 310 | } |
301 | return NOTIFY_OK; | 311 | return NOTIFY_OK; |
302 | } | 312 | } |
303 | 313 | ||
304 | /* | 314 | /* |
305 | * Worst case string length in chunks of CPU ranges seems to be 2-step | 315 | * Worst case string length in chunks of CPU ranges seems to be 2-step |
306 | * separations: 0,2,4,6,... | 316 | * separations: 0,2,4,6,... |
307 | * This is NR_CPUS + sizeof('\0') | 317 | * This is NR_CPUS + sizeof('\0') |
308 | */ | 318 | */ |
309 | static char __initdata nohz_full_buf[NR_CPUS + 1]; | 319 | static char __initdata nohz_full_buf[NR_CPUS + 1]; |
310 | 320 | ||
311 | static int tick_nohz_init_all(void) | 321 | static int tick_nohz_init_all(void) |
312 | { | 322 | { |
313 | int err = -1; | 323 | int err = -1; |
314 | 324 | ||
315 | #ifdef CONFIG_NO_HZ_FULL_ALL | 325 | #ifdef CONFIG_NO_HZ_FULL_ALL |
316 | if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { | 326 | if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { |
317 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); | 327 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); |
318 | return err; | 328 | return err; |
319 | } | 329 | } |
320 | err = 0; | 330 | err = 0; |
321 | cpumask_setall(nohz_full_mask); | 331 | cpumask_setall(nohz_full_mask); |
322 | cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); | 332 | cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); |
323 | have_nohz_full_mask = true; | 333 | have_nohz_full_mask = true; |
324 | #endif | 334 | #endif |
325 | return err; | 335 | return err; |
326 | } | 336 | } |
327 | 337 | ||
328 | void __init tick_nohz_init(void) | 338 | void __init tick_nohz_init(void) |
329 | { | 339 | { |
330 | int cpu; | 340 | int cpu; |
331 | 341 | ||
332 | if (!have_nohz_full_mask) { | 342 | if (!have_nohz_full_mask) { |
333 | if (tick_nohz_init_all() < 0) | 343 | if (tick_nohz_init_all() < 0) |
334 | return; | 344 | return; |
335 | } | 345 | } |
336 | 346 | ||
337 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 347 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
338 | 348 | ||
339 | /* Make sure full dynticks CPU are also RCU nocbs */ | 349 | /* Make sure full dynticks CPU are also RCU nocbs */ |
340 | for_each_cpu(cpu, nohz_full_mask) { | 350 | for_each_cpu(cpu, nohz_full_mask) { |
341 | if (!rcu_is_nocb_cpu(cpu)) { | 351 | if (!rcu_is_nocb_cpu(cpu)) { |
342 | pr_warning("NO_HZ: CPU %d is not RCU nocb: " | 352 | pr_warning("NO_HZ: CPU %d is not RCU nocb: " |
343 | "cleared from nohz_full range", cpu); | 353 | "cleared from nohz_full range", cpu); |
344 | cpumask_clear_cpu(cpu, nohz_full_mask); | 354 | cpumask_clear_cpu(cpu, nohz_full_mask); |
345 | } | 355 | } |
346 | } | 356 | } |
347 | 357 | ||
348 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); | 358 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); |
349 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | 359 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); |
350 | } | 360 | } |
351 | #else | 361 | #else |
352 | #define have_nohz_full_mask (0) | 362 | #define have_nohz_full_mask (0) |
353 | #endif | 363 | #endif |
354 | 364 | ||
355 | /* | 365 | /* |
356 | * NOHZ - aka dynamic tick functionality | 366 | * NOHZ - aka dynamic tick functionality |
357 | */ | 367 | */ |
358 | #ifdef CONFIG_NO_HZ_COMMON | 368 | #ifdef CONFIG_NO_HZ_COMMON |
359 | /* | 369 | /* |
360 | * NO HZ enabled ? | 370 | * NO HZ enabled ? |
361 | */ | 371 | */ |
362 | int tick_nohz_enabled __read_mostly = 1; | 372 | int tick_nohz_enabled __read_mostly = 1; |
363 | 373 | ||
364 | /* | 374 | /* |
365 | * Enable / Disable tickless mode | 375 | * Enable / Disable tickless mode |
366 | */ | 376 | */ |
367 | static int __init setup_tick_nohz(char *str) | 377 | static int __init setup_tick_nohz(char *str) |
368 | { | 378 | { |
369 | if (!strcmp(str, "off")) | 379 | if (!strcmp(str, "off")) |
370 | tick_nohz_enabled = 0; | 380 | tick_nohz_enabled = 0; |
371 | else if (!strcmp(str, "on")) | 381 | else if (!strcmp(str, "on")) |
372 | tick_nohz_enabled = 1; | 382 | tick_nohz_enabled = 1; |
373 | else | 383 | else |
374 | return 0; | 384 | return 0; |
375 | return 1; | 385 | return 1; |
376 | } | 386 | } |
377 | 387 | ||
378 | __setup("nohz=", setup_tick_nohz); | 388 | __setup("nohz=", setup_tick_nohz); |
379 | 389 | ||
380 | /** | 390 | /** |
381 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted | 391 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted |
382 | * | 392 | * |
383 | * Called from interrupt entry when the CPU was idle | 393 | * Called from interrupt entry when the CPU was idle |
384 | * | 394 | * |
385 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies | 395 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies |
386 | * must be updated. Otherwise an interrupt handler could use a stale jiffy | 396 | * must be updated. Otherwise an interrupt handler could use a stale jiffy |
387 | * value. We do this unconditionally on any cpu, as we don't know whether the | 397 | * value. We do this unconditionally on any cpu, as we don't know whether the |
388 | * cpu which has the update task assigned is in a long sleep. | 398 | * cpu which has the update task assigned is in a long sleep. |
389 | */ | 399 | */ |
390 | static void tick_nohz_update_jiffies(ktime_t now) | 400 | static void tick_nohz_update_jiffies(ktime_t now) |
391 | { | 401 | { |
392 | int cpu = smp_processor_id(); | 402 | int cpu = smp_processor_id(); |
393 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 403 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
394 | unsigned long flags; | 404 | unsigned long flags; |
395 | 405 | ||
396 | ts->idle_waketime = now; | 406 | ts->idle_waketime = now; |
397 | 407 | ||
398 | local_irq_save(flags); | 408 | local_irq_save(flags); |
399 | tick_do_update_jiffies64(now); | 409 | tick_do_update_jiffies64(now); |
400 | local_irq_restore(flags); | 410 | local_irq_restore(flags); |
401 | 411 | ||
402 | touch_softlockup_watchdog(); | 412 | touch_softlockup_watchdog(); |
403 | } | 413 | } |
404 | 414 | ||
405 | /* | 415 | /* |
406 | * Updates the per cpu time idle statistics counters | 416 | * Updates the per cpu time idle statistics counters |
407 | */ | 417 | */ |
408 | static void | 418 | static void |
409 | update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time) | 419 | update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time) |
410 | { | 420 | { |
411 | ktime_t delta; | 421 | ktime_t delta; |
412 | 422 | ||
413 | if (ts->idle_active) { | 423 | if (ts->idle_active) { |
414 | delta = ktime_sub(now, ts->idle_entrytime); | 424 | delta = ktime_sub(now, ts->idle_entrytime); |
415 | if (nr_iowait_cpu(cpu) > 0) | 425 | if (nr_iowait_cpu(cpu) > 0) |
416 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); | 426 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); |
417 | else | 427 | else |
418 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | 428 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
419 | ts->idle_entrytime = now; | 429 | ts->idle_entrytime = now; |
420 | } | 430 | } |
421 | 431 | ||
422 | if (last_update_time) | 432 | if (last_update_time) |
423 | *last_update_time = ktime_to_us(now); | 433 | *last_update_time = ktime_to_us(now); |
424 | 434 | ||
425 | } | 435 | } |
426 | 436 | ||
427 | static void tick_nohz_stop_idle(int cpu, ktime_t now) | 437 | static void tick_nohz_stop_idle(int cpu, ktime_t now) |
428 | { | 438 | { |
429 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 439 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
430 | 440 | ||
431 | update_ts_time_stats(cpu, ts, now, NULL); | 441 | update_ts_time_stats(cpu, ts, now, NULL); |
432 | ts->idle_active = 0; | 442 | ts->idle_active = 0; |
433 | 443 | ||
434 | sched_clock_idle_wakeup_event(0); | 444 | sched_clock_idle_wakeup_event(0); |
435 | } | 445 | } |
436 | 446 | ||
437 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | 447 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) |
438 | { | 448 | { |
439 | ktime_t now = ktime_get(); | 449 | ktime_t now = ktime_get(); |
440 | 450 | ||
441 | ts->idle_entrytime = now; | 451 | ts->idle_entrytime = now; |
442 | ts->idle_active = 1; | 452 | ts->idle_active = 1; |
443 | sched_clock_idle_sleep_event(); | 453 | sched_clock_idle_sleep_event(); |
444 | return now; | 454 | return now; |
445 | } | 455 | } |
446 | 456 | ||
447 | /** | 457 | /** |
448 | * get_cpu_idle_time_us - get the total idle time of a cpu | 458 | * get_cpu_idle_time_us - get the total idle time of a cpu |
449 | * @cpu: CPU number to query | 459 | * @cpu: CPU number to query |
450 | * @last_update_time: variable to store update time in. Do not update | 460 | * @last_update_time: variable to store update time in. Do not update |
451 | * counters if NULL. | 461 | * counters if NULL. |
452 | * | 462 | * |
453 | * Return the cumulative idle time (since boot) for a given | 463 | * Return the cumulative idle time (since boot) for a given |
454 | * CPU, in microseconds. | 464 | * CPU, in microseconds. |
455 | * | 465 | * |
456 | * This time is measured via accounting rather than sampling, | 466 | * This time is measured via accounting rather than sampling, |
457 | * and is as accurate as ktime_get() is. | 467 | * and is as accurate as ktime_get() is. |
458 | * | 468 | * |
459 | * This function returns -1 if NOHZ is not enabled. | 469 | * This function returns -1 if NOHZ is not enabled. |
460 | */ | 470 | */ |
461 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | 471 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) |
462 | { | 472 | { |
463 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 473 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
464 | ktime_t now, idle; | 474 | ktime_t now, idle; |
465 | 475 | ||
466 | if (!tick_nohz_enabled) | 476 | if (!tick_nohz_enabled) |
467 | return -1; | 477 | return -1; |
468 | 478 | ||
469 | now = ktime_get(); | 479 | now = ktime_get(); |
470 | if (last_update_time) { | 480 | if (last_update_time) { |
471 | update_ts_time_stats(cpu, ts, now, last_update_time); | 481 | update_ts_time_stats(cpu, ts, now, last_update_time); |
472 | idle = ts->idle_sleeptime; | 482 | idle = ts->idle_sleeptime; |
473 | } else { | 483 | } else { |
474 | if (ts->idle_active && !nr_iowait_cpu(cpu)) { | 484 | if (ts->idle_active && !nr_iowait_cpu(cpu)) { |
475 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | 485 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); |
476 | 486 | ||
477 | idle = ktime_add(ts->idle_sleeptime, delta); | 487 | idle = ktime_add(ts->idle_sleeptime, delta); |
478 | } else { | 488 | } else { |
479 | idle = ts->idle_sleeptime; | 489 | idle = ts->idle_sleeptime; |
480 | } | 490 | } |
481 | } | 491 | } |
482 | 492 | ||
483 | return ktime_to_us(idle); | 493 | return ktime_to_us(idle); |
484 | 494 | ||
485 | } | 495 | } |
486 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); | 496 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); |
487 | 497 | ||
488 | /** | 498 | /** |
489 | * get_cpu_iowait_time_us - get the total iowait time of a cpu | 499 | * get_cpu_iowait_time_us - get the total iowait time of a cpu |
490 | * @cpu: CPU number to query | 500 | * @cpu: CPU number to query |
491 | * @last_update_time: variable to store update time in. Do not update | 501 | * @last_update_time: variable to store update time in. Do not update |
492 | * counters if NULL. | 502 | * counters if NULL. |
493 | * | 503 | * |
494 | * Return the cumulative iowait time (since boot) for a given | 504 | * Return the cumulative iowait time (since boot) for a given |
495 | * CPU, in microseconds. | 505 | * CPU, in microseconds. |
496 | * | 506 | * |
497 | * This time is measured via accounting rather than sampling, | 507 | * This time is measured via accounting rather than sampling, |
498 | * and is as accurate as ktime_get() is. | 508 | * and is as accurate as ktime_get() is. |
499 | * | 509 | * |
500 | * This function returns -1 if NOHZ is not enabled. | 510 | * This function returns -1 if NOHZ is not enabled. |
501 | */ | 511 | */ |
502 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | 512 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) |
503 | { | 513 | { |
504 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 514 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
505 | ktime_t now, iowait; | 515 | ktime_t now, iowait; |
506 | 516 | ||
507 | if (!tick_nohz_enabled) | 517 | if (!tick_nohz_enabled) |
508 | return -1; | 518 | return -1; |
509 | 519 | ||
510 | now = ktime_get(); | 520 | now = ktime_get(); |
511 | if (last_update_time) { | 521 | if (last_update_time) { |
512 | update_ts_time_stats(cpu, ts, now, last_update_time); | 522 | update_ts_time_stats(cpu, ts, now, last_update_time); |
513 | iowait = ts->iowait_sleeptime; | 523 | iowait = ts->iowait_sleeptime; |
514 | } else { | 524 | } else { |
515 | if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { | 525 | if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { |
516 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | 526 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); |
517 | 527 | ||
518 | iowait = ktime_add(ts->iowait_sleeptime, delta); | 528 | iowait = ktime_add(ts->iowait_sleeptime, delta); |
519 | } else { | 529 | } else { |
520 | iowait = ts->iowait_sleeptime; | 530 | iowait = ts->iowait_sleeptime; |
521 | } | 531 | } |
522 | } | 532 | } |
523 | 533 | ||
524 | return ktime_to_us(iowait); | 534 | return ktime_to_us(iowait); |
525 | } | 535 | } |
526 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | 536 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); |
527 | 537 | ||
528 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | 538 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, |
529 | ktime_t now, int cpu) | 539 | ktime_t now, int cpu) |
530 | { | 540 | { |
531 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; | 541 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; |
532 | ktime_t last_update, expires, ret = { .tv64 = 0 }; | 542 | ktime_t last_update, expires, ret = { .tv64 = 0 }; |
533 | unsigned long rcu_delta_jiffies; | 543 | unsigned long rcu_delta_jiffies; |
534 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 544 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
535 | u64 time_delta; | 545 | u64 time_delta; |
536 | 546 | ||
537 | /* Read jiffies and the time when jiffies were updated last */ | 547 | /* Read jiffies and the time when jiffies were updated last */ |
538 | do { | 548 | do { |
539 | seq = read_seqbegin(&jiffies_lock); | 549 | seq = read_seqbegin(&jiffies_lock); |
540 | last_update = last_jiffies_update; | 550 | last_update = last_jiffies_update; |
541 | last_jiffies = jiffies; | 551 | last_jiffies = jiffies; |
542 | time_delta = timekeeping_max_deferment(); | 552 | time_delta = timekeeping_max_deferment(); |
543 | } while (read_seqretry(&jiffies_lock, seq)); | 553 | } while (read_seqretry(&jiffies_lock, seq)); |
544 | 554 | ||
545 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || | 555 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || |
546 | arch_needs_cpu(cpu) || irq_work_needs_cpu()) { | 556 | arch_needs_cpu(cpu) || irq_work_needs_cpu()) { |
547 | next_jiffies = last_jiffies + 1; | 557 | next_jiffies = last_jiffies + 1; |
548 | delta_jiffies = 1; | 558 | delta_jiffies = 1; |
549 | } else { | 559 | } else { |
550 | /* Get the next timer wheel timer */ | 560 | /* Get the next timer wheel timer */ |
551 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 561 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
552 | delta_jiffies = next_jiffies - last_jiffies; | 562 | delta_jiffies = next_jiffies - last_jiffies; |
553 | if (rcu_delta_jiffies < delta_jiffies) { | 563 | if (rcu_delta_jiffies < delta_jiffies) { |
554 | next_jiffies = last_jiffies + rcu_delta_jiffies; | 564 | next_jiffies = last_jiffies + rcu_delta_jiffies; |
555 | delta_jiffies = rcu_delta_jiffies; | 565 | delta_jiffies = rcu_delta_jiffies; |
556 | } | 566 | } |
557 | } | 567 | } |
558 | /* | 568 | /* |
559 | * Do not stop the tick if we are only one jiffy off | 569 | * Do not stop the tick if we are only one jiffy off |
560 | * or if the cpu is required for RCU | 570 | * or if the cpu is required for RCU |
561 | */ | 571 | */ |
562 | if (!ts->tick_stopped && delta_jiffies == 1) | 572 | if (!ts->tick_stopped && delta_jiffies == 1) |
563 | goto out; | 573 | goto out; |
564 | 574 | ||
565 | /* Schedule the tick if we are at least one jiffy off */ | 575 | /* Schedule the tick if we are at least one jiffy off */ |
566 | if ((long)delta_jiffies >= 1) { | 576 | if ((long)delta_jiffies >= 1) { |
567 | 577 | ||
568 | /* | 578 | /* |
569 | * If this cpu is the one which updates jiffies, then | 579 | * If this cpu is the one which updates jiffies, then |
570 | * give up the assignment and let it be taken by the | 580 | * give up the assignment and let it be taken by the |
571 | * cpu which runs the tick timer next, which might be | 581 | * cpu which runs the tick timer next, which might be |
572 |  * this cpu as well. If we don't drop this here, the | 582 |  * this cpu as well. If we don't drop this here, the |
573 |  * jiffies might be stale and do_timer() might never | 583 |  * jiffies might be stale and do_timer() might never |
574 |  * be invoked. Keep track of the fact that it was the one | 584 |  * be invoked. Keep track of the fact that it was the one |
575 |  * which had the do_timer() duty last. If this cpu is | 585 |  * which had the do_timer() duty last. If this cpu is |
576 |  * the one which had the do_timer() duty last, we | 586 |  * the one which had the do_timer() duty last, we |
577 |  * limit the sleep time to the timekeeping | 587 |  * limit the sleep time to the timekeeping |
578 |  * max_deferment value which we retrieved | 588 |  * max_deferment value which we retrieved |
579 | * above. Otherwise we can sleep as long as we want. | 589 | * above. Otherwise we can sleep as long as we want. |
580 | */ | 590 | */ |
581 | if (cpu == tick_do_timer_cpu) { | 591 | if (cpu == tick_do_timer_cpu) { |
582 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 592 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
583 | ts->do_timer_last = 1; | 593 | ts->do_timer_last = 1; |
584 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | 594 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { |
585 | time_delta = KTIME_MAX; | 595 | time_delta = KTIME_MAX; |
586 | ts->do_timer_last = 0; | 596 | ts->do_timer_last = 0; |
587 | } else if (!ts->do_timer_last) { | 597 | } else if (!ts->do_timer_last) { |
588 | time_delta = KTIME_MAX; | 598 | time_delta = KTIME_MAX; |
589 | } | 599 | } |
590 | 600 | ||
591 | /* | 601 | /* |
592 |  * Calculate the expiry time for the next timer wheel | 602 |  * Calculate the expiry time for the next timer wheel |
593 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals | 603 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals |
594 | * that there is no timer pending or at least extremely | 604 | * that there is no timer pending or at least extremely |
595 | * far into the future (12 days for HZ=1000). In this | 605 | * far into the future (12 days for HZ=1000). In this |
596 | * case we set the expiry to the end of time. | 606 | * case we set the expiry to the end of time. |
597 | */ | 607 | */ |
598 | if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { | 608 | if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { |
599 | /* | 609 | /* |
600 | * Calculate the time delta for the next timer event. | 610 | * Calculate the time delta for the next timer event. |
601 | * If the time delta exceeds the maximum time delta | 611 | * If the time delta exceeds the maximum time delta |
602 | * permitted by the current clocksource then adjust | 612 | * permitted by the current clocksource then adjust |
603 | * the time delta accordingly to ensure the | 613 | * the time delta accordingly to ensure the |
604 | * clocksource does not wrap. | 614 | * clocksource does not wrap. |
605 | */ | 615 | */ |
606 | time_delta = min_t(u64, time_delta, | 616 | time_delta = min_t(u64, time_delta, |
607 | tick_period.tv64 * delta_jiffies); | 617 | tick_period.tv64 * delta_jiffies); |
608 | } | 618 | } |
609 | 619 | ||
610 | if (time_delta < KTIME_MAX) | 620 | if (time_delta < KTIME_MAX) |
611 | expires = ktime_add_ns(last_update, time_delta); | 621 | expires = ktime_add_ns(last_update, time_delta); |
612 | else | 622 | else |
613 | expires.tv64 = KTIME_MAX; | 623 | expires.tv64 = KTIME_MAX; |
614 | 624 | ||
615 | /* Skip reprogramming the event if it hasn't changed */ | 625 | /* Skip reprogramming the event if it hasn't changed */ |
616 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 626 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
617 | goto out; | 627 | goto out; |
618 | 628 | ||
619 | ret = expires; | 629 | ret = expires; |
620 | 630 | ||
621 | /* | 631 | /* |
622 |  * tick_nohz_stop_sched_tick() can be called several times | 632 |  * tick_nohz_stop_sched_tick() can be called several times |
623 |  * before tick_nohz_restart_sched_tick() is called. This happens | 633 |  * before tick_nohz_restart_sched_tick() is called. This happens |
624 |  * when interrupts arrive which do not cause a reschedule. In the | 634 |  * when interrupts arrive which do not cause a reschedule. In the |
625 |  * first call we save the current tick time, so we can restart | 635 |  * first call we save the current tick time, so we can restart |
626 |  * the scheduler tick in tick_nohz_restart_sched_tick(). | 636 |  * the scheduler tick in tick_nohz_restart_sched_tick(). |
627 | */ | 637 | */ |
628 | if (!ts->tick_stopped) { | 638 | if (!ts->tick_stopped) { |
629 | nohz_balance_enter_idle(cpu); | 639 | nohz_balance_enter_idle(cpu); |
630 | calc_load_enter_idle(); | 640 | calc_load_enter_idle(); |
631 | 641 | ||
632 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 642 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); |
633 | ts->tick_stopped = 1; | 643 | ts->tick_stopped = 1; |
644 | trace_tick_stop(1, " "); | ||
634 | } | 645 | } |
635 | 646 | ||
636 | /* | 647 | /* |
637 |  * If the expiration time == KTIME_MAX, then we | 648 |  * If the expiration time == KTIME_MAX, then we |
638 |  * simply stop the tick timer. | 649 |  * simply stop the tick timer. |
639 | */ | 650 | */ |
640 | if (unlikely(expires.tv64 == KTIME_MAX)) { | 651 | if (unlikely(expires.tv64 == KTIME_MAX)) { |
641 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 652 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
642 | hrtimer_cancel(&ts->sched_timer); | 653 | hrtimer_cancel(&ts->sched_timer); |
643 | goto out; | 654 | goto out; |
644 | } | 655 | } |
645 | 656 | ||
646 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 657 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
647 | hrtimer_start(&ts->sched_timer, expires, | 658 | hrtimer_start(&ts->sched_timer, expires, |
648 | HRTIMER_MODE_ABS_PINNED); | 659 | HRTIMER_MODE_ABS_PINNED); |
649 | /* Check if the timer was already in the past */ | 660 | /* Check if the timer was already in the past */ |
650 | if (hrtimer_active(&ts->sched_timer)) | 661 | if (hrtimer_active(&ts->sched_timer)) |
651 | goto out; | 662 | goto out; |
652 | } else if (!tick_program_event(expires, 0)) | 663 | } else if (!tick_program_event(expires, 0)) |
653 | goto out; | 664 | goto out; |
654 | /* | 665 | /* |
655 | * We are past the event already. So we crossed a | 666 | * We are past the event already. So we crossed a |
656 |  * jiffy boundary. Update jiffies and raise the | 667 |  * jiffy boundary. Update jiffies and raise the |
657 | * softirq. | 668 | * softirq. |
658 | */ | 669 | */ |
659 | tick_do_update_jiffies64(ktime_get()); | 670 | tick_do_update_jiffies64(ktime_get()); |
660 | } | 671 | } |
661 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 672 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
662 | out: | 673 | out: |
663 | ts->next_jiffies = next_jiffies; | 674 | ts->next_jiffies = next_jiffies; |
664 | ts->last_jiffies = last_jiffies; | 675 | ts->last_jiffies = last_jiffies; |
665 | ts->sleep_length = ktime_sub(dev->next_event, now); | 676 | ts->sleep_length = ktime_sub(dev->next_event, now); |
666 | 677 | ||
667 | return ret; | 678 | return ret; |
668 | } | 679 | } |
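
The trace_tick_stop(1, " ") call above is the success-side hook added by this commit. For context, here is a sketch of what the companion tick_stop event definition in include/trace/events/timer.h might look like, following the usual TRACE_EVENT conventions; the exact field layout and format string are assumptions, not a quote of the commit:

/*
 * Sketch only: field layout and printk format are assumptions about
 * how a tick_stop event could be defined.
 */
TRACE_EVENT(tick_stop,

	TP_PROTO(int success, char *error_msg),

	TP_ARGS(success, error_msg),

	TP_STRUCT__entry(
		__field(int,	success)
		__string(msg,	error_msg)
	),

	TP_fast_assign(
		__entry->success = success;
		__assign_str(msg, error_msg);
	),

	TP_printk("success=%s msg=%s",
		  __entry->success ? "yes" : "no", __get_str(msg))
);

With such an event in place it can be enabled at runtime, typically via echo 1 > /sys/kernel/debug/tracing/events/timer/tick_stop/enable, and the recorded reasons read back from the trace buffer.
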
669 | 680 | ||
670 | static void tick_nohz_full_stop_tick(struct tick_sched *ts) | 681 | static void tick_nohz_full_stop_tick(struct tick_sched *ts) |
671 | { | 682 | { |
672 | #ifdef CONFIG_NO_HZ_FULL | 683 | #ifdef CONFIG_NO_HZ_FULL |
673 | int cpu = smp_processor_id(); | 684 | int cpu = smp_processor_id(); |
674 | 685 | ||
675 | if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) | 686 | if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) |
676 | return; | 687 | return; |
677 | 688 | ||
678 | if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) | 689 | if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) |
679 | return; | 690 | return; |
680 | 691 | ||
681 | if (!can_stop_full_tick()) | 692 | if (!can_stop_full_tick()) |
682 | return; | 693 | return; |
683 | 694 | ||
684 | tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); | 695 | tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); |
685 | #endif | 696 | #endif |
686 | } | 697 | } |
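
On the failure side, tick_nohz_full_stop_tick() returns early when can_stop_full_tick() (defined earlier in this file) vetoes stopping the tick, which is where the zero/reason form of the tracepoint naturally lives. A sketch of how such veto points might report the posix timers and perf events causes named in the commit message; the exact set of checks and the reason strings are assumptions:

/*
 * Sketch: illustrative veto points reporting why the tick could not
 * be stopped. The helpers exist in this kernel era, but the exact
 * checks and reason strings are assumptions.
 */
static bool can_stop_full_tick(void)
{
	WARN_ON_ONCE(!irqs_disabled());

	if (!sched_can_stop_tick()) {
		trace_tick_stop(0, "more than 1 task in runqueue\n");
		return false;
	}

	if (!posix_cpu_timers_can_stop_tick(current)) {
		trace_tick_stop(0, "posix timers running\n");
		return false;
	}

	if (!perf_event_can_stop_tick()) {
		trace_tick_stop(0, "perf events running\n");
		return false;
	}

	return true;
}
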
687 | 698 | ||
688 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | 699 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) |
689 | { | 700 | { |
690 | /* | 701 | /* |
691 | * If this cpu is offline and it is the one which updates | 702 | * If this cpu is offline and it is the one which updates |
692 | * jiffies, then give up the assignment and let it be taken by | 703 | * jiffies, then give up the assignment and let it be taken by |
693 |  * the cpu which runs the tick timer next. If we don't drop | 704 |  * the cpu which runs the tick timer next. If we don't drop |
694 |  * this here, the jiffies might be stale and do_timer() might | 705 |  * this here, the jiffies might be stale and do_timer() might |
695 |  * never be invoked. | 706 |  * never be invoked. |
696 | */ | 707 | */ |
697 | if (unlikely(!cpu_online(cpu))) { | 708 | if (unlikely(!cpu_online(cpu))) { |
698 | if (cpu == tick_do_timer_cpu) | 709 | if (cpu == tick_do_timer_cpu) |
699 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 710 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
700 | } | 711 | } |
701 | 712 | ||
702 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 713 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
703 | return false; | 714 | return false; |
704 | 715 | ||
705 | if (need_resched()) | 716 | if (need_resched()) |
706 | return false; | 717 | return false; |
707 | 718 | ||
708 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | 719 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { |
709 | static int ratelimit; | 720 | static int ratelimit; |
710 | 721 | ||
711 | if (ratelimit < 10 && | 722 | if (ratelimit < 10 && |
712 | (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | 723 | (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { |
713 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | 724 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", |
714 | (unsigned int) local_softirq_pending()); | 725 | (unsigned int) local_softirq_pending()); |
715 | ratelimit++; | 726 | ratelimit++; |
716 | } | 727 | } |
717 | return false; | 728 | return false; |
718 | } | 729 | } |
719 | 730 | ||
720 | if (have_nohz_full_mask) { | 731 | if (have_nohz_full_mask) { |
721 | /* | 732 | /* |
722 | * Keep the tick alive to guarantee timekeeping progression | 733 | * Keep the tick alive to guarantee timekeeping progression |
723 | * if there are full dynticks CPUs around | 734 | * if there are full dynticks CPUs around |
724 | */ | 735 | */ |
725 | if (tick_do_timer_cpu == cpu) | 736 | if (tick_do_timer_cpu == cpu) |
726 | return false; | 737 | return false; |
727 | /* | 738 | /* |
728 | * Boot safety: make sure the timekeeping duty has been | 739 | * Boot safety: make sure the timekeeping duty has been |
729 |  * assigned before entering dyntick-idle mode. | 740 |  * assigned before entering dyntick-idle mode. |
730 | */ | 741 | */ |
731 | if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) | 742 | if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) |
732 | return false; | 743 | return false; |
733 | } | 744 | } |
734 | 745 | ||
735 | return true; | 746 | return true; |
736 | } | 747 | } |
737 | 748 | ||
738 | static void __tick_nohz_idle_enter(struct tick_sched *ts) | 749 | static void __tick_nohz_idle_enter(struct tick_sched *ts) |
739 | { | 750 | { |
740 | ktime_t now, expires; | 751 | ktime_t now, expires; |
741 | int cpu = smp_processor_id(); | 752 | int cpu = smp_processor_id(); |
742 | 753 | ||
743 | now = tick_nohz_start_idle(cpu, ts); | 754 | now = tick_nohz_start_idle(cpu, ts); |
744 | 755 | ||
745 | if (can_stop_idle_tick(cpu, ts)) { | 756 | if (can_stop_idle_tick(cpu, ts)) { |
746 | int was_stopped = ts->tick_stopped; | 757 | int was_stopped = ts->tick_stopped; |
747 | 758 | ||
748 | ts->idle_calls++; | 759 | ts->idle_calls++; |
749 | 760 | ||
750 | expires = tick_nohz_stop_sched_tick(ts, now, cpu); | 761 | expires = tick_nohz_stop_sched_tick(ts, now, cpu); |
751 | if (expires.tv64 > 0LL) { | 762 | if (expires.tv64 > 0LL) { |
752 | ts->idle_sleeps++; | 763 | ts->idle_sleeps++; |
753 | ts->idle_expires = expires; | 764 | ts->idle_expires = expires; |
754 | } | 765 | } |
755 | 766 | ||
756 | if (!was_stopped && ts->tick_stopped) | 767 | if (!was_stopped && ts->tick_stopped) |
757 | ts->idle_jiffies = ts->last_jiffies; | 768 | ts->idle_jiffies = ts->last_jiffies; |
758 | } | 769 | } |
759 | } | 770 | } |
760 | 771 | ||
761 | /** | 772 | /** |
762 | * tick_nohz_idle_enter - stop the idle tick from the idle task | 773 | * tick_nohz_idle_enter - stop the idle tick from the idle task |
763 | * | 774 | * |
764 |  * When the next event is more than a tick into the future, stop the idle tick. | 775 |  * When the next event is more than a tick into the future, stop the idle tick. |
765 | * Called when we start the idle loop. | 776 | * Called when we start the idle loop. |
766 | * | 777 | * |
767 |  * The arch is responsible for calling: | 778 |  * The arch is responsible for calling: |
768 | * | 779 | * |
769 | * - rcu_idle_enter() after its last use of RCU before the CPU is put | 780 | * - rcu_idle_enter() after its last use of RCU before the CPU is put |
770 | * to sleep. | 781 | * to sleep. |
771 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. | 782 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. |
772 | */ | 783 | */ |
773 | void tick_nohz_idle_enter(void) | 784 | void tick_nohz_idle_enter(void) |
774 | { | 785 | { |
775 | struct tick_sched *ts; | 786 | struct tick_sched *ts; |
776 | 787 | ||
777 | WARN_ON_ONCE(irqs_disabled()); | 788 | WARN_ON_ONCE(irqs_disabled()); |
778 | 789 | ||
779 | /* | 790 | /* |
780 | * Update the idle state in the scheduler domain hierarchy | 791 | * Update the idle state in the scheduler domain hierarchy |
781 | * when tick_nohz_stop_sched_tick() is called from the idle loop. | 792 | * when tick_nohz_stop_sched_tick() is called from the idle loop. |
782 | * State will be updated to busy during the first busy tick after | 793 | * State will be updated to busy during the first busy tick after |
783 | * exiting idle. | 794 | * exiting idle. |
784 | */ | 795 | */ |
785 | set_cpu_sd_state_idle(); | 796 | set_cpu_sd_state_idle(); |
786 | 797 | ||
787 | local_irq_disable(); | 798 | local_irq_disable(); |
788 | 799 | ||
789 | ts = &__get_cpu_var(tick_cpu_sched); | 800 | ts = &__get_cpu_var(tick_cpu_sched); |
790 | /* | 801 | /* |
791 |  * Set ts->inidle unconditionally. Even if the system did not | 802 |  * Set ts->inidle unconditionally. Even if the system did not |
792 |  * switch to nohz mode, the cpu frequency governors rely on the | 803 |  * switch to nohz mode, the cpu frequency governors rely on the |
793 | * update of the idle time accounting in tick_nohz_start_idle(). | 804 | * update of the idle time accounting in tick_nohz_start_idle(). |
794 | */ | 805 | */ |
795 | ts->inidle = 1; | 806 | ts->inidle = 1; |
796 | __tick_nohz_idle_enter(ts); | 807 | __tick_nohz_idle_enter(ts); |
797 | 808 | ||
798 | local_irq_enable(); | 809 | local_irq_enable(); |
799 | } | 810 | } |
800 | EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); | 811 | EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); |
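
The ordering requirements in the kernel-doc above are easiest to see in a sketch of a generic arch idle loop; arch_cpu_sleep() is a hypothetical stand-in for the architecture's wait-for-interrupt primitive, the rest shows the required pairing:

/*
 * Sketch of the expected call ordering in an arch idle loop.
 * arch_cpu_sleep() is hypothetical; the tick_nohz_* and rcu_idle_*
 * calls illustrate the pairing described above.
 */
static void example_idle_loop(void)
{
	while (1) {
		tick_nohz_idle_enter();		/* may stop the tick */
		rcu_idle_enter();		/* after the last use of RCU */

		while (!need_resched())
			arch_cpu_sleep();	/* low power wait */

		rcu_idle_exit();		/* before the first use of RCU */
		tick_nohz_idle_exit();		/* restarts the tick */

		schedule();
	}
}
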
801 | 812 | ||
802 | /** | 813 | /** |
803 | * tick_nohz_irq_exit - update next tick event from interrupt exit | 814 | * tick_nohz_irq_exit - update next tick event from interrupt exit |
804 | * | 815 | * |
805 | * When an interrupt fires while we are idle and it doesn't cause | 816 | * When an interrupt fires while we are idle and it doesn't cause |
806 | * a reschedule, it may still add, modify or delete a timer, enqueue | 817 | * a reschedule, it may still add, modify or delete a timer, enqueue |
807 | * an RCU callback, etc... | 818 | * an RCU callback, etc... |
808 | * So we need to re-calculate and reprogram the next tick event. | 819 | * So we need to re-calculate and reprogram the next tick event. |
809 | */ | 820 | */ |
810 | void tick_nohz_irq_exit(void) | 821 | void tick_nohz_irq_exit(void) |
811 | { | 822 | { |
812 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 823 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
813 | 824 | ||
814 | if (ts->inidle) { | 825 | if (ts->inidle) { |
815 | /* Cancel the timer because the CPU has already woken up from the C-states */ | 826 | /* Cancel the timer because the CPU has already woken up from the C-states */ |
816 | menu_hrtimer_cancel(); | 827 | menu_hrtimer_cancel(); |
817 | __tick_nohz_idle_enter(ts); | 828 | __tick_nohz_idle_enter(ts); |
818 | } else { | 829 | } else { |
819 | tick_nohz_full_stop_tick(ts); | 830 | tick_nohz_full_stop_tick(ts); |
820 | } | 831 | } |
821 | } | 832 | } |
822 | 833 | ||
823 | /** | 834 | /** |
824 | * tick_nohz_get_sleep_length - return the length of the current sleep | 835 | * tick_nohz_get_sleep_length - return the length of the current sleep |
825 | * | 836 | * |
826 | * Called from power state control code with interrupts disabled | 837 | * Called from power state control code with interrupts disabled |
827 | */ | 838 | */ |
828 | ktime_t tick_nohz_get_sleep_length(void) | 839 | ktime_t tick_nohz_get_sleep_length(void) |
829 | { | 840 | { |
830 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 841 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
831 | 842 | ||
832 | return ts->sleep_length; | 843 | return ts->sleep_length; |
833 | } | 844 | } |
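
The sleep length returned here is what cpuidle governors consult when choosing a C-state. A minimal sketch of such a caller, where choose_cstate() is a hypothetical helper mapping a sleep estimate to a C-state index:

/*
 * Sketch: how a cpuidle governor might use the sleep length.
 * choose_cstate() is hypothetical; tick_nohz_get_sleep_length() and
 * ktime_to_us() are the real interfaces.
 */
static int example_governor_select(void)
{
	ktime_t sleep = tick_nohz_get_sleep_length();
	s64 sleep_us = ktime_to_us(sleep);

	/* Pick the deepest C-state whose exit latency still fits. */
	return choose_cstate(sleep_us);
}
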
834 | 845 | ||
835 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | 846 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) |
836 | { | 847 | { |
837 | hrtimer_cancel(&ts->sched_timer); | 848 | hrtimer_cancel(&ts->sched_timer); |
838 | hrtimer_set_expires(&ts->sched_timer, ts->last_tick); | 849 | hrtimer_set_expires(&ts->sched_timer, ts->last_tick); |
839 | 850 | ||
840 | while (1) { | 851 | while (1) { |
841 | /* Forward the time to expire in the future */ | 852 | /* Forward the time to expire in the future */ |
842 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 853 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
843 | 854 | ||
844 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 855 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
845 | hrtimer_start_expires(&ts->sched_timer, | 856 | hrtimer_start_expires(&ts->sched_timer, |
846 | HRTIMER_MODE_ABS_PINNED); | 857 | HRTIMER_MODE_ABS_PINNED); |
847 | /* Check if the timer was already in the past */ | 858 | /* Check if the timer was already in the past */ |
848 | if (hrtimer_active(&ts->sched_timer)) | 859 | if (hrtimer_active(&ts->sched_timer)) |
849 | break; | 860 | break; |
850 | } else { | 861 | } else { |
851 | if (!tick_program_event( | 862 | if (!tick_program_event( |
852 | hrtimer_get_expires(&ts->sched_timer), 0)) | 863 | hrtimer_get_expires(&ts->sched_timer), 0)) |
853 | break; | 864 | break; |
854 | } | 865 | } |
855 | /* Reread time and update jiffies */ | 866 | /* Reread time and update jiffies */ |
856 | now = ktime_get(); | 867 | now = ktime_get(); |
857 | tick_do_update_jiffies64(now); | 868 | tick_do_update_jiffies64(now); |
858 | } | 869 | } |
859 | } | 870 | } |
860 | 871 | ||
861 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | 872 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) |
862 | { | 873 | { |
863 | /* Update jiffies first */ | 874 | /* Update jiffies first */ |
864 | tick_do_update_jiffies64(now); | 875 | tick_do_update_jiffies64(now); |
865 | update_cpu_load_nohz(); | 876 | update_cpu_load_nohz(); |
866 | 877 | ||
867 | calc_load_exit_idle(); | 878 | calc_load_exit_idle(); |
868 | touch_softlockup_watchdog(); | 879 | touch_softlockup_watchdog(); |
869 | /* | 880 | /* |
870 | * Cancel the scheduled timer and restore the tick | 881 | * Cancel the scheduled timer and restore the tick |
871 | */ | 882 | */ |
872 | ts->tick_stopped = 0; | 883 | ts->tick_stopped = 0; |
873 | ts->idle_exittime = now; | 884 | ts->idle_exittime = now; |
874 | 885 | ||
875 | tick_nohz_restart(ts, now); | 886 | tick_nohz_restart(ts, now); |
876 | } | 887 | } |
877 | 888 | ||
878 | static void tick_nohz_account_idle_ticks(struct tick_sched *ts) | 889 | static void tick_nohz_account_idle_ticks(struct tick_sched *ts) |
879 | { | 890 | { |
880 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | 891 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE |
881 | unsigned long ticks; | 892 | unsigned long ticks; |
882 | 893 | ||
883 | if (vtime_accounting_enabled()) | 894 | if (vtime_accounting_enabled()) |
884 | return; | 895 | return; |
885 | /* | 896 | /* |
886 |  * We stopped the tick in idle. update_process_times() would miss | 897 |  * We stopped the tick in idle. update_process_times() would miss |
887 |  * the time we slept, as it only does a 1 tick accounting per | 898 |  * the time we slept, as it only does a 1 tick accounting per |
888 |  * invocation. Enforce that this time is accounted to idle! | 899 |  * invocation. Enforce that this time is accounted to idle! |
889 | */ | 900 | */ |
890 | ticks = jiffies - ts->idle_jiffies; | 901 | ticks = jiffies - ts->idle_jiffies; |
891 | /* | 902 | /* |
892 | * We might be one off. Do not randomly account a huge number of ticks! | 903 | * We might be one off. Do not randomly account a huge number of ticks! |
893 | */ | 904 | */ |
894 | if (ticks && ticks < LONG_MAX) | 905 | if (ticks && ticks < LONG_MAX) |
895 | account_idle_ticks(ticks); | 906 | account_idle_ticks(ticks); |
896 | #endif | 907 | #endif |
897 | } | 908 | } |
898 | 909 | ||
899 | /** | 910 | /** |
900 | * tick_nohz_idle_exit - restart the idle tick from the idle task | 911 | * tick_nohz_idle_exit - restart the idle tick from the idle task |
901 | * | 912 | * |
902 |  * Restart the idle tick when the CPU is woken up from idle. | 913 |  * Restart the idle tick when the CPU is woken up from idle. |
903 |  * This also exits the RCU extended quiescent state. The CPU | 914 |  * This also exits the RCU extended quiescent state. The CPU |
904 | * can use RCU again after this function is called. | 915 | * can use RCU again after this function is called. |
905 | */ | 916 | */ |
906 | void tick_nohz_idle_exit(void) | 917 | void tick_nohz_idle_exit(void) |
907 | { | 918 | { |
908 | int cpu = smp_processor_id(); | 919 | int cpu = smp_processor_id(); |
909 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 920 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
910 | ktime_t now; | 921 | ktime_t now; |
911 | 922 | ||
912 | local_irq_disable(); | 923 | local_irq_disable(); |
913 | 924 | ||
914 | WARN_ON_ONCE(!ts->inidle); | 925 | WARN_ON_ONCE(!ts->inidle); |
915 | 926 | ||
916 | ts->inidle = 0; | 927 | ts->inidle = 0; |
917 | 928 | ||
918 | /* Cancel the timer because the CPU has already woken up from the C-states */ | 929 | /* Cancel the timer because the CPU has already woken up from the C-states */ |
919 | menu_hrtimer_cancel(); | 930 | menu_hrtimer_cancel(); |
920 | if (ts->idle_active || ts->tick_stopped) | 931 | if (ts->idle_active || ts->tick_stopped) |
921 | now = ktime_get(); | 932 | now = ktime_get(); |
922 | 933 | ||
923 | if (ts->idle_active) | 934 | if (ts->idle_active) |
924 | tick_nohz_stop_idle(cpu, now); | 935 | tick_nohz_stop_idle(cpu, now); |
925 | 936 | ||
926 | if (ts->tick_stopped) { | 937 | if (ts->tick_stopped) { |
927 | tick_nohz_restart_sched_tick(ts, now); | 938 | tick_nohz_restart_sched_tick(ts, now); |
928 | tick_nohz_account_idle_ticks(ts); | 939 | tick_nohz_account_idle_ticks(ts); |
929 | } | 940 | } |
930 | 941 | ||
931 | local_irq_enable(); | 942 | local_irq_enable(); |
932 | } | 943 | } |
933 | EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); | 944 | EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); |
934 | 945 | ||
935 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | 946 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) |
936 | { | 947 | { |
937 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 948 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
938 | return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); | 949 | return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); |
939 | } | 950 | } |
940 | 951 | ||
941 | /* | 952 | /* |
942 | * The nohz low res interrupt handler | 953 | * The nohz low res interrupt handler |
943 | */ | 954 | */ |
944 | static void tick_nohz_handler(struct clock_event_device *dev) | 955 | static void tick_nohz_handler(struct clock_event_device *dev) |
945 | { | 956 | { |
946 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 957 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
947 | struct pt_regs *regs = get_irq_regs(); | 958 | struct pt_regs *regs = get_irq_regs(); |
948 | ktime_t now = ktime_get(); | 959 | ktime_t now = ktime_get(); |
949 | 960 | ||
950 | dev->next_event.tv64 = KTIME_MAX; | 961 | dev->next_event.tv64 = KTIME_MAX; |
951 | 962 | ||
952 | tick_sched_do_timer(now); | 963 | tick_sched_do_timer(now); |
953 | tick_sched_handle(ts, regs); | 964 | tick_sched_handle(ts, regs); |
954 | 965 | ||
955 | while (tick_nohz_reprogram(ts, now)) { | 966 | while (tick_nohz_reprogram(ts, now)) { |
956 | now = ktime_get(); | 967 | now = ktime_get(); |
957 | tick_do_update_jiffies64(now); | 968 | tick_do_update_jiffies64(now); |
958 | } | 969 | } |
959 | } | 970 | } |
960 | 971 | ||
961 | /** | 972 | /** |
962 | * tick_nohz_switch_to_nohz - switch to nohz mode | 973 | * tick_nohz_switch_to_nohz - switch to nohz mode |
963 | */ | 974 | */ |
964 | static void tick_nohz_switch_to_nohz(void) | 975 | static void tick_nohz_switch_to_nohz(void) |
965 | { | 976 | { |
966 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 977 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
967 | ktime_t next; | 978 | ktime_t next; |
968 | 979 | ||
969 | if (!tick_nohz_enabled) | 980 | if (!tick_nohz_enabled) |
970 | return; | 981 | return; |
971 | 982 | ||
972 | local_irq_disable(); | 983 | local_irq_disable(); |
973 | if (tick_switch_to_oneshot(tick_nohz_handler)) { | 984 | if (tick_switch_to_oneshot(tick_nohz_handler)) { |
974 | local_irq_enable(); | 985 | local_irq_enable(); |
975 | return; | 986 | return; |
976 | } | 987 | } |
977 | 988 | ||
978 | ts->nohz_mode = NOHZ_MODE_LOWRES; | 989 | ts->nohz_mode = NOHZ_MODE_LOWRES; |
979 | 990 | ||
980 | /* | 991 | /* |
981 | * Recycle the hrtimer in ts, so we can share the | 992 | * Recycle the hrtimer in ts, so we can share the |
982 | * hrtimer_forward with the highres code. | 993 | * hrtimer_forward with the highres code. |
983 | */ | 994 | */ |
984 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 995 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
985 | /* Get the next period */ | 996 | /* Get the next period */ |
986 | next = tick_init_jiffy_update(); | 997 | next = tick_init_jiffy_update(); |
987 | 998 | ||
988 | for (;;) { | 999 | for (;;) { |
989 | hrtimer_set_expires(&ts->sched_timer, next); | 1000 | hrtimer_set_expires(&ts->sched_timer, next); |
990 | if (!tick_program_event(next, 0)) | 1001 | if (!tick_program_event(next, 0)) |
991 | break; | 1002 | break; |
992 | next = ktime_add(next, tick_period); | 1003 | next = ktime_add(next, tick_period); |
993 | } | 1004 | } |
994 | local_irq_enable(); | 1005 | local_irq_enable(); |
995 | } | 1006 | } |
996 | 1007 | ||
997 | /* | 1008 | /* |
998 | * When NOHZ is enabled and the tick is stopped, we need to kick the | 1009 | * When NOHZ is enabled and the tick is stopped, we need to kick the |
999 | * tick timer from irq_enter() so that the jiffies update is kept | 1010 | * tick timer from irq_enter() so that the jiffies update is kept |
1000 | * alive during long running softirqs. That's ugly as hell, but | 1011 | * alive during long running softirqs. That's ugly as hell, but |
1001 | * correctness is key even if we need to fix the offending softirq in | 1012 | * correctness is key even if we need to fix the offending softirq in |
1002 | * the first place. | 1013 | * the first place. |
1003 | * | 1014 | * |
1004 |  * Note: this is different from tick_nohz_restart. We just kick the | 1015 |  * Note: this is different from tick_nohz_restart. We just kick the |
1005 | * timer and do not touch the other magic bits which need to be done | 1016 | * timer and do not touch the other magic bits which need to be done |
1006 | * when idle is left. | 1017 | * when idle is left. |
1007 | */ | 1018 | */ |
1008 | static void tick_nohz_kick_tick(int cpu, ktime_t now) | 1019 | static void tick_nohz_kick_tick(int cpu, ktime_t now) |
1009 | { | 1020 | { |
1010 | #if 0 | 1021 | #if 0 |
1011 | /* Switch back to 2.6.27 behaviour */ | 1022 | /* Switch back to 2.6.27 behaviour */ |
1012 | 1023 | ||
1013 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 1024 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
1014 | ktime_t delta; | 1025 | ktime_t delta; |
1015 | 1026 | ||
1016 | /* | 1027 | /* |
1017 |  * Do not touch the tick device when the next expiry is either | 1028 |  * Do not touch the tick device when the next expiry is either |
1018 |  * already reached or less than or equal to the tick period. | 1029 |  * already reached or less than or equal to the tick period. |
1019 | */ | 1030 | */ |
1020 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); | 1031 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); |
1021 | if (delta.tv64 <= tick_period.tv64) | 1032 | if (delta.tv64 <= tick_period.tv64) |
1022 | return; | 1033 | return; |
1023 | 1034 | ||
1024 | tick_nohz_restart(ts, now); | 1035 | tick_nohz_restart(ts, now); |
1025 | #endif | 1036 | #endif |
1026 | } | 1037 | } |
1027 | 1038 | ||
1028 | static inline void tick_check_nohz(int cpu) | 1039 | static inline void tick_check_nohz(int cpu) |
1029 | { | 1040 | { |
1030 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 1041 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
1031 | ktime_t now; | 1042 | ktime_t now; |
1032 | 1043 | ||
1033 | if (!ts->idle_active && !ts->tick_stopped) | 1044 | if (!ts->idle_active && !ts->tick_stopped) |
1034 | return; | 1045 | return; |
1035 | now = ktime_get(); | 1046 | now = ktime_get(); |
1036 | if (ts->idle_active) | 1047 | if (ts->idle_active) |
1037 | tick_nohz_stop_idle(cpu, now); | 1048 | tick_nohz_stop_idle(cpu, now); |
1038 | if (ts->tick_stopped) { | 1049 | if (ts->tick_stopped) { |
1039 | tick_nohz_update_jiffies(now); | 1050 | tick_nohz_update_jiffies(now); |
1040 | tick_nohz_kick_tick(cpu, now); | 1051 | tick_nohz_kick_tick(cpu, now); |
1041 | } | 1052 | } |
1042 | } | 1053 | } |
1043 | 1054 | ||
1044 | #else | 1055 | #else |
1045 | 1056 | ||
1046 | static inline void tick_nohz_switch_to_nohz(void) { } | 1057 | static inline void tick_nohz_switch_to_nohz(void) { } |
1047 | static inline void tick_check_nohz(int cpu) { } | 1058 | static inline void tick_check_nohz(int cpu) { } |
1048 | 1059 | ||
1049 | #endif /* CONFIG_NO_HZ_COMMON */ | 1060 | #endif /* CONFIG_NO_HZ_COMMON */ |
1050 | 1061 | ||
1051 | /* | 1062 | /* |
1052 | * Called from irq_enter to notify about the possible interruption of idle() | 1063 | * Called from irq_enter to notify about the possible interruption of idle() |
1053 | */ | 1064 | */ |
1054 | void tick_check_idle(int cpu) | 1065 | void tick_check_idle(int cpu) |
1055 | { | 1066 | { |
1056 | tick_check_oneshot_broadcast(cpu); | 1067 | tick_check_oneshot_broadcast(cpu); |
1057 | tick_check_nohz(cpu); | 1068 | tick_check_nohz(cpu); |
1058 | } | 1069 | } |
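
For context, tick_check_idle() is invoked from the generic irq_enter() path, so an interrupting IRQ sees fresh jiffies before any handler runs. A simplified (not verbatim) sketch of that call site:

/*
 * Simplified sketch of the irq_enter() call site; details differ in
 * the real kernel/softirq.c.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	rcu_irq_enter();
	if (is_idle_task(current) && !in_interrupt()) {
		/* Softirqs will be serviced on return from interrupt,
		 * so avoid waking ksoftirqd from here. */
		local_bh_disable();
		tick_check_idle(cpu);
		_local_bh_enable();
	}

	__irq_enter();
}
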
1059 | 1070 | ||
1060 | /* | 1071 | /* |
1061 | * High resolution timer specific code | 1072 | * High resolution timer specific code |
1062 | */ | 1073 | */ |
1063 | #ifdef CONFIG_HIGH_RES_TIMERS | 1074 | #ifdef CONFIG_HIGH_RES_TIMERS |
1064 | /* | 1075 | /* |
1065 | * We rearm the timer until we get disabled by the idle code. | 1076 | * We rearm the timer until we get disabled by the idle code. |
1066 | * Called with interrupts disabled. | 1077 | * Called with interrupts disabled. |
1067 | */ | 1078 | */ |
1068 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | 1079 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) |
1069 | { | 1080 | { |
1070 | struct tick_sched *ts = | 1081 | struct tick_sched *ts = |
1071 | container_of(timer, struct tick_sched, sched_timer); | 1082 | container_of(timer, struct tick_sched, sched_timer); |
1072 | struct pt_regs *regs = get_irq_regs(); | 1083 | struct pt_regs *regs = get_irq_regs(); |
1073 | ktime_t now = ktime_get(); | 1084 | ktime_t now = ktime_get(); |
1074 | 1085 | ||
1075 | tick_sched_do_timer(now); | 1086 | tick_sched_do_timer(now); |
1076 | 1087 | ||
1077 | /* | 1088 | /* |
1078 |  * Do not call when we are not in irq context and have | 1089 |  * Do not call when we are not in irq context and have |
1079 | * no valid regs pointer | 1090 | * no valid regs pointer |
1080 | */ | 1091 | */ |
1081 | if (regs) | 1092 | if (regs) |
1082 | tick_sched_handle(ts, regs); | 1093 | tick_sched_handle(ts, regs); |
1083 | 1094 | ||
1084 | hrtimer_forward(timer, now, tick_period); | 1095 | hrtimer_forward(timer, now, tick_period); |
1085 | 1096 | ||
1086 | return HRTIMER_RESTART; | 1097 | return HRTIMER_RESTART; |
1087 | } | 1098 | } |
1088 | 1099 | ||
1089 | static int sched_skew_tick; | 1100 | static int sched_skew_tick; |
1090 | 1101 | ||
1091 | static int __init skew_tick(char *str) | 1102 | static int __init skew_tick(char *str) |
1092 | { | 1103 | { |
1093 | get_option(&str, &sched_skew_tick); | 1104 | get_option(&str, &sched_skew_tick); |
1094 | 1105 | ||
1095 | return 0; | 1106 | return 0; |
1096 | } | 1107 | } |
1097 | early_param("skew_tick", skew_tick); | 1108 | early_param("skew_tick", skew_tick); |
1098 | 1109 | ||
1099 | /** | 1110 | /** |
1100 | * tick_setup_sched_timer - setup the tick emulation timer | 1111 | * tick_setup_sched_timer - setup the tick emulation timer |
1101 | */ | 1112 | */ |
1102 | void tick_setup_sched_timer(void) | 1113 | void tick_setup_sched_timer(void) |
1103 | { | 1114 | { |
1104 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 1115 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
1105 | ktime_t now = ktime_get(); | 1116 | ktime_t now = ktime_get(); |
1106 | 1117 | ||
1107 | /* | 1118 | /* |
1108 | * Emulate tick processing via per-CPU hrtimers: | 1119 | * Emulate tick processing via per-CPU hrtimers: |
1109 | */ | 1120 | */ |
1110 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 1121 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
1111 | ts->sched_timer.function = tick_sched_timer; | 1122 | ts->sched_timer.function = tick_sched_timer; |
1112 | 1123 | ||
1113 | /* Get the next period (per cpu) */ | 1124 | /* Get the next period (per cpu) */ |
1114 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 1125 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
1115 | 1126 | ||
1116 | /* Offset the tick to avert jiffies_lock contention. */ | 1127 | /* Offset the tick to avert jiffies_lock contention. */ |
1117 | if (sched_skew_tick) { | 1128 | if (sched_skew_tick) { |
1118 | u64 offset = ktime_to_ns(tick_period) >> 1; | 1129 | u64 offset = ktime_to_ns(tick_period) >> 1; |
1119 | do_div(offset, num_possible_cpus()); | 1130 | do_div(offset, num_possible_cpus()); |
1120 | offset *= smp_processor_id(); | 1131 | offset *= smp_processor_id(); |
1121 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | 1132 | hrtimer_add_expires_ns(&ts->sched_timer, offset); |
1122 | } | 1133 | } |
1123 | 1134 | ||
1124 | for (;;) { | 1135 | for (;;) { |
1125 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 1136 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
1126 | hrtimer_start_expires(&ts->sched_timer, | 1137 | hrtimer_start_expires(&ts->sched_timer, |
1127 | HRTIMER_MODE_ABS_PINNED); | 1138 | HRTIMER_MODE_ABS_PINNED); |
1128 | /* Check if the timer was already in the past */ | 1139 | /* Check if the timer was already in the past */ |
1129 | if (hrtimer_active(&ts->sched_timer)) | 1140 | if (hrtimer_active(&ts->sched_timer)) |
1130 | break; | 1141 | break; |
1131 | now = ktime_get(); | 1142 | now = ktime_get(); |
1132 | } | 1143 | } |
1133 | 1144 | ||
1134 | #ifdef CONFIG_NO_HZ_COMMON | 1145 | #ifdef CONFIG_NO_HZ_COMMON |
1135 | if (tick_nohz_enabled) | 1146 | if (tick_nohz_enabled) |
1136 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 1147 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
1137 | #endif | 1148 | #endif |
1138 | } | 1149 | } |
1139 | #endif /* HIGH_RES_TIMERS */ | 1150 | #endif /* HIGH_RES_TIMERS */ |
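
The skew_tick= boot parameter parsed above feeds the offset computation in tick_setup_sched_timer(): half a tick period, divided evenly over the possible CPUs, multiplied by the CPU id. A standalone userspace sketch of the same arithmetic, with example values (a 1 ms tick, i.e. HZ=1000, and 8 CPUs), shows the resulting stagger:

#include <stdint.h>
#include <stdio.h>

/*
 * Standalone sketch of the skew arithmetic from
 * tick_setup_sched_timer(). HZ=1000 and 8 CPUs are example values.
 */
int main(void)
{
	uint64_t tick_period_ns = 1000000;	/* 1 ms tick (HZ=1000) */
	unsigned int ncpus = 8;

	for (unsigned int cpu = 0; cpu < ncpus; cpu++) {
		/* half a tick period, spread evenly across the CPUs */
		uint64_t offset = (tick_period_ns >> 1) / ncpus * cpu;
		printf("cpu%u: tick skewed by %llu ns\n",
		       cpu, (unsigned long long)offset);
	}
	return 0;
}

Booting with skew_tick=1 enables this offsetting, spreading the per-CPU tick expiries apart to avert the jiffies_lock contention the comment mentions.
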
1140 | 1151 | ||
1141 | #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS | 1152 | #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS |
1142 | void tick_cancel_sched_timer(int cpu) | 1153 | void tick_cancel_sched_timer(int cpu) |
1143 | { | 1154 | { |
1144 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 1155 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
1145 | 1156 | ||
1146 | # ifdef CONFIG_HIGH_RES_TIMERS | 1157 | # ifdef CONFIG_HIGH_RES_TIMERS |
1147 | if (ts->sched_timer.base) | 1158 | if (ts->sched_timer.base) |
1148 | hrtimer_cancel(&ts->sched_timer); | 1159 | hrtimer_cancel(&ts->sched_timer); |
1149 | # endif | 1160 | # endif |
1150 | 1161 | ||
1151 | ts->nohz_mode = NOHZ_MODE_INACTIVE; | 1162 | ts->nohz_mode = NOHZ_MODE_INACTIVE; |
1152 | } | 1163 | } |
1153 | #endif | 1164 | #endif |
1154 | 1165 | ||
1155 | /** | 1166 | /** |
1156 | * Async notification about clocksource changes | 1167 | * Async notification about clocksource changes |
1157 | */ | 1168 | */ |
1158 | void tick_clock_notify(void) | 1169 | void tick_clock_notify(void) |
1159 | { | 1170 | { |
1160 | int cpu; | 1171 | int cpu; |
1161 | 1172 | ||
1162 | for_each_possible_cpu(cpu) | 1173 | for_each_possible_cpu(cpu) |
1163 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); | 1174 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); |
1164 | } | 1175 | } |
1165 | 1176 | ||
1166 | /* | 1177 | /* |
1167 | * Async notification about clock event changes | 1178 | * Async notification about clock event changes |
1168 | */ | 1179 | */ |
1169 | void tick_oneshot_notify(void) | 1180 | void tick_oneshot_notify(void) |
1170 | { | 1181 | { |
1171 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 1182 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
1172 | 1183 | ||
1173 | set_bit(0, &ts->check_clocks); | 1184 | set_bit(0, &ts->check_clocks); |
1174 | } | 1185 | } |
1175 | 1186 | ||
1176 | /** | 1187 | /** |
1177 |  * Check if a change happened which makes oneshot mode possible. | 1188 |  * Check if a change happened which makes oneshot mode possible. |
1178 |  * | 1189 |  * |
1179 |  * Called cyclically from the hrtimer softirq (driven by the timer | 1190 |  * Called cyclically from the hrtimer softirq (driven by the timer |
1180 |  * softirq). allow_nohz signals that we can switch into low-res nohz | 1191 |  * softirq). allow_nohz signals that we can switch into low-res nohz |
1181 |  * mode, because high resolution timers are disabled (either at | 1192 |  * mode, because high resolution timers are disabled (either at |
1182 |  * compile time or at runtime). | 1193 |  * compile time or at runtime). |
1183 | */ | 1194 | */ |
1184 | int tick_check_oneshot_change(int allow_nohz) | 1195 | int tick_check_oneshot_change(int allow_nohz) |
1185 | { | 1196 | { |
1186 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 1197 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
1187 | 1198 | ||
1188 | if (!test_and_clear_bit(0, &ts->check_clocks)) | 1199 | if (!test_and_clear_bit(0, &ts->check_clocks)) |
1189 | return 0; | 1200 | return 0; |
1190 | 1201 | ||
1191 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) | 1202 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) |
1192 | return 0; | 1203 | return 0; |
1193 | 1204 | ||
1194 | if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) | 1205 | if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) |
1195 | return 0; | 1206 | return 0; |
1196 | 1207 | ||
1197 | if (!allow_nohz) | 1208 | if (!allow_nohz) |
1198 | return 1; | 1209 | return 1; |
1199 | 1210 | ||
1200 | tick_nohz_switch_to_nohz(); | 1211 | tick_nohz_switch_to_nohz(); |
1201 | return 0; | 1212 | return 0; |
1202 | } | 1213 | } |
1203 | 1214 |