Commit b845b517b5e3706a3729f6ea83b88ab85f0725b0

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent 796aadeb1b

printk: robustify printk

Avoid deadlocks against rq->lock and xtime_lock: instead of waking klogd
directly from printk(), defer the wakeup and have the timer tick poll for
pending wakeups.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 4 changed files with 23 additions and 3 deletions
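Only two of the four changed files appear below, and the kernel/printk.c listing is truncated before the new code, so the mechanism itself is not visible on this page. What follows is a minimal sketch of what the commit message and the declarations added to <linux/kernel.h> imply; the per-CPU flag name printk_pending and the accessor choices are assumptions, not taken from this page. The idea: printk() may be called with rq->lock or xtime_lock held, and waking klogd from there can recurse into those locks, so wake_up_klogd() only records a pending wakeup, which the timer tick later delivers from a safe context.

	/*
	 * Sketch only: deferred klogd wakeup polled from the timer tick.
	 * printk_tick(), printk_needs_cpu() and wake_up_klogd() are the
	 * interfaces from this commit; the rest is illustrative.
	 */
	static DEFINE_PER_CPU(int, printk_pending);

	void printk_tick(void)
	{
		/* Runs from the timer tick, where wake_up() is deadlock-free. */
		if (__get_cpu_var(printk_pending)) {
			__get_cpu_var(printk_pending) = 0;
			wake_up_interruptible(&log_wait);
		}
	}

	int printk_needs_cpu(int cpu)
	{
		/* Lets the NO_HZ code keep the tick alive while a wakeup is pending. */
		return per_cpu(printk_pending, cpu);
	}

	void wake_up_klogd(void)
	{
		/* Record the wakeup instead of performing it; the caller may
		 * hold rq->lock or xtime_lock. */
		if (waitqueue_active(&log_wait))
			__get_cpu_var(printk_pending) = 1;
	}

Polling from the tick trades a small, bounded wakeup latency (at most one tick) for freedom from lock-ordering problems in the printk path.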

include/linux/kernel.h
#ifndef _LINUX_KERNEL_H
#define _LINUX_KERNEL_H

/*
 * 'kernel.h' contains some often-used function prototypes etc
 */

#ifdef __KERNEL__

#include <stdarg.h>
#include <linux/linkage.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/typecheck.h>
#include <linux/ratelimit.h>
#include <asm/byteorder.h>
#include <asm/bug.h>

extern const char linux_banner[];
extern const char linux_proc_banner[];

#define USHORT_MAX ((u16)(~0U))
#define SHORT_MAX ((s16)(USHORT_MAX>>1))
#define SHORT_MIN (-SHORT_MAX - 1)
#define INT_MAX ((int)(~0U>>1))
#define INT_MIN (-INT_MAX - 1)
#define UINT_MAX (~0U)
#define LONG_MAX ((long)(~0UL>>1))
#define LONG_MIN (-LONG_MAX - 1)
#define ULONG_MAX (~0UL)
#define LLONG_MAX ((long long)(~0ULL>>1))
#define LLONG_MIN (-LLONG_MAX - 1)
#define ULLONG_MAX (~0ULL)

#define STACK_MAGIC 0xdeadbeef

#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1)
#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

#define _RET_IP_ (unsigned long)__builtin_return_address(0)
#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })

#ifdef CONFIG_LBD
# include <asm/div64.h>
# define sector_div(a, b) do_div(a, b)
#else
# define sector_div(n, b)( \
{ \
	int _res; \
	_res = (n) % (b); \
	(n) /= (b); \
	_res; \
} \
)
#endif

/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32-bits.
 */
#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))

/**
 * lower_32_bits - return bits 0-31 of a number
 * @n: the number we're accessing
 */
#define lower_32_bits(n) ((u32)(n))

#define KERN_EMERG "<0>" /* system is unusable */
#define KERN_ALERT "<1>" /* action must be taken immediately */
#define KERN_CRIT "<2>" /* critical conditions */
#define KERN_ERR "<3>" /* error conditions */
#define KERN_WARNING "<4>" /* warning conditions */
#define KERN_NOTICE "<5>" /* normal but significant condition */
#define KERN_INFO "<6>" /* informational */
#define KERN_DEBUG "<7>" /* debug-level messages */

/*
 * Annotation for a "continued" line of log printout (only done after a
 * line that had no enclosing \n). Only to be used by core/arch code
 * during early bootup (a continued line is not SMP-safe otherwise).
 */
#define KERN_CONT ""

extern int console_printk[];

#define console_loglevel (console_printk[0])
#define default_message_loglevel (console_printk[1])
#define minimum_console_loglevel (console_printk[2])
#define default_console_loglevel (console_printk[3])

struct completion;
struct pt_regs;
struct user;

/**
 * might_sleep - annotation for functions that can sleep
 *
 * this macro will print a stack trace if it is executed in an atomic
 * context (spinlock, irq-handler, ...).
 *
 * This is a useful debugging help to be able to catch problems early and not
 * be bitten later when the calling function happens to sleep when it is not
 * supposed to.
 */
#ifdef CONFIG_PREEMPT_VOLUNTARY
extern int _cond_resched(void);
# define might_resched() _cond_resched()
#else
# define might_resched() do { } while (0)
#endif

#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
void __might_sleep(char *file, int line);
# define might_sleep() \
	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
#else
# define might_sleep() do { might_resched(); } while (0)
#endif

#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)

#define abs(x) ({ \
	int __x = (x); \
	(__x < 0) ? -__x : __x; \
})

extern struct atomic_notifier_head panic_notifier_list;
extern long (*panic_blink)(long time);
NORET_TYPE void panic(const char * fmt, ...)
	__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
extern void oops_enter(void);
extern void oops_exit(void);
extern int oops_may_print(void);
NORET_TYPE void do_exit(long error_code)
	ATTRIB_NORET;
NORET_TYPE void complete_and_exit(struct completion *, long)
	ATTRIB_NORET;
extern unsigned long simple_strtoul(const char *,char **,unsigned int);
extern long simple_strtol(const char *,char **,unsigned int);
extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
extern long long simple_strtoll(const char *,char **,unsigned int);
extern int strict_strtoul(const char *, unsigned int, unsigned long *);
extern int strict_strtol(const char *, unsigned int, long *);
extern int strict_strtoull(const char *, unsigned int, unsigned long long *);
extern int strict_strtoll(const char *, unsigned int, long long *);
extern int sprintf(char * buf, const char * fmt, ...)
	__attribute__ ((format (printf, 2, 3)));
extern int vsprintf(char *buf, const char *, va_list)
	__attribute__ ((format (printf, 2, 0)));
extern int snprintf(char * buf, size_t size, const char * fmt, ...)
	__attribute__ ((format (printf, 3, 4)));
extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
	__attribute__ ((format (printf, 3, 0)));
extern int scnprintf(char * buf, size_t size, const char * fmt, ...)
	__attribute__ ((format (printf, 3, 4)));
extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
	__attribute__ ((format (printf, 3, 0)));
extern char *kasprintf(gfp_t gfp, const char *fmt, ...)
	__attribute__ ((format (printf, 2, 3)));
extern char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);

extern int sscanf(const char *, const char *, ...)
	__attribute__ ((format (scanf, 2, 3)));
extern int vsscanf(const char *, const char *, va_list)
	__attribute__ ((format (scanf, 2, 0)));

extern int get_option(char **str, int *pint);
extern char *get_options(const char *str, int nints, int *ints);
extern unsigned long long memparse(char *ptr, char **retptr);

extern int core_kernel_text(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr);
struct pid;
extern struct pid *session_of_pgrp(struct pid *pgrp);

#ifdef CONFIG_PRINTK
asmlinkage int vprintk(const char *fmt, va_list args)
	__attribute__ ((format (printf, 1, 0)));
asmlinkage int printk(const char * fmt, ...)
	__attribute__ ((format (printf, 1, 2))) __cold;

extern struct ratelimit_state printk_ratelimit_state;
extern int printk_ratelimit(void);
extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
	unsigned int interval_msec);
+extern void printk_tick(void);
+extern int printk_needs_cpu(int cpu);
#else
static inline int vprintk(const char *s, va_list args)
	__attribute__ ((format (printf, 1, 0)));
static inline int vprintk(const char *s, va_list args) { return 0; }
static inline int printk(const char *s, ...)
	__attribute__ ((format (printf, 1, 2)));
static inline int __cold printk(const char *s, ...) { return 0; }
static inline int printk_ratelimit(void) { return 0; }
static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
	unsigned int interval_msec) \
	{ return false; }
+static inline void printk_tick(void) { }
+static inline int printk_needs_cpu(int cpu) { return 0; }
#endif

extern void asmlinkage __attribute__((format(printf, 1, 2)))
	early_printk(const char *fmt, ...);

unsigned long int_sqrt(unsigned long);

static inline void console_silent(void)
{
	console_loglevel = 0;
}

static inline void console_verbose(void)
{
	if (console_loglevel)
		console_loglevel = 15;
}

extern void bust_spinlocks(int yes);
extern void wake_up_klogd(void);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
extern int panic_timeout;
extern int panic_on_oops;
extern int panic_on_unrecovered_nmi;
extern int tainted;
extern const char *print_tainted(void);
extern void add_taint(unsigned);
extern int root_mountflags;

/* Values used for system_state */
extern enum system_states {
	SYSTEM_BOOTING,
	SYSTEM_RUNNING,
	SYSTEM_HALT,
	SYSTEM_POWER_OFF,
	SYSTEM_RESTART,
	SYSTEM_SUSPEND_DISK,
} system_state;

#define TAINT_PROPRIETARY_MODULE (1<<0)
#define TAINT_FORCED_MODULE (1<<1)
#define TAINT_UNSAFE_SMP (1<<2)
#define TAINT_FORCED_RMMOD (1<<3)
#define TAINT_MACHINE_CHECK (1<<4)
#define TAINT_BAD_PAGE (1<<5)
#define TAINT_USER (1<<6)
#define TAINT_DIE (1<<7)
#define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8)
#define TAINT_WARN (1<<9)

extern void dump_stack(void) __cold;

enum {
	DUMP_PREFIX_NONE,
	DUMP_PREFIX_ADDRESS,
	DUMP_PREFIX_OFFSET
};
extern void hex_dump_to_buffer(const void *buf, size_t len,
	int rowsize, int groupsize,
	char *linebuf, size_t linebuflen, bool ascii);
extern void print_hex_dump(const char *level, const char *prefix_str,
	int prefix_type, int rowsize, int groupsize,
	const void *buf, size_t len, bool ascii);
extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
	const void *buf, size_t len);

extern const char hex_asc[];
#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4]

static inline char *pack_hex_byte(char *buf, u8 byte)
{
	*buf++ = hex_asc_hi(byte);
	*buf++ = hex_asc_lo(byte);
	return buf;
}

#define pr_emerg(fmt, arg...) \
	printk(KERN_EMERG fmt, ##arg)
#define pr_alert(fmt, arg...) \
	printk(KERN_ALERT fmt, ##arg)
#define pr_crit(fmt, arg...) \
	printk(KERN_CRIT fmt, ##arg)
#define pr_err(fmt, arg...) \
	printk(KERN_ERR fmt, ##arg)
#define pr_warning(fmt, arg...) \
	printk(KERN_WARNING fmt, ##arg)
#define pr_notice(fmt, arg...) \
	printk(KERN_NOTICE fmt, ##arg)
#define pr_info(fmt, arg...) \
	printk(KERN_INFO fmt, ##arg)

#ifdef DEBUG
/* If you are writing a driver, please use dev_dbg instead */
#define pr_debug(fmt, arg...) \
	printk(KERN_DEBUG fmt, ##arg)
#else
#define pr_debug(fmt, arg...) \
	({ if (0) printk(KERN_DEBUG fmt, ##arg); 0; })
#endif

/*
 * Display an IP address in readable format.
 */

#define NIPQUAD(addr) \
	((unsigned char *)&addr)[0], \
	((unsigned char *)&addr)[1], \
	((unsigned char *)&addr)[2], \
	((unsigned char *)&addr)[3]
#define NIPQUAD_FMT "%u.%u.%u.%u"

#define NIP6(addr) \
	ntohs((addr).s6_addr16[0]), \
	ntohs((addr).s6_addr16[1]), \
	ntohs((addr).s6_addr16[2]), \
	ntohs((addr).s6_addr16[3]), \
	ntohs((addr).s6_addr16[4]), \
	ntohs((addr).s6_addr16[5]), \
	ntohs((addr).s6_addr16[6]), \
	ntohs((addr).s6_addr16[7])
#define NIP6_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x"
#define NIP6_SEQFMT "%04x%04x%04x%04x%04x%04x%04x%04x"

#if defined(__LITTLE_ENDIAN)
#define HIPQUAD(addr) \
	((unsigned char *)&addr)[3], \
	((unsigned char *)&addr)[2], \
	((unsigned char *)&addr)[1], \
	((unsigned char *)&addr)[0]
#elif defined(__BIG_ENDIAN)
#define HIPQUAD NIPQUAD
#else
#error "Please fix asm/byteorder.h"
#endif /* __LITTLE_ENDIAN */

/*
 * min()/max()/clamp() macros that also do
 * strict type-checking.. See the
 * "unnecessary" pointer comparison.
 */
#define min(x, y) ({ \
	typeof(x) _min1 = (x); \
	typeof(y) _min2 = (y); \
	(void) (&_min1 == &_min2); \
	_min1 < _min2 ? _min1 : _min2; })

#define max(x, y) ({ \
	typeof(x) _max1 = (x); \
	typeof(y) _max2 = (y); \
	(void) (&_max1 == &_max2); \
	_max1 > _max2 ? _max1 : _max2; })

/**
 * clamp - return a value clamped to a given range with strict typechecking
 * @val: current value
 * @min: minimum allowable value
 * @max: maximum allowable value
 *
 * This macro does strict typechecking of min/max to make sure they are of the
 * same type as val. See the unnecessary pointer comparisons.
 */
#define clamp(val, min, max) ({ \
	typeof(val) __val = (val); \
	typeof(min) __min = (min); \
	typeof(max) __max = (max); \
	(void) (&__val == &__min); \
	(void) (&__val == &__max); \
	__val = __val < __min ? __min: __val; \
	__val > __max ? __max: __val; })

/*
 * ..and if you can't take the strict
 * types, you can specify one yourself.
 *
 * Or not use min/max/clamp at all, of course.
 */
#define min_t(type, x, y) ({ \
	type __min1 = (x); \
	type __min2 = (y); \
	__min1 < __min2 ? __min1: __min2; })

#define max_t(type, x, y) ({ \
	type __max1 = (x); \
	type __max2 = (y); \
	__max1 > __max2 ? __max1: __max2; })

/**
 * clamp_t - return a value clamped to a given range using a given type
 * @type: the type of variable to use
 * @val: current value
 * @min: minimum allowable value
 * @max: maximum allowable value
 *
 * This macro does no typechecking and uses temporary variables of type
 * 'type' to make all the comparisons.
 */
#define clamp_t(type, val, min, max) ({ \
	type __val = (val); \
	type __min = (min); \
	type __max = (max); \
	__val = __val < __min ? __min: __val; \
	__val > __max ? __max: __val; })

/**
 * clamp_val - return a value clamped to a given range using val's type
 * @val: current value
 * @min: minimum allowable value
 * @max: maximum allowable value
 *
 * This macro does no typechecking and uses temporary variables of whatever
 * type the input argument 'val' is. This is useful when val is an unsigned
 * type and min and max are literals that will otherwise be assigned a signed
 * integer type.
 */
#define clamp_val(val, min, max) ({ \
	typeof(val) __val = (val); \
	typeof(val) __min = (min); \
	typeof(val) __max = (max); \
	__val = __val < __min ? __min: __val; \
	__val > __max ? __max: __val; })

/**
 * container_of - cast a member of a structure out to the containing structure
 * @ptr: the pointer to the member.
 * @type: the type of the container struct this is embedded in.
 * @member: the name of the member within the struct.
 *
 */
#define container_of(ptr, type, member) ({ \
	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
	(type *)( (char *)__mptr - offsetof(type,member) );})

struct sysinfo;
extern int do_sysinfo(struct sysinfo *info);

#endif /* __KERNEL__ */

#define SI_LOAD_SHIFT 16
struct sysinfo {
	long uptime; /* Seconds since boot */
	unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
	unsigned long totalram; /* Total usable main memory size */
	unsigned long freeram; /* Available memory size */
	unsigned long sharedram; /* Amount of shared memory */
	unsigned long bufferram; /* Memory used by buffers */
	unsigned long totalswap; /* Total swap space size */
	unsigned long freeswap; /* swap space still available */
	unsigned short procs; /* Number of current processes */
	unsigned short pad; /* explicit padding for m68k */
	unsigned long totalhigh; /* Total high memory size */
	unsigned long freehigh; /* Available high memory size */
	unsigned int mem_unit; /* Memory unit size in bytes */
	char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */
};

/* Force a compilation error if condition is true */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

/* Force a compilation error if condition is true, but also produce a
   result (of value 0 and type size_t), so the expression can be used
   e.g. in a structure initializer (or where-ever else comma expressions
   aren't permitted). */
#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)

/* Trap pasters of __FUNCTION__ at compile-time */
#define __FUNCTION__ (__func__)

/* This helps us to avoid #ifdef CONFIG_NUMA */
#ifdef CONFIG_NUMA
#define NUMA_BUILD 1
#else
#define NUMA_BUILD 0
#endif

#endif

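The remaining two changed files, which hook the new functions into the timer code, are not shown on this page. Presumably the periodic tick calls printk_tick() and the NO_HZ idle path consults printk_needs_cpu() before stopping the tick; a hypothetical caller (tick_nohz_can_stop() is an invented name, not from this commit) might look like:

	/* Illustrative only: keep the tick alive while a klogd wakeup is
	 * still pending, otherwise printk_tick() would never deliver it. */
	static int tick_nohz_can_stop(int cpu)
	{
		return !printk_needs_cpu(cpu);
	}
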
1 /* 1 /*
2 * linux/kernel/printk.c 2 * linux/kernel/printk.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * Modified to make sys_syslog() more flexible: added commands to 6 * Modified to make sys_syslog() more flexible: added commands to
7 * return the last 4k of kernel messages, regardless of whether 7 * return the last 4k of kernel messages, regardless of whether
8 * they've been read or not. Added option to suppress kernel printk's 8 * they've been read or not. Added option to suppress kernel printk's
9 * to the console. Added hook for sending the console messages 9 * to the console. Added hook for sending the console messages
10 * elsewhere, in preparation for a serial line console (someday). 10 * elsewhere, in preparation for a serial line console (someday).
11 * Ted Ts'o, 2/11/93. 11 * Ted Ts'o, 2/11/93.
12 * Modified for sysctl support, 1/8/97, Chris Horn. 12 * Modified for sysctl support, 1/8/97, Chris Horn.
13 * Fixed SMP synchronization, 08/08/99, Manfred Spraul 13 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
14 * manfred@colorfullife.com 14 * manfred@colorfullife.com
15 * Rewrote bits to get rid of console_lock 15 * Rewrote bits to get rid of console_lock
16 * 01Mar01 Andrew Morton <andrewm@uow.edu.au> 16 * 01Mar01 Andrew Morton <andrewm@uow.edu.au>
17 */ 17 */
18 18
19 #include <linux/kernel.h> 19 #include <linux/kernel.h>
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/tty.h> 21 #include <linux/tty.h>
22 #include <linux/tty_driver.h> 22 #include <linux/tty_driver.h>
23 #include <linux/console.h> 23 #include <linux/console.h>
24 #include <linux/init.h> 24 #include <linux/init.h>
25 #include <linux/jiffies.h> 25 #include <linux/jiffies.h>
26 #include <linux/nmi.h> 26 #include <linux/nmi.h>
27 #include <linux/module.h> 27 #include <linux/module.h>
28 #include <linux/moduleparam.h> 28 #include <linux/moduleparam.h>
29 #include <linux/interrupt.h> /* For in_interrupt() */ 29 #include <linux/interrupt.h> /* For in_interrupt() */
30 #include <linux/delay.h> 30 #include <linux/delay.h>
31 #include <linux/smp.h> 31 #include <linux/smp.h>
32 #include <linux/security.h> 32 #include <linux/security.h>
33 #include <linux/bootmem.h> 33 #include <linux/bootmem.h>
34 #include <linux/syscalls.h> 34 #include <linux/syscalls.h>
35 35
36 #include <asm/uaccess.h> 36 #include <asm/uaccess.h>
37 37
38 /* 38 /*
39 * Architectures can override it: 39 * Architectures can override it:
40 */ 40 */
41 void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) 41 void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
42 { 42 {
43 } 43 }
44 44
45 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 45 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
46 46
47 /* printk's without a loglevel use this.. */ 47 /* printk's without a loglevel use this.. */
48 #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ 48 #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
49 49
50 /* We show everything that is MORE important than this.. */ 50 /* We show everything that is MORE important than this.. */
51 #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ 51 #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
52 #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ 52 #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
53 53
54 DECLARE_WAIT_QUEUE_HEAD(log_wait); 54 DECLARE_WAIT_QUEUE_HEAD(log_wait);
55 55
56 int console_printk[4] = { 56 int console_printk[4] = {
57 DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ 57 DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */
58 DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ 58 DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
59 MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ 59 MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */
60 DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ 60 DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */
61 }; 61 };
62 62
63 /* 63 /*
64 * Low level drivers may need that to know if they can schedule in 64 * Low level drivers may need that to know if they can schedule in
65 * their unblank() callback or not. So let's export it. 65 * their unblank() callback or not. So let's export it.
66 */ 66 */
67 int oops_in_progress; 67 int oops_in_progress;
68 EXPORT_SYMBOL(oops_in_progress); 68 EXPORT_SYMBOL(oops_in_progress);
69 69
70 /* 70 /*
71 * console_sem protects the console_drivers list, and also 71 * console_sem protects the console_drivers list, and also
72 * provides serialisation for access to the entire console 72 * provides serialisation for access to the entire console
73 * driver system. 73 * driver system.
74 */ 74 */
75 static DECLARE_MUTEX(console_sem); 75 static DECLARE_MUTEX(console_sem);
76 static DECLARE_MUTEX(secondary_console_sem); 76 static DECLARE_MUTEX(secondary_console_sem);
77 struct console *console_drivers; 77 struct console *console_drivers;
78 EXPORT_SYMBOL_GPL(console_drivers); 78 EXPORT_SYMBOL_GPL(console_drivers);
79 79
80 /* 80 /*
81 * This is used for debugging the mess that is the VT code by 81 * This is used for debugging the mess that is the VT code by
82 * keeping track if we have the console semaphore held. It's 82 * keeping track if we have the console semaphore held. It's
83 * definitely not the perfect debug tool (we don't know if _WE_ 83 * definitely not the perfect debug tool (we don't know if _WE_
84 * hold it are racing, but it helps tracking those weird code 84 * hold it are racing, but it helps tracking those weird code
85 * path in the console code where we end up in places I want 85 * path in the console code where we end up in places I want
86 * locked without the console sempahore held 86 * locked without the console sempahore held
87 */ 87 */
88 static int console_locked, console_suspended; 88 static int console_locked, console_suspended;
89 89
90 /* 90 /*
91 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars 91 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
92 * It is also used in interesting ways to provide interlocking in 92 * It is also used in interesting ways to provide interlocking in
93 * release_console_sem(). 93 * release_console_sem().
94 */ 94 */
95 static DEFINE_SPINLOCK(logbuf_lock); 95 static DEFINE_SPINLOCK(logbuf_lock);
96 96
97 #define LOG_BUF_MASK (log_buf_len-1) 97 #define LOG_BUF_MASK (log_buf_len-1)
98 #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) 98 #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
99 99
100 /* 100 /*
101 * The indices into log_buf are not constrained to log_buf_len - they 101 * The indices into log_buf are not constrained to log_buf_len - they
102 * must be masked before subscripting 102 * must be masked before subscripting
103 */ 103 */
104 static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */ 104 static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */
105 static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */ 105 static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */
106 static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ 106 static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */
107 107
108 /* 108 /*
109 * Array of consoles built from command line options (console=) 109 * Array of consoles built from command line options (console=)
110 */ 110 */
111 struct console_cmdline 111 struct console_cmdline
112 { 112 {
113 char name[8]; /* Name of the driver */ 113 char name[8]; /* Name of the driver */
114 int index; /* Minor dev. to use */ 114 int index; /* Minor dev. to use */
115 char *options; /* Options for the driver */ 115 char *options; /* Options for the driver */
116 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE 116 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
117 char *brl_options; /* Options for braille driver */ 117 char *brl_options; /* Options for braille driver */
118 #endif 118 #endif
119 }; 119 };
120 120
121 #define MAX_CMDLINECONSOLES 8 121 #define MAX_CMDLINECONSOLES 8
122 122
123 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; 123 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
124 static int selected_console = -1; 124 static int selected_console = -1;
125 static int preferred_console = -1; 125 static int preferred_console = -1;
126 int console_set_on_cmdline; 126 int console_set_on_cmdline;
127 EXPORT_SYMBOL(console_set_on_cmdline); 127 EXPORT_SYMBOL(console_set_on_cmdline);
128 128
129 /* Flag: console code may call schedule() */ 129 /* Flag: console code may call schedule() */
130 static int console_may_schedule; 130 static int console_may_schedule;
131 131
132 #ifdef CONFIG_PRINTK 132 #ifdef CONFIG_PRINTK
133 133
134 static char __log_buf[__LOG_BUF_LEN]; 134 static char __log_buf[__LOG_BUF_LEN];
135 static char *log_buf = __log_buf; 135 static char *log_buf = __log_buf;
136 static int log_buf_len = __LOG_BUF_LEN; 136 static int log_buf_len = __LOG_BUF_LEN;
137 static unsigned logged_chars; /* Number of chars produced since last read+clear operation */ 137 static unsigned logged_chars; /* Number of chars produced since last read+clear operation */
138 138
139 static int __init log_buf_len_setup(char *str) 139 static int __init log_buf_len_setup(char *str)
140 { 140 {
141 unsigned size = memparse(str, &str); 141 unsigned size = memparse(str, &str);
142 unsigned long flags; 142 unsigned long flags;
143 143
144 if (size) 144 if (size)
145 size = roundup_pow_of_two(size); 145 size = roundup_pow_of_two(size);
146 if (size > log_buf_len) { 146 if (size > log_buf_len) {
147 unsigned start, dest_idx, offset; 147 unsigned start, dest_idx, offset;
148 char *new_log_buf; 148 char *new_log_buf;
149 149
150 new_log_buf = alloc_bootmem(size); 150 new_log_buf = alloc_bootmem(size);
151 if (!new_log_buf) { 151 if (!new_log_buf) {
152 printk(KERN_WARNING "log_buf_len: allocation failed\n"); 152 printk(KERN_WARNING "log_buf_len: allocation failed\n");
153 goto out; 153 goto out;
154 } 154 }
155 155
156 spin_lock_irqsave(&logbuf_lock, flags); 156 spin_lock_irqsave(&logbuf_lock, flags);
157 log_buf_len = size; 157 log_buf_len = size;
158 log_buf = new_log_buf; 158 log_buf = new_log_buf;
159 159
160 offset = start = min(con_start, log_start); 160 offset = start = min(con_start, log_start);
161 dest_idx = 0; 161 dest_idx = 0;
162 while (start != log_end) { 162 while (start != log_end) {
163 log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; 163 log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
164 start++; 164 start++;
165 dest_idx++; 165 dest_idx++;
166 } 166 }
167 log_start -= offset; 167 log_start -= offset;
168 con_start -= offset; 168 con_start -= offset;
169 log_end -= offset; 169 log_end -= offset;
170 spin_unlock_irqrestore(&logbuf_lock, flags); 170 spin_unlock_irqrestore(&logbuf_lock, flags);
171 171
172 printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); 172 printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
173 } 173 }
174 out: 174 out:
175 return 1; 175 return 1;
176 } 176 }
177 177
178 __setup("log_buf_len=", log_buf_len_setup); 178 __setup("log_buf_len=", log_buf_len_setup);
179 179
180 #ifdef CONFIG_BOOT_PRINTK_DELAY 180 #ifdef CONFIG_BOOT_PRINTK_DELAY
181 181
182 static unsigned int boot_delay; /* msecs delay after each printk during bootup */ 182 static unsigned int boot_delay; /* msecs delay after each printk during bootup */
183 static unsigned long long printk_delay_msec; /* per msec, based on boot_delay */ 183 static unsigned long long printk_delay_msec; /* per msec, based on boot_delay */
184 184
185 static int __init boot_delay_setup(char *str) 185 static int __init boot_delay_setup(char *str)
186 { 186 {
187 unsigned long lpj; 187 unsigned long lpj;
188 unsigned long long loops_per_msec; 188 unsigned long long loops_per_msec;
189 189
190 lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */ 190 lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */
191 loops_per_msec = (unsigned long long)lpj / 1000 * HZ; 191 loops_per_msec = (unsigned long long)lpj / 1000 * HZ;
192 192
193 get_option(&str, &boot_delay); 193 get_option(&str, &boot_delay);
194 if (boot_delay > 10 * 1000) 194 if (boot_delay > 10 * 1000)
195 boot_delay = 0; 195 boot_delay = 0;
196 196
197 printk_delay_msec = loops_per_msec; 197 printk_delay_msec = loops_per_msec;
198 printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %ld, lpj: %lu, " 198 printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
199 "HZ: %d, printk_delay_msec: %llu\n", 199 "HZ: %d, printk_delay_msec: %llu\n",
200 boot_delay, preset_lpj, lpj, HZ, printk_delay_msec); 200 boot_delay, preset_lpj, lpj, HZ, printk_delay_msec);
201 return 1; 201 return 1;
202 } 202 }
203 __setup("boot_delay=", boot_delay_setup); 203 __setup("boot_delay=", boot_delay_setup);
204 204
205 static void boot_delay_msec(void) 205 static void boot_delay_msec(void)
206 { 206 {
207 unsigned long long k; 207 unsigned long long k;
208 unsigned long timeout; 208 unsigned long timeout;
209 209
210 if (boot_delay == 0 || system_state != SYSTEM_BOOTING) 210 if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
211 return; 211 return;
212 212
213 k = (unsigned long long)printk_delay_msec * boot_delay; 213 k = (unsigned long long)printk_delay_msec * boot_delay;
214 214
215 timeout = jiffies + msecs_to_jiffies(boot_delay); 215 timeout = jiffies + msecs_to_jiffies(boot_delay);
216 while (k) { 216 while (k) {
217 k--; 217 k--;
218 cpu_relax(); 218 cpu_relax();
219 /* 219 /*
220 * use (volatile) jiffies to prevent 220 * use (volatile) jiffies to prevent
221 * compiler reduction; loop termination via jiffies 221 * compiler reduction; loop termination via jiffies
222 * is secondary and may or may not happen. 222 * is secondary and may or may not happen.
223 */ 223 */
224 if (time_after(jiffies, timeout)) 224 if (time_after(jiffies, timeout))
225 break; 225 break;
226 touch_nmi_watchdog(); 226 touch_nmi_watchdog();
227 } 227 }
228 } 228 }
229 #else 229 #else
230 static inline void boot_delay_msec(void) 230 static inline void boot_delay_msec(void)
231 { 231 {
232 } 232 }
233 #endif 233 #endif
234 234
235 /* 235 /*
236 * Return the number of unread characters in the log buffer. 236 * Return the number of unread characters in the log buffer.
237 */ 237 */
238 static int log_buf_get_len(void) 238 static int log_buf_get_len(void)
239 { 239 {
240 return logged_chars; 240 return logged_chars;
241 } 241 }
242 242
243 /* 243 /*
244 * Copy a range of characters from the log buffer. 244 * Copy a range of characters from the log buffer.
245 */ 245 */
246 int log_buf_copy(char *dest, int idx, int len) 246 int log_buf_copy(char *dest, int idx, int len)
247 { 247 {
248 int ret, max; 248 int ret, max;
249 bool took_lock = false; 249 bool took_lock = false;
250 250
251 if (!oops_in_progress) { 251 if (!oops_in_progress) {
252 spin_lock_irq(&logbuf_lock); 252 spin_lock_irq(&logbuf_lock);
253 took_lock = true; 253 took_lock = true;
254 } 254 }
255 255
256 max = log_buf_get_len(); 256 max = log_buf_get_len();
257 if (idx < 0 || idx >= max) { 257 if (idx < 0 || idx >= max) {
258 ret = -1; 258 ret = -1;
259 } else { 259 } else {
260 if (len > max) 260 if (len > max)
261 len = max; 261 len = max;
262 ret = len; 262 ret = len;
263 idx += (log_end - max); 263 idx += (log_end - max);
264 while (len-- > 0) 264 while (len-- > 0)
265 dest[len] = LOG_BUF(idx + len); 265 dest[len] = LOG_BUF(idx + len);
266 } 266 }
267 267
268 if (took_lock) 268 if (took_lock)
269 spin_unlock_irq(&logbuf_lock); 269 spin_unlock_irq(&logbuf_lock);
270 270
271 return ret; 271 return ret;
272 } 272 }
273 273
274 /* 274 /*
275 * Commands to do_syslog: 275 * Commands to do_syslog:
276 * 276 *
277 * 0 -- Close the log. Currently a NOP. 277 * 0 -- Close the log. Currently a NOP.
278 * 1 -- Open the log. Currently a NOP. 278 * 1 -- Open the log. Currently a NOP.
279 * 2 -- Read from the log. 279 * 2 -- Read from the log.
280 * 3 -- Read all messages remaining in the ring buffer. 280 * 3 -- Read all messages remaining in the ring buffer.
281 * 4 -- Read and clear all messages remaining in the ring buffer 281 * 4 -- Read and clear all messages remaining in the ring buffer
282 * 5 -- Clear ring buffer. 282 * 5 -- Clear ring buffer.
283 * 6 -- Disable printk's to console 283 * 6 -- Disable printk's to console
284 * 7 -- Enable printk's to console 284 * 7 -- Enable printk's to console
285 * 8 -- Set level of messages printed to console 285 * 8 -- Set level of messages printed to console
286 * 9 -- Return number of unread characters in the log buffer 286 * 9 -- Return number of unread characters in the log buffer
287 * 10 -- Return size of the log buffer 287 * 10 -- Return size of the log buffer
288 */ 288 */
289 int do_syslog(int type, char __user *buf, int len) 289 int do_syslog(int type, char __user *buf, int len)
290 { 290 {
291 unsigned i, j, limit, count; 291 unsigned i, j, limit, count;
292 int do_clear = 0; 292 int do_clear = 0;
293 char c; 293 char c;
294 int error = 0; 294 int error = 0;
295 295
296 error = security_syslog(type); 296 error = security_syslog(type);
297 if (error) 297 if (error)
298 return error; 298 return error;
299 299
300 switch (type) { 300 switch (type) {
301 case 0: /* Close log */ 301 case 0: /* Close log */
302 break; 302 break;
303 case 1: /* Open log */ 303 case 1: /* Open log */
304 break; 304 break;
305 case 2: /* Read from log */ 305 case 2: /* Read from log */
306 error = -EINVAL; 306 error = -EINVAL;
307 if (!buf || len < 0) 307 if (!buf || len < 0)
308 goto out; 308 goto out;
309 error = 0; 309 error = 0;
310 if (!len) 310 if (!len)
311 goto out; 311 goto out;
312 if (!access_ok(VERIFY_WRITE, buf, len)) { 312 if (!access_ok(VERIFY_WRITE, buf, len)) {
313 error = -EFAULT; 313 error = -EFAULT;
314 goto out; 314 goto out;
315 } 315 }
316 error = wait_event_interruptible(log_wait, 316 error = wait_event_interruptible(log_wait,
317 (log_start - log_end)); 317 (log_start - log_end));
318 if (error) 318 if (error)
319 goto out; 319 goto out;
320 i = 0; 320 i = 0;
321 spin_lock_irq(&logbuf_lock); 321 spin_lock_irq(&logbuf_lock);
322 while (!error && (log_start != log_end) && i < len) { 322 while (!error && (log_start != log_end) && i < len) {
323 c = LOG_BUF(log_start); 323 c = LOG_BUF(log_start);
324 log_start++; 324 log_start++;
325 spin_unlock_irq(&logbuf_lock); 325 spin_unlock_irq(&logbuf_lock);
326 error = __put_user(c,buf); 326 error = __put_user(c,buf);
327 buf++; 327 buf++;
328 i++; 328 i++;
329 cond_resched(); 329 cond_resched();
330 spin_lock_irq(&logbuf_lock); 330 spin_lock_irq(&logbuf_lock);
331 } 331 }
332 spin_unlock_irq(&logbuf_lock); 332 spin_unlock_irq(&logbuf_lock);
333 if (!error) 333 if (!error)
334 error = i; 334 error = i;
335 break; 335 break;
336 case 4: /* Read/clear last kernel messages */ 336 case 4: /* Read/clear last kernel messages */
337 do_clear = 1; 337 do_clear = 1;
338 /* FALL THRU */ 338 /* FALL THRU */
339 case 3: /* Read last kernel messages */ 339 case 3: /* Read last kernel messages */
340 error = -EINVAL; 340 error = -EINVAL;
341 if (!buf || len < 0) 341 if (!buf || len < 0)
342 goto out; 342 goto out;
343 error = 0; 343 error = 0;
344 if (!len) 344 if (!len)
345 goto out; 345 goto out;
346 if (!access_ok(VERIFY_WRITE, buf, len)) { 346 if (!access_ok(VERIFY_WRITE, buf, len)) {
347 error = -EFAULT; 347 error = -EFAULT;
348 goto out; 348 goto out;
349 } 349 }
350 count = len; 350 count = len;
351 if (count > log_buf_len) 351 if (count > log_buf_len)
352 count = log_buf_len; 352 count = log_buf_len;
353 spin_lock_irq(&logbuf_lock); 353 spin_lock_irq(&logbuf_lock);
354 if (count > logged_chars) 354 if (count > logged_chars)
355 count = logged_chars; 355 count = logged_chars;
356 if (do_clear) 356 if (do_clear)
357 logged_chars = 0; 357 logged_chars = 0;
358 limit = log_end; 358 limit = log_end;
359 /* 359 /*
360 * __put_user() could sleep, and while we sleep 360 * __put_user() could sleep, and while we sleep
361 * printk() could overwrite the messages 361 * printk() could overwrite the messages
362 * we try to copy to user space. Therefore 362 * we try to copy to user space. Therefore
363 * the messages are copied in reverse. <manfreds> 363 * the messages are copied in reverse. <manfreds>
364 */ 364 */
365 for (i = 0; i < count && !error; i++) { 365 for (i = 0; i < count && !error; i++) {
366 j = limit-1-i; 366 j = limit-1-i;
367 if (j + log_buf_len < log_end) 367 if (j + log_buf_len < log_end)
368 break; 368 break;
369 c = LOG_BUF(j); 369 c = LOG_BUF(j);
370 spin_unlock_irq(&logbuf_lock); 370 spin_unlock_irq(&logbuf_lock);
371 error = __put_user(c,&buf[count-1-i]); 371 error = __put_user(c,&buf[count-1-i]);
372 cond_resched(); 372 cond_resched();
373 spin_lock_irq(&logbuf_lock); 373 spin_lock_irq(&logbuf_lock);
374 } 374 }
375 spin_unlock_irq(&logbuf_lock); 375 spin_unlock_irq(&logbuf_lock);
376 if (error) 376 if (error)
377 break; 377 break;
378 error = i; 378 error = i;
379 if (i != count) { 379 if (i != count) {
380 int offset = count-error; 380 int offset = count-error;
381 /* buffer overflow during copy, correct user buffer. */ 381 /* buffer overflow during copy, correct user buffer. */
382 for (i = 0; i < error; i++) { 382 for (i = 0; i < error; i++) {
383 if (__get_user(c,&buf[i+offset]) || 383 if (__get_user(c,&buf[i+offset]) ||
384 __put_user(c,&buf[i])) { 384 __put_user(c,&buf[i])) {
385 error = -EFAULT; 385 error = -EFAULT;
386 break; 386 break;
387 } 387 }
388 cond_resched(); 388 cond_resched();
389 } 389 }
390 } 390 }
391 break; 391 break;
392 case 5: /* Clear ring buffer */ 392 case 5: /* Clear ring buffer */
393 logged_chars = 0; 393 logged_chars = 0;
394 break; 394 break;
395 case 6: /* Disable logging to console */ 395 case 6: /* Disable logging to console */
396 console_loglevel = minimum_console_loglevel; 396 console_loglevel = minimum_console_loglevel;
397 break; 397 break;
398 case 7: /* Enable logging to console */ 398 case 7: /* Enable logging to console */
399 console_loglevel = default_console_loglevel; 399 console_loglevel = default_console_loglevel;
400 break; 400 break;
401 case 8: /* Set level of messages printed to console */ 401 case 8: /* Set level of messages printed to console */
402 error = -EINVAL; 402 error = -EINVAL;
403 if (len < 1 || len > 8) 403 if (len < 1 || len > 8)
404 goto out; 404 goto out;
405 if (len < minimum_console_loglevel) 405 if (len < minimum_console_loglevel)
406 len = minimum_console_loglevel; 406 len = minimum_console_loglevel;
407 console_loglevel = len; 407 console_loglevel = len;
408 error = 0; 408 error = 0;
409 break; 409 break;
410 case 9: /* Number of chars in the log buffer */ 410 case 9: /* Number of chars in the log buffer */
411 error = log_end - log_start; 411 error = log_end - log_start;
412 break; 412 break;
413 case 10: /* Size of the log buffer */ 413 case 10: /* Size of the log buffer */
414 error = log_buf_len; 414 error = log_buf_len;
415 break; 415 break;
416 default: 416 default:
417 error = -EINVAL; 417 error = -EINVAL;
418 break; 418 break;
419 } 419 }
420 out: 420 out:
421 return error; 421 return error;
422 } 422 }
423 423
424 asmlinkage long sys_syslog(int type, char __user *buf, int len) 424 asmlinkage long sys_syslog(int type, char __user *buf, int len)
425 { 425 {
426 return do_syslog(type, buf, len); 426 return do_syslog(type, buf, len);
427 } 427 }
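[Editor's note] The numeric command codes in the switch above are the userspace-visible syslog(2) interface, normally reached through glibc's klogctl(3) wrapper around sys_syslog(). A hedged userspace sketch that dumps the ring buffer (the buffer handling is illustrative, not part of this patch):

    /*
     * Userspace sketch: read the kernel log ring via klogctl(3).
     * The type numbers match the do_syslog() cases above.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/klog.h>

    int main(void)
    {
            int size = klogctl(10, NULL, 0);  /* case 10: log buffer size */
            char *buf;
            int n;

            if (size <= 0 || !(buf = malloc(size)))
                    return 1;
            n = klogctl(3, buf, size);        /* case 3: read last messages */
            if (n < 0) {
                    perror("klogctl");
                    return 1;
            }
            fwrite(buf, 1, n, stdout);
            free(buf);
            return 0;
    }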
428 428
429 /* 429 /*
430 * Call the console drivers on a range of log_buf 430 * Call the console drivers on a range of log_buf
431 */ 431 */
432 static void __call_console_drivers(unsigned start, unsigned end) 432 static void __call_console_drivers(unsigned start, unsigned end)
433 { 433 {
434 struct console *con; 434 struct console *con;
435 435
436 for (con = console_drivers; con; con = con->next) { 436 for (con = console_drivers; con; con = con->next) {
437 if ((con->flags & CON_ENABLED) && con->write && 437 if ((con->flags & CON_ENABLED) && con->write &&
438 (cpu_online(smp_processor_id()) || 438 (cpu_online(smp_processor_id()) ||
439 (con->flags & CON_ANYTIME))) 439 (con->flags & CON_ANYTIME)))
440 con->write(con, &LOG_BUF(start), end - start); 440 con->write(con, &LOG_BUF(start), end - start);
441 } 441 }
442 } 442 }
443 443
444 static int __read_mostly ignore_loglevel; 444 static int __read_mostly ignore_loglevel;
445 445
446 static int __init ignore_loglevel_setup(char *str) 446 static int __init ignore_loglevel_setup(char *str)
447 { 447 {
448 ignore_loglevel = 1; 448 ignore_loglevel = 1;
449 printk(KERN_INFO "debug: ignoring loglevel setting.\n"); 449 printk(KERN_INFO "debug: ignoring loglevel setting.\n");
450 450
451 return 0; 451 return 0;
452 } 452 }
453 453
454 early_param("ignore_loglevel", ignore_loglevel_setup); 454 early_param("ignore_loglevel", ignore_loglevel_setup);
455 455
456 /* 456 /*
457 * Write out chars from start to end - 1 inclusive 457 * Write out chars from start to end - 1 inclusive
458 */ 458 */
459 static void _call_console_drivers(unsigned start, 459 static void _call_console_drivers(unsigned start,
460 unsigned end, int msg_log_level) 460 unsigned end, int msg_log_level)
461 { 461 {
462 if ((msg_log_level < console_loglevel || ignore_loglevel) && 462 if ((msg_log_level < console_loglevel || ignore_loglevel) &&
463 console_drivers && start != end) { 463 console_drivers && start != end) {
464 if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { 464 if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
465 /* wrapped write */ 465 /* wrapped write */
466 __call_console_drivers(start & LOG_BUF_MASK, 466 __call_console_drivers(start & LOG_BUF_MASK,
467 log_buf_len); 467 log_buf_len);
468 __call_console_drivers(0, end & LOG_BUF_MASK); 468 __call_console_drivers(0, end & LOG_BUF_MASK);
469 } else { 469 } else {
470 __call_console_drivers(start, end); 470 __call_console_drivers(start, end);
471 } 471 }
472 } 472 }
473 } 473 }
474 474
475 /* 475 /*
476 * Call the console drivers, asking them to write out 476 * Call the console drivers, asking them to write out
477 * log_buf[start] to log_buf[end - 1]. 477 * log_buf[start] to log_buf[end - 1].
478 * The console_sem must be held. 478 * The console_sem must be held.
479 */ 479 */
480 static void call_console_drivers(unsigned start, unsigned end) 480 static void call_console_drivers(unsigned start, unsigned end)
481 { 481 {
482 unsigned cur_index, start_print; 482 unsigned cur_index, start_print;
483 static int msg_level = -1; 483 static int msg_level = -1;
484 484
485 BUG_ON(((int)(start - end)) > 0); 485 BUG_ON(((int)(start - end)) > 0);
486 486
487 cur_index = start; 487 cur_index = start;
488 start_print = start; 488 start_print = start;
489 while (cur_index != end) { 489 while (cur_index != end) {
490 if (msg_level < 0 && ((end - cur_index) > 2) && 490 if (msg_level < 0 && ((end - cur_index) > 2) &&
491 LOG_BUF(cur_index + 0) == '<' && 491 LOG_BUF(cur_index + 0) == '<' &&
492 LOG_BUF(cur_index + 1) >= '0' && 492 LOG_BUF(cur_index + 1) >= '0' &&
493 LOG_BUF(cur_index + 1) <= '7' && 493 LOG_BUF(cur_index + 1) <= '7' &&
494 LOG_BUF(cur_index + 2) == '>') { 494 LOG_BUF(cur_index + 2) == '>') {
495 msg_level = LOG_BUF(cur_index + 1) - '0'; 495 msg_level = LOG_BUF(cur_index + 1) - '0';
496 cur_index += 3; 496 cur_index += 3;
497 start_print = cur_index; 497 start_print = cur_index;
498 } 498 }
499 while (cur_index != end) { 499 while (cur_index != end) {
500 char c = LOG_BUF(cur_index); 500 char c = LOG_BUF(cur_index);
501 501
502 cur_index++; 502 cur_index++;
503 if (c == '\n') { 503 if (c == '\n') {
504 if (msg_level < 0) { 504 if (msg_level < 0) {
505 /* 505 /*
506 * printk() has already given us loglevel tags in 506 * printk() has already given us loglevel tags in
507 * the buffer. This code is here in case the 507 * the buffer. This code is here in case the
508 * log buffer has wrapped right round and scribbled 508 * log buffer has wrapped right round and scribbled
509 * on those tags 509 * on those tags
510 */ 510 */
511 msg_level = default_message_loglevel; 511 msg_level = default_message_loglevel;
512 } 512 }
513 _call_console_drivers(start_print, cur_index, msg_level); 513 _call_console_drivers(start_print, cur_index, msg_level);
514 msg_level = -1; 514 msg_level = -1;
515 start_print = cur_index; 515 start_print = cur_index;
516 break; 516 break;
517 } 517 }
518 } 518 }
519 } 519 }
520 _call_console_drivers(start_print, end, msg_level); 520 _call_console_drivers(start_print, end, msg_level);
521 } 521 }
522 522
523 static void emit_log_char(char c) 523 static void emit_log_char(char c)
524 { 524 {
525 LOG_BUF(log_end) = c; 525 LOG_BUF(log_end) = c;
526 log_end++; 526 log_end++;
527 if (log_end - log_start > log_buf_len) 527 if (log_end - log_start > log_buf_len)
528 log_start = log_end - log_buf_len; 528 log_start = log_end - log_buf_len;
529 if (log_end - con_start > log_buf_len) 529 if (log_end - con_start > log_buf_len)
530 con_start = log_end - log_buf_len; 530 con_start = log_end - log_buf_len;
531 if (logged_chars < log_buf_len) 531 if (logged_chars < log_buf_len)
532 logged_chars++; 532 logged_chars++;
533 } 533 }
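[Editor's note] A note on the index arithmetic here: log_start, con_start and log_end are free-running unsigned counters, and LOG_BUF() masks an index with LOG_BUF_MASK (log_buf_len is a power of two), so the subtractions above stay correct across unsigned wraparound. A reduced sketch of the idiom, with illustrative names:

    /*
     * Free-running-index ring buffer idiom; BUF_LEN must be a power
     * of two. All names here are illustrative, not from the patch.
     */
    #define BUF_LEN		16
    #define BUF_MASK	(BUF_LEN - 1)

    static char buf[BUF_LEN];
    static unsigned int head, tail;	/* only masked at the point of use */

    static void put_char(char c)
    {
            buf[head & BUF_MASK] = c;
            head++;
            if (head - tail > BUF_LEN)	/* oldest byte was overwritten */
                    tail = head - BUF_LEN;
    }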
534 534
535 /* 535 /*
536 * Zap console related locks when oopsing. Only zap at most once 536 * Zap console related locks when oopsing. Only zap at most once
537 * every 30 seconds (matching the 30 * HZ window below), to leave 537 * every 30 seconds (matching the 30 * HZ window below), to leave
538 * time for slow consoles to print a full oops. 538 * time for slow consoles to print a full oops.
 537 * every 30 seconds (matching the 30 * HZ window below), to leave 537 * every 30 seconds (matching the 30 * HZ window below), to leave
539 */ 539 */
540 static void zap_locks(void) 540 static void zap_locks(void)
541 { 541 {
542 static unsigned long oops_timestamp; 542 static unsigned long oops_timestamp;
543 543
544 if (time_after_eq(jiffies, oops_timestamp) && 544 if (time_after_eq(jiffies, oops_timestamp) &&
545 !time_after(jiffies, oops_timestamp + 30 * HZ)) 545 !time_after(jiffies, oops_timestamp + 30 * HZ))
546 return; 546 return;
547 547
548 oops_timestamp = jiffies; 548 oops_timestamp = jiffies;
549 549
550 /* If a crash is occurring, make sure we can't deadlock */ 550 /* If a crash is occurring, make sure we can't deadlock */
551 spin_lock_init(&logbuf_lock); 551 spin_lock_init(&logbuf_lock);
552 /* And make sure that we print immediately */ 552 /* And make sure that we print immediately */
553 init_MUTEX(&console_sem); 553 init_MUTEX(&console_sem);
554 } 554 }
555 555
556 #if defined(CONFIG_PRINTK_TIME) 556 #if defined(CONFIG_PRINTK_TIME)
557 static int printk_time = 1; 557 static int printk_time = 1;
558 #else 558 #else
559 static int printk_time = 0; 559 static int printk_time = 0;
560 #endif 560 #endif
561 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 561 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
562 562
563 /* Check if we have any console registered that can be called early in boot. */ 563 /* Check if we have any console registered that can be called early in boot. */
564 static int have_callable_console(void) 564 static int have_callable_console(void)
565 { 565 {
566 struct console *con; 566 struct console *con;
567 567
568 for (con = console_drivers; con; con = con->next) 568 for (con = console_drivers; con; con = con->next)
569 if (con->flags & CON_ANYTIME) 569 if (con->flags & CON_ANYTIME)
570 return 1; 570 return 1;
571 571
572 return 0; 572 return 0;
573 } 573 }
574 574
575 /** 575 /**
576 * printk - print a kernel message 576 * printk - print a kernel message
577 * @fmt: format string 577 * @fmt: format string
578 * 578 *
579 * This is printk(). It can be called from any context. We want it to work. 579 * This is printk(). It can be called from any context. We want it to work.
580 * Be aware of the fact that if oops_in_progress is not set, we might try to 580 * Be aware of the fact that if oops_in_progress is not set, we might try to
581 * wake klogd up which could deadlock on runqueue lock if printk() is called 581 * wake klogd up which could deadlock on runqueue lock if printk() is called
582 * from scheduler code. 582 * from scheduler code.
583 * 583 *
584 * We try to grab the console_sem. If we succeed, it's easy - we log the output and 584 * We try to grab the console_sem. If we succeed, it's easy - we log the output and
585 * call the console drivers. If we fail to get the semaphore we place the output 585 * call the console drivers. If we fail to get the semaphore we place the output
586 * into the log buffer and return. The current holder of the console_sem will 586 * into the log buffer and return. The current holder of the console_sem will
587 * notice the new output in release_console_sem() and will send it to the 587 * notice the new output in release_console_sem() and will send it to the
588 * consoles before releasing the semaphore. 588 * consoles before releasing the semaphore.
589 * 589 *
590 * One effect of this deferred printing is that code which calls printk() and 590 * One effect of this deferred printing is that code which calls printk() and
591 * then changes console_loglevel may break. This is because console_loglevel 591 * then changes console_loglevel may break. This is because console_loglevel
592 * is inspected when the actual printing occurs. 592 * is inspected when the actual printing occurs.
593 * 593 *
594 * See also: 594 * See also:
595 * printf(3) 595 * printf(3)
596 */ 596 */
597 597
598 asmlinkage int printk(const char *fmt, ...) 598 asmlinkage int printk(const char *fmt, ...)
599 { 599 {
600 va_list args; 600 va_list args;
601 int r; 601 int r;
602 602
603 va_start(args, fmt); 603 va_start(args, fmt);
604 r = vprintk(fmt, args); 604 r = vprintk(fmt, args);
605 va_end(args); 605 va_end(args);
606 606
607 return r; 607 return r;
608 } 608 }
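[Editor's note] For reference, callers normally prepend one of the KERN_* level macros, which expand to the "<N>" tokens parsed in vprintk() below; lines with no token get default_message_loglevel. A small usage sketch (the message and dev_name are illustrative):

    /* KERN_WARNING expands to the token "<4>"; without a prefix,
     * vprintk() inserts the default level. dev_name is illustrative. */
    printk(KERN_WARNING "%s: transmit queue stuck, resetting\n", dev_name);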
609 609
610 /* cpu currently holding logbuf_lock */ 610 /* cpu currently holding logbuf_lock */
611 static volatile unsigned int printk_cpu = UINT_MAX; 611 static volatile unsigned int printk_cpu = UINT_MAX;
612 612
613 /* 613 /*
614 * Can we actually use the console at this time on this cpu? 614 * Can we actually use the console at this time on this cpu?
615 * 615 *
616 * Console drivers may assume that per-cpu resources have 616 * Console drivers may assume that per-cpu resources have
617 * been allocated. So unless they're explicitly marked as 617 * been allocated. So unless they're explicitly marked as
618 * being able to cope (CON_ANYTIME) don't call them until 618 * being able to cope (CON_ANYTIME) don't call them until
619 * this CPU is officially up. 619 * this CPU is officially up.
620 */ 620 */
621 static inline int can_use_console(unsigned int cpu) 621 static inline int can_use_console(unsigned int cpu)
622 { 622 {
623 return cpu_online(cpu) || have_callable_console(); 623 return cpu_online(cpu) || have_callable_console();
624 } 624 }
625 625
626 /* 626 /*
627 * Try to get console ownership to actually show the kernel 627 * Try to get console ownership to actually show the kernel
628 * messages from a 'printk'. Return true (and with the 628 * messages from a 'printk'. Return true (and with the
629 * console_semaphore held, and 'console_locked' set) if it 629 * console_semaphore held, and 'console_locked' set) if it
630 * is successful, false otherwise. 630 * is successful, false otherwise.
631 * 631 *
632 * This gets called with the 'logbuf_lock' spinlock held and 632 * This gets called with the 'logbuf_lock' spinlock held and
633 * interrupts disabled. It should return with 'logbuf_lock' 633 * interrupts disabled. It should return with 'logbuf_lock'
634 * released but interrupts still disabled. 634 * released but interrupts still disabled.
635 */ 635 */
636 static int acquire_console_semaphore_for_printk(unsigned int cpu) 636 static int acquire_console_semaphore_for_printk(unsigned int cpu)
637 { 637 {
638 int retval = 0; 638 int retval = 0;
639 639
640 if (!try_acquire_console_sem()) { 640 if (!try_acquire_console_sem()) {
641 retval = 1; 641 retval = 1;
642 642
643 /* 643 /*
644 * If we can't use the console, we need to release 644 * If we can't use the console, we need to release
645 * the console semaphore by hand to avoid flushing 645 * the console semaphore by hand to avoid flushing
646 * the buffer. We need to hold the console semaphore 646 * the buffer. We need to hold the console semaphore
647 * in order to do this test safely. 647 * in order to do this test safely.
648 */ 648 */
649 if (!can_use_console(cpu)) { 649 if (!can_use_console(cpu)) {
650 console_locked = 0; 650 console_locked = 0;
651 up(&console_sem); 651 up(&console_sem);
652 retval = 0; 652 retval = 0;
653 } 653 }
654 } 654 }
655 printk_cpu = UINT_MAX; 655 printk_cpu = UINT_MAX;
656 spin_unlock(&logbuf_lock); 656 spin_unlock(&logbuf_lock);
657 return retval; 657 return retval;
658 } 658 }
659 static const char recursion_bug_msg [] = 659 static const char recursion_bug_msg [] =
660 KERN_CRIT "BUG: recent printk recursion!\n"; 660 KERN_CRIT "BUG: recent printk recursion!\n";
661 static int recursion_bug; 661 static int recursion_bug;
662 static int new_text_line = 1; 662 static int new_text_line = 1;
663 static char printk_buf[1024]; 663 static char printk_buf[1024];
664 664
665 asmlinkage int vprintk(const char *fmt, va_list args) 665 asmlinkage int vprintk(const char *fmt, va_list args)
666 { 666 {
667 int printed_len = 0; 667 int printed_len = 0;
668 int current_log_level = default_message_loglevel; 668 int current_log_level = default_message_loglevel;
669 unsigned long flags; 669 unsigned long flags;
670 int this_cpu; 670 int this_cpu;
671 char *p; 671 char *p;
672 672
673 boot_delay_msec(); 673 boot_delay_msec();
674 674
675 preempt_disable(); 675 preempt_disable();
676 /* This stops the holder of console_sem just where we want him */ 676 /* This stops the holder of console_sem just where we want him */
677 raw_local_irq_save(flags); 677 raw_local_irq_save(flags);
678 this_cpu = smp_processor_id(); 678 this_cpu = smp_processor_id();
679 679
680 /* 680 /*
681 * Ouch, printk recursed into itself! 681 * Ouch, printk recursed into itself!
682 */ 682 */
683 if (unlikely(printk_cpu == this_cpu)) { 683 if (unlikely(printk_cpu == this_cpu)) {
684 /* 684 /*
685 * If a crash is occurring during printk() on this CPU, 685 * If a crash is occurring during printk() on this CPU,
686 * then try to get the crash message out but make sure 686 * then try to get the crash message out but make sure
687 * we can't deadlock. Otherwise just return to avoid the 687 * we can't deadlock. Otherwise just return to avoid the
688 * recursion - but flag the recursion so that 688 * recursion - but flag the recursion so that
689 * it can be printed at the next appropriate moment: 689 * it can be printed at the next appropriate moment:
690 */ 690 */
691 if (!oops_in_progress) { 691 if (!oops_in_progress) {
692 recursion_bug = 1; 692 recursion_bug = 1;
693 goto out_restore_irqs; 693 goto out_restore_irqs;
694 } 694 }
695 zap_locks(); 695 zap_locks();
696 } 696 }
697 697
698 lockdep_off(); 698 lockdep_off();
699 spin_lock(&logbuf_lock); 699 spin_lock(&logbuf_lock);
700 printk_cpu = this_cpu; 700 printk_cpu = this_cpu;
701 701
702 if (recursion_bug) { 702 if (recursion_bug) {
703 recursion_bug = 0; 703 recursion_bug = 0;
704 strcpy(printk_buf, recursion_bug_msg); 704 strcpy(printk_buf, recursion_bug_msg);
705 printed_len = strlen(recursion_bug_msg); 705 printed_len = strlen(recursion_bug_msg);
706 } 706 }
707 /* Emit the output into the temporary buffer */ 707 /* Emit the output into the temporary buffer */
708 printed_len += vscnprintf(printk_buf + printed_len, 708 printed_len += vscnprintf(printk_buf + printed_len,
709 sizeof(printk_buf) - printed_len, fmt, args); 709 sizeof(printk_buf) - printed_len, fmt, args);
710 710
711 711
712 /* 712 /*
713 * Copy the output into log_buf. If the caller didn't provide 713 * Copy the output into log_buf. If the caller didn't provide
714 * appropriate log level tags, we insert them here 714 * appropriate log level tags, we insert them here
715 */ 715 */
716 for (p = printk_buf; *p; p++) { 716 for (p = printk_buf; *p; p++) {
717 if (new_text_line) { 717 if (new_text_line) {
718 /* If a token, set current_log_level and skip over */ 718 /* If a token, set current_log_level and skip over */
719 if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' && 719 if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' &&
720 p[2] == '>') { 720 p[2] == '>') {
721 current_log_level = p[1] - '0'; 721 current_log_level = p[1] - '0';
722 p += 3; 722 p += 3;
723 printed_len -= 3; 723 printed_len -= 3;
724 } 724 }
725 725
726 /* Always output the token */ 726 /* Always output the token */
727 emit_log_char('<'); 727 emit_log_char('<');
728 emit_log_char(current_log_level + '0'); 728 emit_log_char(current_log_level + '0');
729 emit_log_char('>'); 729 emit_log_char('>');
730 printed_len += 3; 730 printed_len += 3;
731 new_text_line = 0; 731 new_text_line = 0;
732 732
733 if (printk_time) { 733 if (printk_time) {
734 /* Follow the token with the time */ 734 /* Follow the token with the time */
735 char tbuf[50], *tp; 735 char tbuf[50], *tp;
736 unsigned tlen; 736 unsigned tlen;
737 unsigned long long t; 737 unsigned long long t;
738 unsigned long nanosec_rem; 738 unsigned long nanosec_rem;
739 739
740 t = cpu_clock(printk_cpu); 740 t = cpu_clock(printk_cpu);
741 nanosec_rem = do_div(t, 1000000000); 741 nanosec_rem = do_div(t, 1000000000);
742 tlen = sprintf(tbuf, "[%5lu.%06lu] ", 742 tlen = sprintf(tbuf, "[%5lu.%06lu] ",
743 (unsigned long) t, 743 (unsigned long) t,
744 nanosec_rem / 1000); 744 nanosec_rem / 1000);
745 745
746 for (tp = tbuf; tp < tbuf + tlen; tp++) 746 for (tp = tbuf; tp < tbuf + tlen; tp++)
747 emit_log_char(*tp); 747 emit_log_char(*tp);
748 printed_len += tlen; 748 printed_len += tlen;
749 } 749 }
750 750
751 if (!*p) 751 if (!*p)
752 break; 752 break;
753 } 753 }
754 754
755 emit_log_char(*p); 755 emit_log_char(*p);
756 if (*p == '\n') 756 if (*p == '\n')
757 new_text_line = 1; 757 new_text_line = 1;
758 } 758 }
759 759
760 /* 760 /*
761 * Try to acquire and then immediately release the 761 * Try to acquire and then immediately release the
762 * console semaphore. The release will do all the 762 * console semaphore. The release will do all the
763 * actual magic (print out buffers, wake up klogd, 763 * actual magic (print out buffers, wake up klogd,
764 * etc). 764 * etc).
765 * 765 *
766 * The acquire_console_semaphore_for_printk() function 766 * The acquire_console_semaphore_for_printk() function
767 * will release 'logbuf_lock' regardless of whether it 767 * will release 'logbuf_lock' regardless of whether it
768 * actually gets the semaphore or not. 768 * actually gets the semaphore or not.
769 */ 769 */
770 if (acquire_console_semaphore_for_printk(this_cpu)) 770 if (acquire_console_semaphore_for_printk(this_cpu))
771 release_console_sem(); 771 release_console_sem();
772 772
773 lockdep_on(); 773 lockdep_on();
774 out_restore_irqs: 774 out_restore_irqs:
775 raw_local_irq_restore(flags); 775 raw_local_irq_restore(flags);
776 776
777 preempt_enable(); 777 preempt_enable();
778 return printed_len; 778 return printed_len;
779 } 779 }
780 EXPORT_SYMBOL(printk); 780 EXPORT_SYMBOL(printk);
781 EXPORT_SYMBOL(vprintk); 781 EXPORT_SYMBOL(vprintk);
782 782
783 #else 783 #else
784 784
785 asmlinkage long sys_syslog(int type, char __user *buf, int len) 785 asmlinkage long sys_syslog(int type, char __user *buf, int len)
786 { 786 {
787 return -ENOSYS; 787 return -ENOSYS;
788 } 788 }
789 789
790 static void call_console_drivers(unsigned start, unsigned end) 790 static void call_console_drivers(unsigned start, unsigned end)
791 { 791 {
792 } 792 }
793 793
794 #endif 794 #endif
795 795
796 static int __add_preferred_console(char *name, int idx, char *options, 796 static int __add_preferred_console(char *name, int idx, char *options,
797 char *brl_options) 797 char *brl_options)
798 { 798 {
799 struct console_cmdline *c; 799 struct console_cmdline *c;
800 int i; 800 int i;
801 801
802 /* 802 /*
803 * See if this tty is not yet registered, and 803 * See if this tty is not yet registered, and
804 * if we have a slot free. 804 * if we have a slot free.
805 */ 805 */
806 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) 806 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
807 if (strcmp(console_cmdline[i].name, name) == 0 && 807 if (strcmp(console_cmdline[i].name, name) == 0 &&
808 console_cmdline[i].index == idx) { 808 console_cmdline[i].index == idx) {
809 if (!brl_options) 809 if (!brl_options)
810 selected_console = i; 810 selected_console = i;
811 return 0; 811 return 0;
812 } 812 }
813 if (i == MAX_CMDLINECONSOLES) 813 if (i == MAX_CMDLINECONSOLES)
814 return -E2BIG; 814 return -E2BIG;
815 if (!brl_options) 815 if (!brl_options)
816 selected_console = i; 816 selected_console = i;
817 c = &console_cmdline[i]; 817 c = &console_cmdline[i];
818 strlcpy(c->name, name, sizeof(c->name)); 818 strlcpy(c->name, name, sizeof(c->name));
819 c->options = options; 819 c->options = options;
820 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE 820 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
821 c->brl_options = brl_options; 821 c->brl_options = brl_options;
822 #endif 822 #endif
823 c->index = idx; 823 c->index = idx;
824 return 0; 824 return 0;
825 } 825 }
826 /* 826 /*
827 * Set up a list of consoles. Called from init/main.c 827 * Set up a list of consoles. Called from init/main.c
828 */ 828 */
829 static int __init console_setup(char *str) 829 static int __init console_setup(char *str)
830 { 830 {
831 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ 831 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
832 char *s, *options, *brl_options = NULL; 832 char *s, *options, *brl_options = NULL;
833 int idx; 833 int idx;
834 834
835 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE 835 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
836 if (!memcmp(str, "brl,", 4)) { 836 if (!memcmp(str, "brl,", 4)) {
837 brl_options = ""; 837 brl_options = "";
838 str += 4; 838 str += 4;
839 } else if (!memcmp(str, "brl=", 4)) { 839 } else if (!memcmp(str, "brl=", 4)) {
840 brl_options = str + 4; 840 brl_options = str + 4;
841 str = strchr(brl_options, ','); 841 str = strchr(brl_options, ',');
842 if (!str) { 842 if (!str) {
843 printk(KERN_ERR "need port name after brl=\n"); 843 printk(KERN_ERR "need port name after brl=\n");
844 return 1; 844 return 1;
845 } 845 }
846 *(str++) = 0; 846 *(str++) = 0;
847 } 847 }
848 #endif 848 #endif
849 849
850 /* 850 /*
851 * Decode str into name, index, options. 851 * Decode str into name, index, options.
852 */ 852 */
853 if (str[0] >= '0' && str[0] <= '9') { 853 if (str[0] >= '0' && str[0] <= '9') {
854 strcpy(buf, "ttyS"); 854 strcpy(buf, "ttyS");
855 strncpy(buf + 4, str, sizeof(buf) - 5); 855 strncpy(buf + 4, str, sizeof(buf) - 5);
856 } else { 856 } else {
857 strncpy(buf, str, sizeof(buf) - 1); 857 strncpy(buf, str, sizeof(buf) - 1);
858 } 858 }
859 buf[sizeof(buf) - 1] = 0; 859 buf[sizeof(buf) - 1] = 0;
860 if ((options = strchr(str, ',')) != NULL) 860 if ((options = strchr(str, ',')) != NULL)
861 *(options++) = 0; 861 *(options++) = 0;
862 #ifdef __sparc__ 862 #ifdef __sparc__
863 if (!strcmp(str, "ttya")) 863 if (!strcmp(str, "ttya"))
864 strcpy(buf, "ttyS0"); 864 strcpy(buf, "ttyS0");
865 if (!strcmp(str, "ttyb")) 865 if (!strcmp(str, "ttyb"))
866 strcpy(buf, "ttyS1"); 866 strcpy(buf, "ttyS1");
867 #endif 867 #endif
868 for (s = buf; *s; s++) 868 for (s = buf; *s; s++)
869 if ((*s >= '0' && *s <= '9') || *s == ',') 869 if ((*s >= '0' && *s <= '9') || *s == ',')
870 break; 870 break;
871 idx = simple_strtoul(s, NULL, 10); 871 idx = simple_strtoul(s, NULL, 10);
872 *s = 0; 872 *s = 0;
873 873
874 __add_preferred_console(buf, idx, options, brl_options); 874 __add_preferred_console(buf, idx, options, brl_options);
875 console_set_on_cmdline = 1; 875 console_set_on_cmdline = 1;
876 return 1; 876 return 1;
877 } 877 }
878 __setup("console=", console_setup); 878 __setup("console=", console_setup);
879 879
880 /** 880 /**
881 * add_preferred_console - add a device to the list of preferred consoles. 881 * add_preferred_console - add a device to the list of preferred consoles.
882 * @name: device name 882 * @name: device name
883 * @idx: device index 883 * @idx: device index
884 * @options: options for this console 884 * @options: options for this console
885 * 885 *
886 * The last preferred console added will be used for kernel messages 886 * The last preferred console added will be used for kernel messages
887 * and stdin/out/err for init. Normally this is used by console_setup 887 * and stdin/out/err for init. Normally this is used by console_setup
888 * above to handle user-supplied console arguments; however it can also 888 * above to handle user-supplied console arguments; however it can also
889 * be used by arch-specific code either to override the user or more 889 * be used by arch-specific code either to override the user or more
890 * commonly to provide a default console (ie from PROM variables) when 890 * commonly to provide a default console (ie from PROM variables) when
891 * the user has not supplied one. 891 * the user has not supplied one.
892 */ 892 */
893 int add_preferred_console(char *name, int idx, char *options) 893 int add_preferred_console(char *name, int idx, char *options)
894 { 894 {
895 return __add_preferred_console(name, idx, options, NULL); 895 return __add_preferred_console(name, idx, options, NULL);
896 } 896 }
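[Editor's note] As a sketch of the arch-side use described in the kernel-doc, a firmware-derived default is typically registered only when the user supplied nothing (the device name and options here are illustrative, not from this patch):

    /* e.g. in arch setup code, after reading PROM variables */
    if (!console_set_on_cmdline)
            add_preferred_console("ttyS", 0, "9600n8");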
897 897
898 int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) 898 int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
899 { 899 {
900 struct console_cmdline *c; 900 struct console_cmdline *c;
901 int i; 901 int i;
902 902
903 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) 903 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
904 if (strcmp(console_cmdline[i].name, name) == 0 && 904 if (strcmp(console_cmdline[i].name, name) == 0 &&
905 console_cmdline[i].index == idx) { 905 console_cmdline[i].index == idx) {
906 c = &console_cmdline[i]; 906 c = &console_cmdline[i];
907 strlcpy(c->name, name_new, sizeof(c->name)); 907 strlcpy(c->name, name_new, sizeof(c->name));
908 c->name[sizeof(c->name) - 1] = 0; 908 c->name[sizeof(c->name) - 1] = 0;
909 c->options = options; 909 c->options = options;
910 c->index = idx_new; 910 c->index = idx_new;
911 return i; 911 return i;
912 } 912 }
913 /* not found */ 913 /* not found */
914 return -1; 914 return -1;
915 } 915 }
916 916
917 int console_suspend_enabled = 1; 917 int console_suspend_enabled = 1;
918 EXPORT_SYMBOL(console_suspend_enabled); 918 EXPORT_SYMBOL(console_suspend_enabled);
919 919
920 static int __init console_suspend_disable(char *str) 920 static int __init console_suspend_disable(char *str)
921 { 921 {
922 console_suspend_enabled = 0; 922 console_suspend_enabled = 0;
923 return 1; 923 return 1;
924 } 924 }
925 __setup("no_console_suspend", console_suspend_disable); 925 __setup("no_console_suspend", console_suspend_disable);
926 926
927 /** 927 /**
928 * suspend_console - suspend the console subsystem 928 * suspend_console - suspend the console subsystem
929 * 929 *
930 * This disables printk() while we go into suspend states 930 * This disables printk() while we go into suspend states
931 */ 931 */
932 void suspend_console(void) 932 void suspend_console(void)
933 { 933 {
934 if (!console_suspend_enabled) 934 if (!console_suspend_enabled)
935 return; 935 return;
936 printk("Suspending console(s) (use no_console_suspend to debug)\n"); 936 printk("Suspending console(s) (use no_console_suspend to debug)\n");
937 acquire_console_sem(); 937 acquire_console_sem();
938 console_suspended = 1; 938 console_suspended = 1;
939 } 939 }
940 940
941 void resume_console(void) 941 void resume_console(void)
942 { 942 {
943 if (!console_suspend_enabled) 943 if (!console_suspend_enabled)
944 return; 944 return;
945 console_suspended = 0; 945 console_suspended = 0;
946 release_console_sem(); 946 release_console_sem();
947 } 947 }
948 948
949 /** 949 /**
950 * acquire_console_sem - lock the console system for exclusive use. 950 * acquire_console_sem - lock the console system for exclusive use.
951 * 951 *
952 * Acquires a semaphore which guarantees that the caller has 952 * Acquires a semaphore which guarantees that the caller has
953 * exclusive access to the console system and the console_drivers list. 953 * exclusive access to the console system and the console_drivers list.
954 * 954 *
955 * Can sleep, returns nothing. 955 * Can sleep, returns nothing.
956 */ 956 */
957 void acquire_console_sem(void) 957 void acquire_console_sem(void)
958 { 958 {
959 BUG_ON(in_interrupt()); 959 BUG_ON(in_interrupt());
960 if (console_suspended) { 960 if (console_suspended) {
961 down(&secondary_console_sem); 961 down(&secondary_console_sem);
962 return; 962 return;
963 } 963 }
964 down(&console_sem); 964 down(&console_sem);
965 console_locked = 1; 965 console_locked = 1;
966 console_may_schedule = 1; 966 console_may_schedule = 1;
967 } 967 }
968 EXPORT_SYMBOL(acquire_console_sem); 968 EXPORT_SYMBOL(acquire_console_sem);
969 969
970 int try_acquire_console_sem(void) 970 int try_acquire_console_sem(void)
971 { 971 {
972 if (down_trylock(&console_sem)) 972 if (down_trylock(&console_sem))
973 return -1; 973 return -1;
974 console_locked = 1; 974 console_locked = 1;
975 console_may_schedule = 0; 975 console_may_schedule = 0;
976 return 0; 976 return 0;
977 } 977 }
978 EXPORT_SYMBOL(try_acquire_console_sem); 978 EXPORT_SYMBOL(try_acquire_console_sem);
979 979
980 int is_console_locked(void) 980 int is_console_locked(void)
981 { 981 {
982 return console_locked; 982 return console_locked;
983 } 983 }
984 984
985 void wake_up_klogd(void) 985 static DEFINE_PER_CPU(int, printk_pending);
986
987 void printk_tick(void)
986 { 988 {
987 if (!oops_in_progress && waitqueue_active(&log_wait)) 989 if (__get_cpu_var(printk_pending)) {
990 __get_cpu_var(printk_pending) = 0;
988 wake_up_interruptible(&log_wait); 991 wake_up_interruptible(&log_wait);
992 }
993 }
994
995 int printk_needs_cpu(int cpu)
996 {
997 return per_cpu(printk_pending, cpu);
998 }
999
1000 void wake_up_klogd(void)
1001 {
1002 if (waitqueue_active(&log_wait))
1003 __get_cpu_var(printk_pending) = 1;
989 } 1004 }
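[Editor's note] This hunk is the core of the change: wake_up_klogd() no longer calls wake_up_interruptible() directly - it may run in contexts where that would recurse into the scheduler, as the printk() kernel-doc above warns - and instead just marks the per-cpu printk_pending flag. The actual wakeup happens in printk_tick(), polled from the timer tick, while printk_needs_cpu() lets the nohz idle code keep the tick running on a cpu that still owes a wakeup. A hedged sketch of the tick-side hookup; the real call site lives in one of the other changed files (kernel/timer.c), so the surrounding function is illustrative:

    /*
     * Illustrative only: printk_tick() is real (see above), but the
     * surrounding function is a stand-in for its actual caller,
     * which is outside the hunk shown here.
     */
    static void tick_side_sketch(void)
    {
            /* ... regular per-tick work on this cpu ... */
            printk_tick();	/* perform any deferred klogd wakeup */
    }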
990 1005
991 /** 1006 /**
992 * release_console_sem - unlock the console system 1007 * release_console_sem - unlock the console system
993 * 1008 *
994 * Releases the semaphore which the caller holds on the console system 1009 * Releases the semaphore which the caller holds on the console system
995 * and the console driver list. 1010 * and the console driver list.
996 * 1011 *
997 * While the semaphore was held, console output may have been buffered 1012 * While the semaphore was held, console output may have been buffered
998 * by printk(). If this is the case, release_console_sem() emits 1013 * by printk(). If this is the case, release_console_sem() emits
999 * the output prior to releasing the semaphore. 1014 * the output prior to releasing the semaphore.
1000 * 1015 *
1001 * If there is output waiting for klogd, we wake it up. 1016 * If there is output waiting for klogd, we wake it up.
1002 * 1017 *
1003 * release_console_sem() may be called from any context. 1018 * release_console_sem() may be called from any context.
1004 */ 1019 */
1005 void release_console_sem(void) 1020 void release_console_sem(void)
1006 { 1021 {
1007 unsigned long flags; 1022 unsigned long flags;
1008 unsigned _con_start, _log_end; 1023 unsigned _con_start, _log_end;
1009 unsigned wake_klogd = 0; 1024 unsigned wake_klogd = 0;
1010 1025
1011 if (console_suspended) { 1026 if (console_suspended) {
1012 up(&secondary_console_sem); 1027 up(&secondary_console_sem);
1013 return; 1028 return;
1014 } 1029 }
1015 1030
1016 console_may_schedule = 0; 1031 console_may_schedule = 0;
1017 1032
1018 for ( ; ; ) { 1033 for ( ; ; ) {
1019 spin_lock_irqsave(&logbuf_lock, flags); 1034 spin_lock_irqsave(&logbuf_lock, flags);
1020 wake_klogd |= log_start - log_end; 1035 wake_klogd |= log_start - log_end;
1021 if (con_start == log_end) 1036 if (con_start == log_end)
1022 break; /* Nothing to print */ 1037 break; /* Nothing to print */
1023 _con_start = con_start; 1038 _con_start = con_start;
1024 _log_end = log_end; 1039 _log_end = log_end;
1025 con_start = log_end; /* Flush */ 1040 con_start = log_end; /* Flush */
1026 spin_unlock(&logbuf_lock); 1041 spin_unlock(&logbuf_lock);
1027 stop_critical_timings(); /* don't trace print latency */ 1042 stop_critical_timings(); /* don't trace print latency */
1028 call_console_drivers(_con_start, _log_end); 1043 call_console_drivers(_con_start, _log_end);
1029 start_critical_timings(); 1044 start_critical_timings();
1030 local_irq_restore(flags); 1045 local_irq_restore(flags);
1031 } 1046 }
1032 console_locked = 0; 1047 console_locked = 0;
1033 up(&console_sem); 1048 up(&console_sem);
1034 spin_unlock_irqrestore(&logbuf_lock, flags); 1049 spin_unlock_irqrestore(&logbuf_lock, flags);
1035 if (wake_klogd) 1050 if (wake_klogd)
1036 wake_up_klogd(); 1051 wake_up_klogd();
1037 } 1052 }
1038 EXPORT_SYMBOL(release_console_sem); 1053 EXPORT_SYMBOL(release_console_sem);
1039 1054
1040 /** 1055 /**
1041 * console_conditional_schedule - yield the CPU if required 1056 * console_conditional_schedule - yield the CPU if required
1042 * 1057 *
1043 * If the console code is currently allowed to sleep, and 1058 * If the console code is currently allowed to sleep, and
1044 * if this CPU should yield the CPU to another task, do 1059 * if this CPU should yield the CPU to another task, do
1045 * so here. 1060 * so here.
1046 * 1061 *
1047 * Must be called within acquire_console_sem(). 1062 * Must be called within acquire_console_sem().
1048 */ 1063 */
1049 void __sched console_conditional_schedule(void) 1064 void __sched console_conditional_schedule(void)
1050 { 1065 {
1051 if (console_may_schedule) 1066 if (console_may_schedule)
1052 cond_resched(); 1067 cond_resched();
1053 } 1068 }
1054 EXPORT_SYMBOL(console_conditional_schedule); 1069 EXPORT_SYMBOL(console_conditional_schedule);
1055 1070
1056 void console_print(const char *s) 1071 void console_print(const char *s)
1057 { 1072 {
1058 printk(KERN_EMERG "%s", s); 1073 printk(KERN_EMERG "%s", s);
1059 } 1074 }
1060 EXPORT_SYMBOL(console_print); 1075 EXPORT_SYMBOL(console_print);
1061 1076
1062 void console_unblank(void) 1077 void console_unblank(void)
1063 { 1078 {
1064 struct console *c; 1079 struct console *c;
1065 1080
1066 /* 1081 /*
1067 * console_unblank can no longer be called in interrupt context unless 1082 * console_unblank can no longer be called in interrupt context unless
1068 * oops_in_progress is set to 1. 1083 * oops_in_progress is set to 1.
1069 */ 1084 */
1070 if (oops_in_progress) { 1085 if (oops_in_progress) {
1071 if (down_trylock(&console_sem) != 0) 1086 if (down_trylock(&console_sem) != 0)
1072 return; 1087 return;
1073 } else 1088 } else
1074 acquire_console_sem(); 1089 acquire_console_sem();
1075 1090
1076 console_locked = 1; 1091 console_locked = 1;
1077 console_may_schedule = 0; 1092 console_may_schedule = 0;
1078 for (c = console_drivers; c != NULL; c = c->next) 1093 for (c = console_drivers; c != NULL; c = c->next)
1079 if ((c->flags & CON_ENABLED) && c->unblank) 1094 if ((c->flags & CON_ENABLED) && c->unblank)
1080 c->unblank(); 1095 c->unblank();
1081 release_console_sem(); 1096 release_console_sem();
1082 } 1097 }
1083 1098
1084 /* 1099 /*
1085 * Return the console tty driver structure and its associated index 1100 * Return the console tty driver structure and its associated index
1086 */ 1101 */
1087 struct tty_driver *console_device(int *index) 1102 struct tty_driver *console_device(int *index)
1088 { 1103 {
1089 struct console *c; 1104 struct console *c;
1090 struct tty_driver *driver = NULL; 1105 struct tty_driver *driver = NULL;
1091 1106
1092 acquire_console_sem(); 1107 acquire_console_sem();
1093 for (c = console_drivers; c != NULL; c = c->next) { 1108 for (c = console_drivers; c != NULL; c = c->next) {
1094 if (!c->device) 1109 if (!c->device)
1095 continue; 1110 continue;
1096 driver = c->device(c, index); 1111 driver = c->device(c, index);
1097 if (driver) 1112 if (driver)
1098 break; 1113 break;
1099 } 1114 }
1100 release_console_sem(); 1115 release_console_sem();
1101 return driver; 1116 return driver;
1102 } 1117 }
1103 1118
1104 /* 1119 /*
1105 * Prevent further output on the passed console device so that (for example) 1120 * Prevent further output on the passed console device so that (for example)
1106 * serial drivers can disable console output before suspending a port, and can 1121 * serial drivers can disable console output before suspending a port, and can
1107 * re-enable output afterwards. 1122 * re-enable output afterwards.
1108 */ 1123 */
1109 void console_stop(struct console *console) 1124 void console_stop(struct console *console)
1110 { 1125 {
1111 acquire_console_sem(); 1126 acquire_console_sem();
1112 console->flags &= ~CON_ENABLED; 1127 console->flags &= ~CON_ENABLED;
1113 release_console_sem(); 1128 release_console_sem();
1114 } 1129 }
1115 EXPORT_SYMBOL(console_stop); 1130 EXPORT_SYMBOL(console_stop);
1116 1131
1117 void console_start(struct console *console) 1132 void console_start(struct console *console)
1118 { 1133 {
1119 acquire_console_sem(); 1134 acquire_console_sem();
1120 console->flags |= CON_ENABLED; 1135 console->flags |= CON_ENABLED;
1121 release_console_sem(); 1136 release_console_sem();
1122 } 1137 }
1123 EXPORT_SYMBOL(console_start); 1138 EXPORT_SYMBOL(console_start);
1124 1139
1125 /* 1140 /*
1126 * The console driver calls this routine during kernel initialization 1141 * The console driver calls this routine during kernel initialization
1127 * to register the console printing procedure with printk() and to 1142 * to register the console printing procedure with printk() and to
1128 * print any messages that were printed by the kernel before the 1143 * print any messages that were printed by the kernel before the
1129 * console driver was initialized. 1144 * console driver was initialized.
1130 */ 1145 */
1131 void register_console(struct console *console) 1146 void register_console(struct console *console)
1132 { 1147 {
1133 int i; 1148 int i;
1134 unsigned long flags; 1149 unsigned long flags;
1135 struct console *bootconsole = NULL; 1150 struct console *bootconsole = NULL;
1136 1151
1137 if (console_drivers) { 1152 if (console_drivers) {
1138 if (console->flags & CON_BOOT) 1153 if (console->flags & CON_BOOT)
1139 return; 1154 return;
1140 if (console_drivers->flags & CON_BOOT) 1155 if (console_drivers->flags & CON_BOOT)
1141 bootconsole = console_drivers; 1156 bootconsole = console_drivers;
1142 } 1157 }
1143 1158
1144 if (preferred_console < 0 || bootconsole || !console_drivers) 1159 if (preferred_console < 0 || bootconsole || !console_drivers)
1145 preferred_console = selected_console; 1160 preferred_console = selected_console;
1146 1161
1147 if (console->early_setup) 1162 if (console->early_setup)
1148 console->early_setup(); 1163 console->early_setup();
1149 1164
1150 /* 1165 /*
1151 * See if we want to use this console driver. If we 1166 * See if we want to use this console driver. If we
1152 * didn't select a console we take the first one 1167 * didn't select a console we take the first one
1153 * that registers here. 1168 * that registers here.
1154 */ 1169 */
1155 if (preferred_console < 0) { 1170 if (preferred_console < 0) {
1156 if (console->index < 0) 1171 if (console->index < 0)
1157 console->index = 0; 1172 console->index = 0;
1158 if (console->setup == NULL || 1173 if (console->setup == NULL ||
1159 console->setup(console, NULL) == 0) { 1174 console->setup(console, NULL) == 0) {
1160 console->flags |= CON_ENABLED; 1175 console->flags |= CON_ENABLED;
1161 if (console->device) { 1176 if (console->device) {
1162 console->flags |= CON_CONSDEV; 1177 console->flags |= CON_CONSDEV;
1163 preferred_console = 0; 1178 preferred_console = 0;
1164 } 1179 }
1165 } 1180 }
1166 } 1181 }
1167 1182
1168 /* 1183 /*
1169 * See if this console matches one we selected on 1184 * See if this console matches one we selected on
1170 * the command line. 1185 * the command line.
1171 */ 1186 */
1172 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; 1187 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0];
1173 i++) { 1188 i++) {
1174 if (strcmp(console_cmdline[i].name, console->name) != 0) 1189 if (strcmp(console_cmdline[i].name, console->name) != 0)
1175 continue; 1190 continue;
1176 if (console->index >= 0 && 1191 if (console->index >= 0 &&
1177 console->index != console_cmdline[i].index) 1192 console->index != console_cmdline[i].index)
1178 continue; 1193 continue;
1179 if (console->index < 0) 1194 if (console->index < 0)
1180 console->index = console_cmdline[i].index; 1195 console->index = console_cmdline[i].index;
1181 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE 1196 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
1182 if (console_cmdline[i].brl_options) { 1197 if (console_cmdline[i].brl_options) {
1183 console->flags |= CON_BRL; 1198 console->flags |= CON_BRL;
1184 braille_register_console(console, 1199 braille_register_console(console,
1185 console_cmdline[i].index, 1200 console_cmdline[i].index,
1186 console_cmdline[i].options, 1201 console_cmdline[i].options,
1187 console_cmdline[i].brl_options); 1202 console_cmdline[i].brl_options);
1188 return; 1203 return;
1189 } 1204 }
1190 #endif 1205 #endif
1191 if (console->setup && 1206 if (console->setup &&
1192 console->setup(console, console_cmdline[i].options) != 0) 1207 console->setup(console, console_cmdline[i].options) != 0)
1193 break; 1208 break;
1194 console->flags |= CON_ENABLED; 1209 console->flags |= CON_ENABLED;
1195 console->index = console_cmdline[i].index; 1210 console->index = console_cmdline[i].index;
1196 if (i == selected_console) { 1211 if (i == selected_console) {
1197 console->flags |= CON_CONSDEV; 1212 console->flags |= CON_CONSDEV;
1198 preferred_console = selected_console; 1213 preferred_console = selected_console;
1199 } 1214 }
1200 break; 1215 break;
1201 } 1216 }
1202 1217
1203 if (!(console->flags & CON_ENABLED)) 1218 if (!(console->flags & CON_ENABLED))
1204 return; 1219 return;
1205 1220
1206 if (bootconsole && (console->flags & CON_CONSDEV)) { 1221 if (bootconsole && (console->flags & CON_CONSDEV)) {
1207 printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", 1222 printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n",
1208 bootconsole->name, bootconsole->index, 1223 bootconsole->name, bootconsole->index,
1209 console->name, console->index); 1224 console->name, console->index);
1210 unregister_console(bootconsole); 1225 unregister_console(bootconsole);
1211 console->flags &= ~CON_PRINTBUFFER; 1226 console->flags &= ~CON_PRINTBUFFER;
1212 } else { 1227 } else {
1213 printk(KERN_INFO "console [%s%d] enabled\n", 1228 printk(KERN_INFO "console [%s%d] enabled\n",
1214 console->name, console->index); 1229 console->name, console->index);
1215 } 1230 }
1216 1231
1217 /* 1232 /*
1218 * Put this console in the list - keep the 1233 * Put this console in the list - keep the
1219 * preferred driver at the head of the list. 1234 * preferred driver at the head of the list.
1220 */ 1235 */
1221 acquire_console_sem(); 1236 acquire_console_sem();
1222 if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { 1237 if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
1223 console->next = console_drivers; 1238 console->next = console_drivers;
1224 console_drivers = console; 1239 console_drivers = console;
1225 if (console->next) 1240 if (console->next)
1226 console->next->flags &= ~CON_CONSDEV; 1241 console->next->flags &= ~CON_CONSDEV;
1227 } else { 1242 } else {
1228 console->next = console_drivers->next; 1243 console->next = console_drivers->next;
1229 console_drivers->next = console; 1244 console_drivers->next = console;
1230 } 1245 }
1231 if (console->flags & CON_PRINTBUFFER) { 1246 if (console->flags & CON_PRINTBUFFER) {
1232 /* 1247 /*
1233 * release_console_sem() will print out the buffered messages 1248 * release_console_sem() will print out the buffered messages
1234 * for us. 1249 * for us.
1235 */ 1250 */
1236 spin_lock_irqsave(&logbuf_lock, flags); 1251 spin_lock_irqsave(&logbuf_lock, flags);
1237 con_start = log_start; 1252 con_start = log_start;
1238 spin_unlock_irqrestore(&logbuf_lock, flags); 1253 spin_unlock_irqrestore(&logbuf_lock, flags);
1239 } 1254 }
1240 release_console_sem(); 1255 release_console_sem();
1241 } 1256 }
1242 EXPORT_SYMBOL(register_console); 1257 EXPORT_SYMBOL(register_console);
1243 1258
1244 int unregister_console(struct console *console) 1259 int unregister_console(struct console *console)
1245 { 1260 {
1246 struct console *a, *b; 1261 struct console *a, *b;
1247 int res = 1; 1262 int res = 1;
1248 1263
1249 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE 1264 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
1250 if (console->flags & CON_BRL) 1265 if (console->flags & CON_BRL)
1251 return braille_unregister_console(console); 1266 return braille_unregister_console(console);
1252 #endif 1267 #endif
1253 1268
1254 acquire_console_sem(); 1269 acquire_console_sem();
1255 if (console_drivers == console) { 1270 if (console_drivers == console) {
1256 console_drivers=console->next; 1271 console_drivers=console->next;
1257 res = 0; 1272 res = 0;
1258 } else if (console_drivers) { 1273 } else if (console_drivers) {
1259 for (a=console_drivers->next, b=console_drivers ; 1274 for (a=console_drivers->next, b=console_drivers ;
1260 a; b=a, a=b->next) { 1275 a; b=a, a=b->next) {
1261 if (a == console) { 1276 if (a == console) {
1262 b->next = a->next; 1277 b->next = a->next;
1263 res = 0; 1278 res = 0;
1264 break; 1279 break;
1265 } 1280 }
1266 } 1281 }
1267 } 1282 }
1268 1283
1269 /* 1284 /*
1270 * If this isn't the last console and it has CON_CONSDEV set, we 1285 * If this isn't the last console and it has CON_CONSDEV set, we
1271 * need to set it on the next preferred console. 1286 * need to set it on the next preferred console.
1272 */ 1287 */
1273 if (console_drivers != NULL && console->flags & CON_CONSDEV) 1288 if (console_drivers != NULL && console->flags & CON_CONSDEV)
1274 console_drivers->flags |= CON_CONSDEV; 1289 console_drivers->flags |= CON_CONSDEV;
1275 1290
1276 release_console_sem(); 1291 release_console_sem();
1277 return res; 1292 return res;
1278 } 1293 }
1279 EXPORT_SYMBOL(unregister_console); 1294 EXPORT_SYMBOL(unregister_console);
1280 1295
1281 static int __init disable_boot_consoles(void) 1296 static int __init disable_boot_consoles(void)
1282 { 1297 {
1283 if (console_drivers != NULL) { 1298 if (console_drivers != NULL) {
1284 if (console_drivers->flags & CON_BOOT) { 1299 if (console_drivers->flags & CON_BOOT) {
1285 printk(KERN_INFO "turn off boot console %s%d\n", 1300 printk(KERN_INFO "turn off boot console %s%d\n",
1286 console_drivers->name, console_drivers->index); 1301 console_drivers->name, console_drivers->index);
1287 return unregister_console(console_drivers); 1302 return unregister_console(console_drivers);
1288 } 1303 }
1289 } 1304 }
1290 return 0; 1305 return 0;
1291 } 1306 }
1292 late_initcall(disable_boot_consoles); 1307 late_initcall(disable_boot_consoles);
1293 1308
1294 /** 1309 /**
1295 * tty_write_message - write a message to a certain tty, not just the console. 1310 * tty_write_message - write a message to a certain tty, not just the console.
1296 * @tty: the destination tty_struct 1311 * @tty: the destination tty_struct
1297 * @msg: the message to write 1312 * @msg: the message to write
1298 * 1313 *
1299 * This is used for messages that need to be redirected to a specific tty. 1314 * This is used for messages that need to be redirected to a specific tty.
1300 * We don't put it into the syslog queue right now; maybe in the future, if 1315 * We don't put it into the syslog queue right now; maybe in the future, if
1301 * really needed. 1316 * really needed.
1302 */ 1317 */
1303 void tty_write_message(struct tty_struct *tty, char *msg) 1318 void tty_write_message(struct tty_struct *tty, char *msg)
1304 { 1319 {
1305 if (tty && tty->ops->write) 1320 if (tty && tty->ops->write)
1306 tty->ops->write(tty, msg, strlen(msg)); 1321 tty->ops->write(tty, msg, strlen(msg));
1307 return; 1322 return;
1308 } 1323 }
1309 1324
1310 #if defined CONFIG_PRINTK 1325 #if defined CONFIG_PRINTK
1311 1326
1312 /* 1327 /*
1313 * printk rate limiting, lifted from the networking subsystem. 1328 * printk rate limiting, lifted from the networking subsystem.
1314 * 1329 *
1315 * This enforces a rate limit: not more than 10 kernel messages 1330 * This enforces a rate limit: not more than 10 kernel messages
1316 * every 5s to make a denial-of-service attack impossible. 1331 * every 5s to make a denial-of-service attack impossible.
1317 */ 1332 */
1318 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); 1333 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
1319 1334
1320 int printk_ratelimit(void) 1335 int printk_ratelimit(void)
1321 { 1336 {
1322 return __ratelimit(&printk_ratelimit_state); 1337 return __ratelimit(&printk_ratelimit_state);
1323 } 1338 }
1324 EXPORT_SYMBOL(printk_ratelimit); 1339 EXPORT_SYMBOL(printk_ratelimit);
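[Editor's note] The usual call-site pattern, so a hot path emits at most the configured burst (the message text is illustrative):

    if (printk_ratelimit())
            printk(KERN_WARNING "dropping packet: out of buffers\n");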
1325 1340
1326 /** 1341 /**
1327 * printk_timed_ratelimit - caller-controlled printk ratelimiting 1342 * printk_timed_ratelimit - caller-controlled printk ratelimiting
1328 * @caller_jiffies: pointer to caller's state 1343 * @caller_jiffies: pointer to caller's state
1329 * @interval_msecs: minimum interval between prints 1344 * @interval_msecs: minimum interval between prints
1330 * 1345 *
1331 * printk_timed_ratelimit() returns true if more than @interval_msecs 1346 * printk_timed_ratelimit() returns true if more than @interval_msecs
1332 * milliseconds have elapsed since the last time printk_timed_ratelimit() 1347 * milliseconds have elapsed since the last time printk_timed_ratelimit()
1333 * returned true. 1348 * returned true.
1334 */ 1349 */
1335 bool printk_timed_ratelimit(unsigned long *caller_jiffies, 1350 bool printk_timed_ratelimit(unsigned long *caller_jiffies,
1336 unsigned int interval_msecs) 1351 unsigned int interval_msecs)
1337 { 1352 {
1338 if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) { 1353 if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) {
1339 *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs); 1354 *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs);
1340 return true; 1355 return true;
1341 } 1356 }
1342 return false; 1357 return false;
1343 } 1358 }
1344 EXPORT_SYMBOL(printk_timed_ratelimit); 1359 EXPORT_SYMBOL(printk_timed_ratelimit);
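[Editor's note] In contrast to printk_ratelimit() above, the state here is caller-owned, so independent call sites do not share one budget. A usage sketch (the 1000 ms interval and message are illustrative):

    static unsigned long last_msg;	/* caller state; 0 means "never printed" */

    if (printk_timed_ratelimit(&last_msg, 1000))
            printk(KERN_INFO "device still busy, waiting\n");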
1345 #endif 1360 #endif
1346 1361
kernel/time/tick-sched.c
1 /* 1 /*
2 * linux/kernel/time/tick-sched.c 2 * linux/kernel/time/tick-sched.c
3 * 3 *
4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> 4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar 5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner 6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
7 * 7 *
8 * No idle tick implementation for low and high resolution timers 8 * No idle tick implementation for low and high resolution timers
9 * 9 *
10 * Started by: Thomas Gleixner and Ingo Molnar 10 * Started by: Thomas Gleixner and Ingo Molnar
11 * 11 *
12 * Distribute under GPLv2. 12 * Distribute under GPLv2.
13 */ 13 */
14 #include <linux/cpu.h> 14 #include <linux/cpu.h>
15 #include <linux/err.h> 15 #include <linux/err.h>
16 #include <linux/hrtimer.h> 16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h> 17 #include <linux/interrupt.h>
18 #include <linux/kernel_stat.h> 18 #include <linux/kernel_stat.h>
19 #include <linux/percpu.h> 19 #include <linux/percpu.h>
20 #include <linux/profile.h> 20 #include <linux/profile.h>
21 #include <linux/sched.h> 21 #include <linux/sched.h>
22 #include <linux/tick.h> 22 #include <linux/tick.h>
23 23
24 #include <asm/irq_regs.h> 24 #include <asm/irq_regs.h>
25 25
26 #include "tick-internal.h" 26 #include "tick-internal.h"
27 27
28 /* 28 /*
29 * Per cpu nohz control structure 29 * Per cpu nohz control structure
30 */ 30 */
31 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); 31 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
32 32
33 /* 33 /*
34 * The time, when the last jiffy update happened. Protected by xtime_lock. 34 * The time, when the last jiffy update happened. Protected by xtime_lock.
35 */ 35 */
36 static ktime_t last_jiffies_update; 36 static ktime_t last_jiffies_update;
37 37
38 struct tick_sched *tick_get_tick_sched(int cpu) 38 struct tick_sched *tick_get_tick_sched(int cpu)
39 { 39 {
40 return &per_cpu(tick_cpu_sched, cpu); 40 return &per_cpu(tick_cpu_sched, cpu);
41 } 41 }
42 42
43 /* 43 /*
44 * Must be called with interrupts disabled ! 44 * Must be called with interrupts disabled !
45 */ 45 */
46 static void tick_do_update_jiffies64(ktime_t now) 46 static void tick_do_update_jiffies64(ktime_t now)
47 { 47 {
48 unsigned long ticks = 0; 48 unsigned long ticks = 0;
49 ktime_t delta; 49 ktime_t delta;
50 50
51 /* 51 /*
52 * Do a quick check without holding xtime_lock: 52 * Do a quick check without holding xtime_lock:
53 */ 53 */
54 delta = ktime_sub(now, last_jiffies_update); 54 delta = ktime_sub(now, last_jiffies_update);
55 if (delta.tv64 < tick_period.tv64) 55 if (delta.tv64 < tick_period.tv64)
56 return; 56 return;
57 57
58 /* Reevaluate with xtime_lock held */ 58 /* Reevaluate with xtime_lock held */
59 write_seqlock(&xtime_lock); 59 write_seqlock(&xtime_lock);
60 60
61 delta = ktime_sub(now, last_jiffies_update); 61 delta = ktime_sub(now, last_jiffies_update);
62 if (delta.tv64 >= tick_period.tv64) { 62 if (delta.tv64 >= tick_period.tv64) {
63 63
64 delta = ktime_sub(delta, tick_period); 64 delta = ktime_sub(delta, tick_period);
65 last_jiffies_update = ktime_add(last_jiffies_update, 65 last_jiffies_update = ktime_add(last_jiffies_update,
66 tick_period); 66 tick_period);
67 67
68 /* Slow path for long timeouts */ 68 /* Slow path for long timeouts */
69 if (unlikely(delta.tv64 >= tick_period.tv64)) { 69 if (unlikely(delta.tv64 >= tick_period.tv64)) {
70 s64 incr = ktime_to_ns(tick_period); 70 s64 incr = ktime_to_ns(tick_period);
71 71
72 ticks = ktime_divns(delta, incr); 72 ticks = ktime_divns(delta, incr);
73 73
74 last_jiffies_update = ktime_add_ns(last_jiffies_update, 74 last_jiffies_update = ktime_add_ns(last_jiffies_update,
75 incr * ticks); 75 incr * ticks);
76 } 76 }
77 do_timer(++ticks); 77 do_timer(++ticks);
78 } 78 }
79 write_sequnlock(&xtime_lock); 79 write_sequnlock(&xtime_lock);
80 } 80 }
81 81
82 /* 82 /*
83 * Initialize and retrieve the jiffies update. 83 * Initialize and retrieve the jiffies update.
84 */ 84 */
85 static ktime_t tick_init_jiffy_update(void) 85 static ktime_t tick_init_jiffy_update(void)
86 { 86 {
87 ktime_t period; 87 ktime_t period;
88 88
89 write_seqlock(&xtime_lock); 89 write_seqlock(&xtime_lock);
90 /* Did we start the jiffies update yet ? */ 90 /* Did we start the jiffies update yet ? */
91 if (last_jiffies_update.tv64 == 0) 91 if (last_jiffies_update.tv64 == 0)
92 last_jiffies_update = tick_next_period; 92 last_jiffies_update = tick_next_period;
93 period = last_jiffies_update; 93 period = last_jiffies_update;
94 write_sequnlock(&xtime_lock); 94 write_sequnlock(&xtime_lock);
95 return period; 95 return period;
96 } 96 }
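
Both helpers above take the write side of xtime_lock; readers, such as the retry loop in tick_nohz_stop_sched_tick() further down, spin until they observe an even, unchanged sequence count. A toy single-writer seqcount in userspace C; it deliberately omits the writer-side spinlock and the memory-barrier fine print of the real seqlock:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint seq;		/* even: stable, odd: write in progress */
static uint64_t shared_ns;	/* analogue of last_jiffies_update */

static void writer_update(uint64_t v)
{
	atomic_fetch_add(&seq, 1);	/* sequence becomes odd */
	shared_ns = v;
	atomic_fetch_add(&seq, 1);	/* sequence becomes even again */
}

static uint64_t reader_snapshot(void)
{
	unsigned int s;
	uint64_t v;

	do {			/* retry when a write raced with the read */
		s = atomic_load(&seq);
		v = shared_ns;
	} while ((s & 1) || atomic_load(&seq) != s);
	return v;
}

int main(void)
{
	writer_update(42);
	printf("read %llu\n", (unsigned long long)reader_snapshot());
	return 0;
}
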
97 97
98 /* 98 /*
99 * NOHZ - aka dynamic tick functionality 99 * NOHZ - aka dynamic tick functionality
100 */ 100 */
101 #ifdef CONFIG_NO_HZ 101 #ifdef CONFIG_NO_HZ
102 /* 102 /*
103 * NO HZ enabled? 103 * NO HZ enabled?
104 */ 104 */
105 static int tick_nohz_enabled __read_mostly = 1; 105 static int tick_nohz_enabled __read_mostly = 1;
106 106
107 /* 107 /*
108 * Enable / Disable tickless mode 108 * Enable / Disable tickless mode
109 */ 109 */
110 static int __init setup_tick_nohz(char *str) 110 static int __init setup_tick_nohz(char *str)
111 { 111 {
112 if (!strcmp(str, "off")) 112 if (!strcmp(str, "off"))
113 tick_nohz_enabled = 0; 113 tick_nohz_enabled = 0;
114 else if (!strcmp(str, "on")) 114 else if (!strcmp(str, "on"))
115 tick_nohz_enabled = 1; 115 tick_nohz_enabled = 1;
116 else 116 else
117 return 0; 117 return 0;
118 return 1; 118 return 1;
119 } 119 }
120 120
121 __setup("nohz=", setup_tick_nohz); 121 __setup("nohz=", setup_tick_nohz);
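
setup_tick_nohz() accepts only the two literal strings, so booting with nohz=off on the kernel command line keeps the periodic tick, nohz=on (matching the built-in default) permits stopping it, and any other value makes the handler return 0, i.e. the option is treated as unhandled.
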
122 122
123 /** 123 /**
124 * tick_nohz_update_jiffies - update jiffies when idle was interrupted 124 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
125 * 125 *
126 * Called from interrupt entry when the CPU was idle 126 * Called from interrupt entry when the CPU was idle
127 * 127 *
128 * In case the sched_tick was stopped on this CPU, we have to check if jiffies 128 * In case the sched_tick was stopped on this CPU, we have to check if jiffies
129 * must be updated. Otherwise an interrupt handler could use a stale jiffy 129 * must be updated. Otherwise an interrupt handler could use a stale jiffy
130 * value. We do this unconditionally on any cpu, as we don't know whether the 130 * value. We do this unconditionally on any cpu, as we don't know whether the
131 * cpu which has the update task assigned is in a long sleep. 131 * cpu which has the update task assigned is in a long sleep.
132 */ 132 */
133 void tick_nohz_update_jiffies(void) 133 void tick_nohz_update_jiffies(void)
134 { 134 {
135 int cpu = smp_processor_id(); 135 int cpu = smp_processor_id();
136 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 136 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
137 unsigned long flags; 137 unsigned long flags;
138 ktime_t now; 138 ktime_t now;
139 139
140 if (!ts->tick_stopped) 140 if (!ts->tick_stopped)
141 return; 141 return;
142 142
143 cpu_clear(cpu, nohz_cpu_mask); 143 cpu_clear(cpu, nohz_cpu_mask);
144 now = ktime_get(); 144 now = ktime_get();
145 ts->idle_waketime = now; 145 ts->idle_waketime = now;
146 146
147 local_irq_save(flags); 147 local_irq_save(flags);
148 tick_do_update_jiffies64(now); 148 tick_do_update_jiffies64(now);
149 local_irq_restore(flags); 149 local_irq_restore(flags);
150 150
151 touch_softlockup_watchdog(); 151 touch_softlockup_watchdog();
152 } 152 }
153 153
154 void tick_nohz_stop_idle(int cpu) 154 void tick_nohz_stop_idle(int cpu)
155 { 155 {
156 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 156 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
157 157
158 if (ts->idle_active) { 158 if (ts->idle_active) {
159 ktime_t now, delta; 159 ktime_t now, delta;
160 now = ktime_get(); 160 now = ktime_get();
161 delta = ktime_sub(now, ts->idle_entrytime); 161 delta = ktime_sub(now, ts->idle_entrytime);
162 ts->idle_lastupdate = now; 162 ts->idle_lastupdate = now;
163 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); 163 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
164 ts->idle_active = 0; 164 ts->idle_active = 0;
165 } 165 }
166 } 166 }
167 167
168 static ktime_t tick_nohz_start_idle(struct tick_sched *ts) 168 static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
169 { 169 {
170 ktime_t now, delta; 170 ktime_t now, delta;
171 171
172 now = ktime_get(); 172 now = ktime_get();
173 if (ts->idle_active) { 173 if (ts->idle_active) {
174 delta = ktime_sub(now, ts->idle_entrytime); 174 delta = ktime_sub(now, ts->idle_entrytime);
175 ts->idle_lastupdate = now; 175 ts->idle_lastupdate = now;
176 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); 176 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
177 } 177 }
178 ts->idle_entrytime = now; 178 ts->idle_entrytime = now;
179 ts->idle_active = 1; 179 ts->idle_active = 1;
180 return now; 180 return now;
181 } 181 }
182 182
183 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) 183 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
184 { 184 {
185 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 185 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
186 186
187 *last_update_time = ktime_to_us(ts->idle_lastupdate); 187 *last_update_time = ktime_to_us(ts->idle_lastupdate);
188 return ktime_to_us(ts->idle_sleeptime); 188 return ktime_to_us(ts->idle_sleeptime);
189 } 189 }
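
A self-contained userspace analogue of the bookkeeping that tick_nohz_start_idle() and tick_nohz_stop_idle() perform; timestamps are bare integers and the names are illustrative:

#include <stdio.h>
#include <stdint.h>

struct idle_stats {
	uint64_t entrytime;	/* analogue of ts->idle_entrytime */
	uint64_t sleeptime;	/* analogue of ts->idle_sleeptime */
	int active;
};

static void idle_start(struct idle_stats *s, uint64_t now)
{
	if (s->active)		/* fold in any interval already running */
		s->sleeptime += now - s->entrytime;
	s->entrytime = now;
	s->active = 1;
}

static void idle_stop(struct idle_stats *s, uint64_t now)
{
	if (s->active) {
		s->sleeptime += now - s->entrytime;
		s->active = 0;
	}
}

int main(void)
{
	struct idle_stats s = { 0 };

	idle_start(&s, 100);
	idle_stop(&s, 250);	/* 150 units idle */
	idle_start(&s, 300);
	idle_stop(&s, 400);	/* 100 more */
	printf("total idle: %llu\n", (unsigned long long)s.sleeptime);
	return 0;
}
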
190 190
191 /** 191 /**
192 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task 192 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
193 * 193 *
194 * When the next event is more than a tick into the future, stop the idle tick 194 * When the next event is more than a tick into the future, stop the idle tick
195 * Called either from the idle loop or from irq_exit() when an idle period was 195 * Called either from the idle loop or from irq_exit() when an idle period was
196 * just interrupted by an interrupt which did not cause a reschedule. 196 * just interrupted by an interrupt which did not cause a reschedule.
197 */ 197 */
198 void tick_nohz_stop_sched_tick(int inidle) 198 void tick_nohz_stop_sched_tick(int inidle)
199 { 199 {
200 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; 200 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
201 struct tick_sched *ts; 201 struct tick_sched *ts;
202 ktime_t last_update, expires, now; 202 ktime_t last_update, expires, now;
203 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 203 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
204 int cpu; 204 int cpu;
205 205
206 local_irq_save(flags); 206 local_irq_save(flags);
207 207
208 cpu = smp_processor_id(); 208 cpu = smp_processor_id();
209 ts = &per_cpu(tick_cpu_sched, cpu); 209 ts = &per_cpu(tick_cpu_sched, cpu);
210 now = tick_nohz_start_idle(ts); 210 now = tick_nohz_start_idle(ts);
211 211
212 /* 212 /*
213 * If this cpu is offline and it is the one which updates 213 * If this cpu is offline and it is the one which updates
214 * jiffies, then give up the assignment and let it be taken by 214 * jiffies, then give up the assignment and let it be taken by
215 * the cpu which runs the tick timer next. If we don't drop 215 * the cpu which runs the tick timer next. If we don't drop
216 * this here the jiffies might be stale and do_timer() might 216 * this here the jiffies might be stale and do_timer() might
217 * never be invoked. 217 * never be invoked.
218 */ 218 */
219 if (unlikely(!cpu_online(cpu))) { 219 if (unlikely(!cpu_online(cpu))) {
220 if (cpu == tick_do_timer_cpu) 220 if (cpu == tick_do_timer_cpu)
221 tick_do_timer_cpu = -1; 221 tick_do_timer_cpu = -1;
222 } 222 }
223 223
224 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 224 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
225 goto end; 225 goto end;
226 226
227 if (!inidle && !ts->inidle) 227 if (!inidle && !ts->inidle)
228 goto end; 228 goto end;
229 229
230 ts->inidle = 1; 230 ts->inidle = 1;
231 231
232 if (need_resched()) 232 if (need_resched())
233 goto end; 233 goto end;
234 234
235 if (unlikely(local_softirq_pending())) { 235 if (unlikely(local_softirq_pending())) {
236 static int ratelimit; 236 static int ratelimit;
237 237
238 if (ratelimit < 10) { 238 if (ratelimit < 10) {
239 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", 239 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
240 local_softirq_pending()); 240 local_softirq_pending());
241 ratelimit++; 241 ratelimit++;
242 } 242 }
243 goto end; 243 goto end;
244 } 244 }
245 245
246 ts->idle_calls++; 246 ts->idle_calls++;
247 /* Read jiffies and the time when jiffies were updated last */ 247 /* Read jiffies and the time when jiffies were updated last */
248 do { 248 do {
249 seq = read_seqbegin(&xtime_lock); 249 seq = read_seqbegin(&xtime_lock);
250 last_update = last_jiffies_update; 250 last_update = last_jiffies_update;
251 last_jiffies = jiffies; 251 last_jiffies = jiffies;
252 } while (read_seqretry(&xtime_lock, seq)); 252 } while (read_seqretry(&xtime_lock, seq));
253 253
254 /* Get the next timer wheel timer */ 254 /* Get the next timer wheel timer */
255 next_jiffies = get_next_timer_interrupt(last_jiffies); 255 next_jiffies = get_next_timer_interrupt(last_jiffies);
256 delta_jiffies = next_jiffies - last_jiffies; 256 delta_jiffies = next_jiffies - last_jiffies;
257 257
258 if (rcu_needs_cpu(cpu)) 258 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
259 delta_jiffies = 1; 259 delta_jiffies = 1;
260 /* 260 /*
261 * Do not stop the tick, if we are only one off 261 * Do not stop the tick, if we are only one off
262 * or if the cpu is required for rcu 262 * or if the cpu is required for rcu
263 */ 263 */
264 if (!ts->tick_stopped && delta_jiffies == 1) 264 if (!ts->tick_stopped && delta_jiffies == 1)
265 goto out; 265 goto out;
266 266
267 /* Schedule the tick, if we are at least one jiffie off */ 267 /* Schedule the tick, if we are at least one jiffie off */
268 if ((long)delta_jiffies >= 1) { 268 if ((long)delta_jiffies >= 1) {
269 269
270 if (delta_jiffies > 1) 270 if (delta_jiffies > 1)
271 cpu_set(cpu, nohz_cpu_mask); 271 cpu_set(cpu, nohz_cpu_mask);
272 /* 272 /*
273 * nohz_stop_sched_tick can be called several times before 273 * nohz_stop_sched_tick can be called several times before
274 * the nohz_restart_sched_tick is called. This happens when 274 * the nohz_restart_sched_tick is called. This happens when
275 * interrupts arrive which do not cause a reschedule. In the 275 * interrupts arrive which do not cause a reschedule. In the
276 * first call we save the current tick time, so we can restart 276 * first call we save the current tick time, so we can restart
277 * the scheduler tick in nohz_restart_sched_tick. 277 * the scheduler tick in nohz_restart_sched_tick.
278 */ 278 */
279 if (!ts->tick_stopped) { 279 if (!ts->tick_stopped) {
280 if (select_nohz_load_balancer(1)) { 280 if (select_nohz_load_balancer(1)) {
281 /* 281 /*
282 * sched tick not stopped! 282 * sched tick not stopped!
283 */ 283 */
284 cpu_clear(cpu, nohz_cpu_mask); 284 cpu_clear(cpu, nohz_cpu_mask);
285 goto out; 285 goto out;
286 } 286 }
287 287
288 ts->idle_tick = ts->sched_timer.expires; 288 ts->idle_tick = ts->sched_timer.expires;
289 ts->tick_stopped = 1; 289 ts->tick_stopped = 1;
290 ts->idle_jiffies = last_jiffies; 290 ts->idle_jiffies = last_jiffies;
291 rcu_enter_nohz(); 291 rcu_enter_nohz();
292 sched_clock_tick_stop(cpu); 292 sched_clock_tick_stop(cpu);
293 } 293 }
294 294
295 /* 295 /*
296 * If this cpu is the one which updates jiffies, then 296 * If this cpu is the one which updates jiffies, then
297 * give up the assignment and let it be taken by the 297 * give up the assignment and let it be taken by the
298 * cpu which runs the tick timer next, which might be 298 * cpu which runs the tick timer next, which might be
299 * this cpu as well. If we don't drop this here the 299 * this cpu as well. If we don't drop this here the
300 * jiffies might be stale and do_timer() might never 300 * jiffies might be stale and do_timer() might never
301 * be invoked. 301 * be invoked.
302 */ 302 */
303 if (cpu == tick_do_timer_cpu) 303 if (cpu == tick_do_timer_cpu)
304 tick_do_timer_cpu = -1; 304 tick_do_timer_cpu = -1;
305 305
306 ts->idle_sleeps++; 306 ts->idle_sleeps++;
307 307
308 /* 308 /*
309 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that 309 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
310 * there is no timer pending or at least extremely far 310 * there is no timer pending or at least extremely far
311 * into the future (12 days for HZ=1000). In this case 311 * into the future (12 days for HZ=1000). In this case
312 * we simply stop the tick timer: 312 * we simply stop the tick timer:
313 */ 313 */
314 if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { 314 if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
315 ts->idle_expires.tv64 = KTIME_MAX; 315 ts->idle_expires.tv64 = KTIME_MAX;
316 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 316 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
317 hrtimer_cancel(&ts->sched_timer); 317 hrtimer_cancel(&ts->sched_timer);
318 goto out; 318 goto out;
319 } 319 }
320 320
321 /* 321 /*
322 * calculate the expiry time for the next timer wheel 322 * calculate the expiry time for the next timer wheel
323 * timer 323 * timer
324 */ 324 */
325 expires = ktime_add_ns(last_update, tick_period.tv64 * 325 expires = ktime_add_ns(last_update, tick_period.tv64 *
326 delta_jiffies); 326 delta_jiffies);
327 ts->idle_expires = expires; 327 ts->idle_expires = expires;
328 328
329 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 329 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
330 hrtimer_start(&ts->sched_timer, expires, 330 hrtimer_start(&ts->sched_timer, expires,
331 HRTIMER_MODE_ABS); 331 HRTIMER_MODE_ABS);
332 /* Check, if the timer was already in the past */ 332 /* Check, if the timer was already in the past */
333 if (hrtimer_active(&ts->sched_timer)) 333 if (hrtimer_active(&ts->sched_timer))
334 goto out; 334 goto out;
335 } else if (!tick_program_event(expires, 0)) 335 } else if (!tick_program_event(expires, 0))
336 goto out; 336 goto out;
337 /* 337 /*
338 * We are past the event already. So we crossed a 338 * We are past the event already. So we crossed a
339 * jiffie boundary. Update jiffies and raise the 339 * jiffie boundary. Update jiffies and raise the
340 * softirq. 340 * softirq.
341 */ 341 */
342 tick_do_update_jiffies64(ktime_get()); 342 tick_do_update_jiffies64(ktime_get());
343 cpu_clear(cpu, nohz_cpu_mask); 343 cpu_clear(cpu, nohz_cpu_mask);
344 } 344 }
345 raise_softirq_irqoff(TIMER_SOFTIRQ); 345 raise_softirq_irqoff(TIMER_SOFTIRQ);
346 out: 346 out:
347 ts->next_jiffies = next_jiffies; 347 ts->next_jiffies = next_jiffies;
348 ts->last_jiffies = last_jiffies; 348 ts->last_jiffies = last_jiffies;
349 ts->sleep_length = ktime_sub(dev->next_event, now); 349 ts->sleep_length = ktime_sub(dev->next_event, now);
350 end: 350 end:
351 local_irq_restore(flags); 351 local_irq_restore(flags);
352 } 352 }
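
The printk_needs_cpu() test added above forces delta_jiffies to 1, keeping the tick alive on a CPU that still has deferred printk work, exactly as rcu_needs_cpu() does for RCU. A hedged userspace sketch of that defer-to-the-tick pattern; the flag and function names are illustrative, not the kernel's:

#include <stdio.h>

static int wakeup_pending;	/* stands in for the per-cpu printk state */

static void log_from_atomic_context(void)
{
	wakeup_pending = 1;	/* only record the request; no wakeup here */
}

static int needs_tick(void)	/* analogue of printk_needs_cpu() */
{
	return wakeup_pending;
}

static void tick_handler(void)	/* safe context: perform the wakeup */
{
	if (wakeup_pending) {
		wakeup_pending = 0;
		printf("waking log consumer\n");
	}
}

int main(void)
{
	log_from_atomic_context();
	if (needs_tick())	/* the tick is kept running for this */
		tick_handler();
	return 0;
}
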
353 353
354 /** 354 /**
355 * tick_nohz_get_sleep_length - return the length of the current sleep 355 * tick_nohz_get_sleep_length - return the length of the current sleep
356 * 356 *
357 * Called from power state control code with interrupts disabled 357 * Called from power state control code with interrupts disabled
358 */ 358 */
359 ktime_t tick_nohz_get_sleep_length(void) 359 ktime_t tick_nohz_get_sleep_length(void)
360 { 360 {
361 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 361 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
362 362
363 return ts->sleep_length; 363 return ts->sleep_length;
364 } 364 }
365 365
366 /** 366 /**
367 * tick_nohz_restart_sched_tick - restart the idle tick from the idle task 367 * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
368 * 368 *
369 * Restart the idle tick when the CPU is woken up from idle 369 * Restart the idle tick when the CPU is woken up from idle
370 */ 370 */
371 void tick_nohz_restart_sched_tick(void) 371 void tick_nohz_restart_sched_tick(void)
372 { 372 {
373 int cpu = smp_processor_id(); 373 int cpu = smp_processor_id();
374 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 374 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
375 unsigned long ticks; 375 unsigned long ticks;
376 ktime_t now; 376 ktime_t now;
377 377
378 local_irq_disable(); 378 local_irq_disable();
379 tick_nohz_stop_idle(cpu); 379 tick_nohz_stop_idle(cpu);
380 380
381 if (!ts->inidle || !ts->tick_stopped) { 381 if (!ts->inidle || !ts->tick_stopped) {
382 ts->inidle = 0; 382 ts->inidle = 0;
383 local_irq_enable(); 383 local_irq_enable();
384 return; 384 return;
385 } 385 }
386 386
387 ts->inidle = 0; 387 ts->inidle = 0;
388 388
389 rcu_exit_nohz(); 389 rcu_exit_nohz();
390 390
391 /* Update jiffies first */ 391 /* Update jiffies first */
392 select_nohz_load_balancer(0); 392 select_nohz_load_balancer(0);
393 now = ktime_get(); 393 now = ktime_get();
394 tick_do_update_jiffies64(now); 394 tick_do_update_jiffies64(now);
395 sched_clock_tick_start(cpu); 395 sched_clock_tick_start(cpu);
396 cpu_clear(cpu, nohz_cpu_mask); 396 cpu_clear(cpu, nohz_cpu_mask);
397 397
398 /* 398 /*
399 * We stopped the tick in idle. update_process_times() would miss the 399 * We stopped the tick in idle. update_process_times() would miss the
400 * time we slept, as it only does a single tick of 400 * time we slept, as it only does a single tick of
401 * accounting. Enforce that this is accounted to idle! 401 * accounting. Enforce that this is accounted to idle!
402 */ 402 */
403 ticks = jiffies - ts->idle_jiffies; 403 ticks = jiffies - ts->idle_jiffies;
404 /* 404 /*
405 * We might be one off. Do not randomly account a huge number of ticks! 405 * We might be one off. Do not randomly account a huge number of ticks!
406 */ 406 */
407 if (ticks && ticks < LONG_MAX) { 407 if (ticks && ticks < LONG_MAX) {
408 add_preempt_count(HARDIRQ_OFFSET); 408 add_preempt_count(HARDIRQ_OFFSET);
409 account_system_time(current, HARDIRQ_OFFSET, 409 account_system_time(current, HARDIRQ_OFFSET,
410 jiffies_to_cputime(ticks)); 410 jiffies_to_cputime(ticks));
411 sub_preempt_count(HARDIRQ_OFFSET); 411 sub_preempt_count(HARDIRQ_OFFSET);
412 } 412 }
413 413
414 touch_softlockup_watchdog(); 414 touch_softlockup_watchdog();
415 /* 415 /*
416 * Cancel the scheduled timer and restore the tick 416 * Cancel the scheduled timer and restore the tick
417 */ 417 */
418 ts->tick_stopped = 0; 418 ts->tick_stopped = 0;
419 ts->idle_exittime = now; 419 ts->idle_exittime = now;
420 hrtimer_cancel(&ts->sched_timer); 420 hrtimer_cancel(&ts->sched_timer);
421 ts->sched_timer.expires = ts->idle_tick; 421 ts->sched_timer.expires = ts->idle_tick;
422 422
423 while (1) { 423 while (1) {
424 /* Forward the time to expire in the future */ 424 /* Forward the time to expire in the future */
425 hrtimer_forward(&ts->sched_timer, now, tick_period); 425 hrtimer_forward(&ts->sched_timer, now, tick_period);
426 426
427 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 427 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
428 hrtimer_start(&ts->sched_timer, 428 hrtimer_start(&ts->sched_timer,
429 ts->sched_timer.expires, 429 ts->sched_timer.expires,
430 HRTIMER_MODE_ABS); 430 HRTIMER_MODE_ABS);
431 /* Check, if the timer was already in the past */ 431 /* Check, if the timer was already in the past */
432 if (hrtimer_active(&ts->sched_timer)) 432 if (hrtimer_active(&ts->sched_timer))
433 break; 433 break;
434 } else { 434 } else {
435 if (!tick_program_event(ts->sched_timer.expires, 0)) 435 if (!tick_program_event(ts->sched_timer.expires, 0))
436 break; 436 break;
437 } 437 }
438 /* Update jiffies and reread time */ 438 /* Update jiffies and reread time */
439 tick_do_update_jiffies64(now); 439 tick_do_update_jiffies64(now);
440 now = ktime_get(); 440 now = ktime_get();
441 } 441 }
442 local_irq_enable(); 442 local_irq_enable();
443 } 443 }
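
The restart loop above forwards the expiry one tick_period at a time until it lands in the future and programming succeeds. A standalone sketch of the forwarding step; the real hrtimer_forward() divides rather than loops when the timer is far behind, and all values here are illustrative:

#include <stdio.h>
#include <stdint.h>

static uint64_t forward(uint64_t expires, uint64_t now, uint64_t period,
			unsigned long *overruns)
{
	*overruns = 0;
	while (expires <= now) {	/* push past 'now', counting misses */
		expires += period;
		(*overruns)++;
	}
	return expires;
}

int main(void)
{
	unsigned long o;

	/* stopped at t=10, woke at t=47, period 10: next expiry is t=50 */
	printf("%llu after %lu overruns\n",
	       (unsigned long long)forward(10, 47, 10, &o), o);
	return 0;
}
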
444 444
445 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) 445 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
446 { 446 {
447 hrtimer_forward(&ts->sched_timer, now, tick_period); 447 hrtimer_forward(&ts->sched_timer, now, tick_period);
448 return tick_program_event(ts->sched_timer.expires, 0); 448 return tick_program_event(ts->sched_timer.expires, 0);
449 } 449 }
450 450
451 /* 451 /*
452 * The nohz low res interrupt handler 452 * The nohz low res interrupt handler
453 */ 453 */
454 static void tick_nohz_handler(struct clock_event_device *dev) 454 static void tick_nohz_handler(struct clock_event_device *dev)
455 { 455 {
456 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 456 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
457 struct pt_regs *regs = get_irq_regs(); 457 struct pt_regs *regs = get_irq_regs();
458 int cpu = smp_processor_id(); 458 int cpu = smp_processor_id();
459 ktime_t now = ktime_get(); 459 ktime_t now = ktime_get();
460 460
461 dev->next_event.tv64 = KTIME_MAX; 461 dev->next_event.tv64 = KTIME_MAX;
462 462
463 /* 463 /*
464 * Check if the do_timer duty was dropped. We don't care about 464 * Check if the do_timer duty was dropped. We don't care about
465 * concurrency: This happens only when the cpu in charge went 465 * concurrency: This happens only when the cpu in charge went
466 * into a long sleep. If two cpus happen to assign themselves to 466 * into a long sleep. If two cpus happen to assign themselves to
467 * this duty, then the jiffies update is still serialized by 467 * this duty, then the jiffies update is still serialized by
468 * xtime_lock. 468 * xtime_lock.
469 */ 469 */
470 if (unlikely(tick_do_timer_cpu == -1)) 470 if (unlikely(tick_do_timer_cpu == -1))
471 tick_do_timer_cpu = cpu; 471 tick_do_timer_cpu = cpu;
472 472
473 /* Check, if the jiffies need an update */ 473 /* Check, if the jiffies need an update */
474 if (tick_do_timer_cpu == cpu) 474 if (tick_do_timer_cpu == cpu)
475 tick_do_update_jiffies64(now); 475 tick_do_update_jiffies64(now);
476 476
477 /* 477 /*
478 * When we are idle and the tick is stopped, we have to touch 478 * When we are idle and the tick is stopped, we have to touch
479 * the watchdog as we might not schedule for a really long 479 * the watchdog as we might not schedule for a really long
480 * time. This happens on completely idle SMP systems while 480 * time. This happens on completely idle SMP systems while
481 * waiting on the login prompt. We also increment the "start 481 * waiting on the login prompt. We also increment the "start
482 * of idle" jiffy stamp so the idle accounting adjustment we 482 * of idle" jiffy stamp so the idle accounting adjustment we
483 * do when we go busy again does not account too many ticks. 483 * do when we go busy again does not account too many ticks.
484 */ 484 */
485 if (ts->tick_stopped) { 485 if (ts->tick_stopped) {
486 touch_softlockup_watchdog(); 486 touch_softlockup_watchdog();
487 ts->idle_jiffies++; 487 ts->idle_jiffies++;
488 } 488 }
489 489
490 update_process_times(user_mode(regs)); 490 update_process_times(user_mode(regs));
491 profile_tick(CPU_PROFILING); 491 profile_tick(CPU_PROFILING);
492 492
493 /* Do not restart, when we are in the idle loop */ 493 /* Do not restart, when we are in the idle loop */
494 if (ts->tick_stopped) 494 if (ts->tick_stopped)
495 return; 495 return;
496 496
497 while (tick_nohz_reprogram(ts, now)) { 497 while (tick_nohz_reprogram(ts, now)) {
498 now = ktime_get(); 498 now = ktime_get();
499 tick_do_update_jiffies64(now); 499 tick_do_update_jiffies64(now);
500 } 500 }
501 } 501 }
502 502
503 /** 503 /**
504 * tick_nohz_switch_to_nohz - switch to nohz mode 504 * tick_nohz_switch_to_nohz - switch to nohz mode
505 */ 505 */
506 static void tick_nohz_switch_to_nohz(void) 506 static void tick_nohz_switch_to_nohz(void)
507 { 507 {
508 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 508 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
509 ktime_t next; 509 ktime_t next;
510 510
511 if (!tick_nohz_enabled) 511 if (!tick_nohz_enabled)
512 return; 512 return;
513 513
514 local_irq_disable(); 514 local_irq_disable();
515 if (tick_switch_to_oneshot(tick_nohz_handler)) { 515 if (tick_switch_to_oneshot(tick_nohz_handler)) {
516 local_irq_enable(); 516 local_irq_enable();
517 return; 517 return;
518 } 518 }
519 519
520 ts->nohz_mode = NOHZ_MODE_LOWRES; 520 ts->nohz_mode = NOHZ_MODE_LOWRES;
521 521
522 /* 522 /*
523 * Recycle the hrtimer in ts, so we can share the 523 * Recycle the hrtimer in ts, so we can share the
524 * hrtimer_forward with the highres code. 524 * hrtimer_forward with the highres code.
525 */ 525 */
526 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 526 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
527 /* Get the next period */ 527 /* Get the next period */
528 next = tick_init_jiffy_update(); 528 next = tick_init_jiffy_update();
529 529
530 for (;;) { 530 for (;;) {
531 ts->sched_timer.expires = next; 531 ts->sched_timer.expires = next;
532 if (!tick_program_event(next, 0)) 532 if (!tick_program_event(next, 0))
533 break; 533 break;
534 next = ktime_add(next, tick_period); 534 next = ktime_add(next, tick_period);
535 } 535 }
536 local_irq_enable(); 536 local_irq_enable();
537 537
538 printk(KERN_INFO "Switched to NOHZ mode on CPU #%d\n", 538 printk(KERN_INFO "Switched to NOHZ mode on CPU #%d\n",
539 smp_processor_id()); 539 smp_processor_id());
540 } 540 }
541 541
542 #else 542 #else
543 543
544 static inline void tick_nohz_switch_to_nohz(void) { } 544 static inline void tick_nohz_switch_to_nohz(void) { }
545 545
546 #endif /* NO_HZ */ 546 #endif /* NO_HZ */
547 547
548 /* 548 /*
549 * High resolution timer specific code 549 * High resolution timer specific code
550 */ 550 */
551 #ifdef CONFIG_HIGH_RES_TIMERS 551 #ifdef CONFIG_HIGH_RES_TIMERS
552 /* 552 /*
553 * We rearm the timer until we get disabled by the idle code. 553 * We rearm the timer until we get disabled by the idle code.
554 * Called with interrupts disabled and timer->base->cpu_base->lock held. 554 * Called with interrupts disabled and timer->base->cpu_base->lock held.
555 */ 555 */
556 static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) 556 static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
557 { 557 {
558 struct tick_sched *ts = 558 struct tick_sched *ts =
559 container_of(timer, struct tick_sched, sched_timer); 559 container_of(timer, struct tick_sched, sched_timer);
560 struct pt_regs *regs = get_irq_regs(); 560 struct pt_regs *regs = get_irq_regs();
561 ktime_t now = ktime_get(); 561 ktime_t now = ktime_get();
562 int cpu = smp_processor_id(); 562 int cpu = smp_processor_id();
563 563
564 #ifdef CONFIG_NO_HZ 564 #ifdef CONFIG_NO_HZ
565 /* 565 /*
566 * Check if the do_timer duty was dropped. We don't care about 566 * Check if the do_timer duty was dropped. We don't care about
567 * concurrency: This happens only when the cpu in charge went 567 * concurrency: This happens only when the cpu in charge went
568 * into a long sleep. If two cpus happen to assign themselves to 568 * into a long sleep. If two cpus happen to assign themselves to
569 * this duty, then the jiffies update is still serialized by 569 * this duty, then the jiffies update is still serialized by
570 * xtime_lock. 570 * xtime_lock.
571 */ 571 */
572 if (unlikely(tick_do_timer_cpu == -1)) 572 if (unlikely(tick_do_timer_cpu == -1))
573 tick_do_timer_cpu = cpu; 573 tick_do_timer_cpu = cpu;
574 #endif 574 #endif
575 575
576 /* Check, if the jiffies need an update */ 576 /* Check, if the jiffies need an update */
577 if (tick_do_timer_cpu == cpu) 577 if (tick_do_timer_cpu == cpu)
578 tick_do_update_jiffies64(now); 578 tick_do_update_jiffies64(now);
579 579
580 /* 580 /*
581 * Do not call, when we are not in irq context and have 581 * Do not call, when we are not in irq context and have
582 * no valid regs pointer 582 * no valid regs pointer
583 */ 583 */
584 if (regs) { 584 if (regs) {
585 /* 585 /*
586 * When we are idle and the tick is stopped, we have to touch 586 * When we are idle and the tick is stopped, we have to touch
587 * the watchdog as we might not schedule for a really long 587 * the watchdog as we might not schedule for a really long
588 * time. This happens on completely idle SMP systems while 588 * time. This happens on completely idle SMP systems while
589 * waiting on the login prompt. We also increment the "start of 589 * waiting on the login prompt. We also increment the "start of
590 * idle" jiffy stamp so the idle accounting adjustment we do 590 * idle" jiffy stamp so the idle accounting adjustment we do
591 * when we go busy again does not account too many ticks. 591 * when we go busy again does not account too many ticks.
592 */ 592 */
593 if (ts->tick_stopped) { 593 if (ts->tick_stopped) {
594 touch_softlockup_watchdog(); 594 touch_softlockup_watchdog();
595 ts->idle_jiffies++; 595 ts->idle_jiffies++;
596 } 596 }
597 update_process_times(user_mode(regs)); 597 update_process_times(user_mode(regs));
598 profile_tick(CPU_PROFILING); 598 profile_tick(CPU_PROFILING);
599 } 599 }
600 600
601 /* Do not restart, when we are in the idle loop */ 601 /* Do not restart, when we are in the idle loop */
602 if (ts->tick_stopped) 602 if (ts->tick_stopped)
603 return HRTIMER_NORESTART; 603 return HRTIMER_NORESTART;
604 604
605 hrtimer_forward(timer, now, tick_period); 605 hrtimer_forward(timer, now, tick_period);
606 606
607 return HRTIMER_RESTART; 607 return HRTIMER_RESTART;
608 } 608 }
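
tick_sched_timer() recovers its tick_sched from the embedded hrtimer with container_of(). A self-contained illustration of that pointer arithmetic, using made-up structure names:

#include <stdio.h>
#include <stddef.h>

struct inner { int x; };

struct outer {
	int before;
	struct inner member;
};

#define container_of(ptr, type, field) \
	((type *)((char *)(ptr) - offsetof(type, field)))

int main(void)
{
	struct outer o = { .before = 7, .member = { 42 } };
	struct inner *ip = &o.member;	/* all we are handed */
	struct outer *op = container_of(ip, struct outer, member);

	printf("recovered: before=%d x=%d\n", op->before, op->member.x);
	return 0;
}
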
609 609
610 /** 610 /**
611 * tick_setup_sched_timer - setup the tick emulation timer 611 * tick_setup_sched_timer - setup the tick emulation timer
612 */ 612 */
613 void tick_setup_sched_timer(void) 613 void tick_setup_sched_timer(void)
614 { 614 {
615 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 615 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
616 ktime_t now = ktime_get(); 616 ktime_t now = ktime_get();
617 u64 offset; 617 u64 offset;
618 618
619 /* 619 /*
620 * Emulate tick processing via per-CPU hrtimers: 620 * Emulate tick processing via per-CPU hrtimers:
621 */ 621 */
622 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 622 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
623 ts->sched_timer.function = tick_sched_timer; 623 ts->sched_timer.function = tick_sched_timer;
624 ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; 624 ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
625 625
626 /* Get the next period (per cpu) */ 626 /* Get the next period (per cpu) */
627 ts->sched_timer.expires = tick_init_jiffy_update(); 627 ts->sched_timer.expires = tick_init_jiffy_update();
628 offset = ktime_to_ns(tick_period) >> 1; 628 offset = ktime_to_ns(tick_period) >> 1;
629 do_div(offset, num_possible_cpus()); 629 do_div(offset, num_possible_cpus());
630 offset *= smp_processor_id(); 630 offset *= smp_processor_id();
631 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); 631 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
632 632
633 for (;;) { 633 for (;;) {
634 hrtimer_forward(&ts->sched_timer, now, tick_period); 634 hrtimer_forward(&ts->sched_timer, now, tick_period);
635 hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, 635 hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
636 HRTIMER_MODE_ABS); 636 HRTIMER_MODE_ABS);
637 /* Check, if the timer was already in the past */ 637 /* Check, if the timer was already in the past */
638 if (hrtimer_active(&ts->sched_timer)) 638 if (hrtimer_active(&ts->sched_timer))
639 break; 639 break;
640 now = ktime_get(); 640 now = ktime_get();
641 } 641 }
642 642
643 #ifdef CONFIG_NO_HZ 643 #ifdef CONFIG_NO_HZ
644 if (tick_nohz_enabled) 644 if (tick_nohz_enabled)
645 ts->nohz_mode = NOHZ_MODE_HIGHRES; 645 ts->nohz_mode = NOHZ_MODE_HIGHRES;
646 #endif 646 #endif
647 } 647 }
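
The offset computed above spreads each CPU's first expiry across half a tick period so the per-CPU timers do not all contend for xtime_lock in the same instant. A worked example, assuming HZ=1000 (1 ms period) and four possible CPUs; both values are illustrative:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t period_ns = 1000000;		/* tick_period at HZ=1000 */
	unsigned int ncpus = 4;			/* num_possible_cpus() */
	uint64_t step = (period_ns >> 1) / ncpus;

	for (unsigned int cpu = 0; cpu < ncpus; cpu++)
		printf("cpu %u: first expiry skewed by %llu ns\n",
		       cpu, (unsigned long long)(step * cpu));
	return 0;
}
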
648 648
649 void tick_cancel_sched_timer(int cpu) 649 void tick_cancel_sched_timer(int cpu)
650 { 650 {
651 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 651 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
652 652
653 if (ts->sched_timer.base) 653 if (ts->sched_timer.base)
654 hrtimer_cancel(&ts->sched_timer); 654 hrtimer_cancel(&ts->sched_timer);
655 655
656 ts->nohz_mode = NOHZ_MODE_INACTIVE; 656 ts->nohz_mode = NOHZ_MODE_INACTIVE;
657 } 657 }
658 #endif /* HIGH_RES_TIMERS */ 658 #endif /* HIGH_RES_TIMERS */
659 659
660 /** 660 /**
661 * Async notification about clocksource changes 661 * Async notification about clocksource changes
662 */ 662 */
663 void tick_clock_notify(void) 663 void tick_clock_notify(void)
664 { 664 {
665 int cpu; 665 int cpu;
666 666
667 for_each_possible_cpu(cpu) 667 for_each_possible_cpu(cpu)
668 set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); 668 set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
669 } 669 }
670 670
671 /* 671 /*
672 * Async notification about clock event changes 672 * Async notification about clock event changes
673 */ 673 */
674 void tick_oneshot_notify(void) 674 void tick_oneshot_notify(void)
675 { 675 {
676 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 676 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
677 677
678 set_bit(0, &ts->check_clocks); 678 set_bit(0, &ts->check_clocks);
679 } 679 }
680 680
681 /** 681 /**
682 * Check if a change happened which makes oneshot possible. 682 * Check if a change happened which makes oneshot possible.
683 * 683 *
684 * Called cyclically from the hrtimer softirq (driven by the timer 684 * Called cyclically from the hrtimer softirq (driven by the timer
685 * softirq). allow_nohz signals that we can switch into low-res nohz 685 * softirq). allow_nohz signals that we can switch into low-res nohz
686 * mode, because high resolution timers are disabled (either at 686 * mode, because high resolution timers are disabled (either at
687 * compile time or at runtime). 687 * compile time or at runtime).
688 */ 688 */
689 int tick_check_oneshot_change(int allow_nohz) 689 int tick_check_oneshot_change(int allow_nohz)
690 { 690 {
691 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 691 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
692 692
693 if (!test_and_clear_bit(0, &ts->check_clocks)) 693 if (!test_and_clear_bit(0, &ts->check_clocks))
694 return 0; 694 return 0;
695 695
696 if (ts->nohz_mode != NOHZ_MODE_INACTIVE) 696 if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
697 return 0; 697 return 0;
698 698
699 if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) 699 if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
700 return 0; 700 return 0;
701 701
702 if (!allow_nohz) 702 if (!allow_nohz)
703 return 1; 703 return 1;
704 704
705 tick_nohz_switch_to_nohz(); 705 tick_nohz_switch_to_nohz();
706 return 0; 706 return 0;
707 } 707 }
708 708
1 /* 1 /*
2 * linux/kernel/timer.c 2 * linux/kernel/timer.c
3 * 3 *
4 * Kernel internal timers, basic process system calls 4 * Kernel internal timers, basic process system calls
5 * 5 *
6 * Copyright (C) 1991, 1992 Linus Torvalds 6 * Copyright (C) 1991, 1992 Linus Torvalds
7 * 7 *
8 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. 8 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
9 * 9 *
10 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 10 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
11 * "A Kernel Model for Precision Timekeeping" by Dave Mills 11 * "A Kernel Model for Precision Timekeeping" by Dave Mills
12 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to 12 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
13 * serialize accesses to xtime/lost_ticks). 13 * serialize accesses to xtime/lost_ticks).
14 * Copyright (C) 1998 Andrea Arcangeli 14 * Copyright (C) 1998 Andrea Arcangeli
15 * 1999-03-10 Improved NTP compatibility by Ulrich Windl 15 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
16 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love 16 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
17 * 2000-10-05 Implemented scalable SMP per-CPU timer handling. 17 * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
18 * Copyright (C) 2000, 2001, 2002 Ingo Molnar 18 * Copyright (C) 2000, 2001, 2002 Ingo Molnar
19 * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar 19 * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
20 */ 20 */
21 21
22 #include <linux/kernel_stat.h> 22 #include <linux/kernel_stat.h>
23 #include <linux/module.h> 23 #include <linux/module.h>
24 #include <linux/interrupt.h> 24 #include <linux/interrupt.h>
25 #include <linux/percpu.h> 25 #include <linux/percpu.h>
26 #include <linux/init.h> 26 #include <linux/init.h>
27 #include <linux/mm.h> 27 #include <linux/mm.h>
28 #include <linux/swap.h> 28 #include <linux/swap.h>
29 #include <linux/pid_namespace.h> 29 #include <linux/pid_namespace.h>
30 #include <linux/notifier.h> 30 #include <linux/notifier.h>
31 #include <linux/thread_info.h> 31 #include <linux/thread_info.h>
32 #include <linux/time.h> 32 #include <linux/time.h>
33 #include <linux/jiffies.h> 33 #include <linux/jiffies.h>
34 #include <linux/posix-timers.h> 34 #include <linux/posix-timers.h>
35 #include <linux/cpu.h> 35 #include <linux/cpu.h>
36 #include <linux/syscalls.h> 36 #include <linux/syscalls.h>
37 #include <linux/delay.h> 37 #include <linux/delay.h>
38 #include <linux/tick.h> 38 #include <linux/tick.h>
39 #include <linux/kallsyms.h> 39 #include <linux/kallsyms.h>
40 40
41 #include <asm/uaccess.h> 41 #include <asm/uaccess.h>
42 #include <asm/unistd.h> 42 #include <asm/unistd.h>
43 #include <asm/div64.h> 43 #include <asm/div64.h>
44 #include <asm/timex.h> 44 #include <asm/timex.h>
45 #include <asm/io.h> 45 #include <asm/io.h>
46 46
47 u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; 47 u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
48 48
49 EXPORT_SYMBOL(jiffies_64); 49 EXPORT_SYMBOL(jiffies_64);
50 50
51 /* 51 /*
52 * per-CPU timer vector definitions: 52 * per-CPU timer vector definitions:
53 */ 53 */
54 #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) 54 #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
55 #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) 55 #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
56 #define TVN_SIZE (1 << TVN_BITS) 56 #define TVN_SIZE (1 << TVN_BITS)
57 #define TVR_SIZE (1 << TVR_BITS) 57 #define TVR_SIZE (1 << TVR_BITS)
58 #define TVN_MASK (TVN_SIZE - 1) 58 #define TVN_MASK (TVN_SIZE - 1)
59 #define TVR_MASK (TVR_SIZE - 1) 59 #define TVR_MASK (TVR_SIZE - 1)
60 60
61 struct tvec { 61 struct tvec {
62 struct list_head vec[TVN_SIZE]; 62 struct list_head vec[TVN_SIZE];
63 }; 63 };
64 64
65 struct tvec_root { 65 struct tvec_root {
66 struct list_head vec[TVR_SIZE]; 66 struct list_head vec[TVR_SIZE];
67 }; 67 };
68 68
69 struct tvec_base { 69 struct tvec_base {
70 spinlock_t lock; 70 spinlock_t lock;
71 struct timer_list *running_timer; 71 struct timer_list *running_timer;
72 unsigned long timer_jiffies; 72 unsigned long timer_jiffies;
73 struct tvec_root tv1; 73 struct tvec_root tv1;
74 struct tvec tv2; 74 struct tvec tv2;
75 struct tvec tv3; 75 struct tvec tv3;
76 struct tvec tv4; 76 struct tvec tv4;
77 struct tvec tv5; 77 struct tvec tv5;
78 } ____cacheline_aligned; 78 } ____cacheline_aligned;
79 79
80 struct tvec_base boot_tvec_bases; 80 struct tvec_base boot_tvec_bases;
81 EXPORT_SYMBOL(boot_tvec_bases); 81 EXPORT_SYMBOL(boot_tvec_bases);
82 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; 82 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
83 83
84 /* 84 /*
85 * Note that all tvec_bases are 2-byte aligned and the lower bit of 85 * Note that all tvec_bases are 2-byte aligned and the lower bit of
86 * base in timer_list is guaranteed to be zero. Use the LSB for 86 * base in timer_list is guaranteed to be zero. Use the LSB for
87 * the new flag to indicate whether the timer is deferrable. 87 * the new flag to indicate whether the timer is deferrable.
88 */ 88 */
89 #define TBASE_DEFERRABLE_FLAG (0x1) 89 #define TBASE_DEFERRABLE_FLAG (0x1)
90 90
91 /* Functions below help us manage 'deferrable' flag */ 91 /* Functions below help us manage 'deferrable' flag */
92 static inline unsigned int tbase_get_deferrable(struct tvec_base *base) 92 static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
93 { 93 {
94 return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); 94 return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
95 } 95 }
96 96
97 static inline struct tvec_base *tbase_get_base(struct tvec_base *base) 97 static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
98 { 98 {
99 return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); 99 return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
100 } 100 }
101 101
102 static inline void timer_set_deferrable(struct timer_list *timer) 102 static inline void timer_set_deferrable(struct timer_list *timer)
103 { 103 {
104 timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | 104 timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
105 TBASE_DEFERRABLE_FLAG)); 105 TBASE_DEFERRABLE_FLAG));
106 } 106 }
107 107
108 static inline void 108 static inline void
109 timer_set_base(struct timer_list *timer, struct tvec_base *new_base) 109 timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
110 { 110 {
111 timer->base = (struct tvec_base *)((unsigned long)(new_base) | 111 timer->base = (struct tvec_base *)((unsigned long)(new_base) |
112 tbase_get_deferrable(timer->base)); 112 tbase_get_deferrable(timer->base));
113 } 113 }
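
A userspace sketch of the LSB tagging these helpers implement: two-byte alignment guarantees the low bit of a valid base pointer is zero, so that bit can carry the deferrable flag. Structure and names are illustrative:

#include <stdio.h>
#include <stdint.h>

#define FLAG_DEFERRABLE 0x1UL

struct base { int dummy; };	/* int alignment keeps bit 0 clear */

static struct base *tag(struct base *b)
{
	return (struct base *)((uintptr_t)b | FLAG_DEFERRABLE);
}

static unsigned int get_flag(struct base *b)
{
	return (uintptr_t)b & FLAG_DEFERRABLE;
}

static struct base *untag(struct base *b)
{
	return (struct base *)((uintptr_t)b & ~FLAG_DEFERRABLE);
}

int main(void)
{
	static struct base b;
	struct base *p = tag(&b);

	printf("flag=%u, original pointer recovered: %d\n",
	       get_flag(p), untag(p) == &b);
	return 0;
}
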
114 114
115 /** 115 /**
116 * __round_jiffies - function to round jiffies to a full second 116 * __round_jiffies - function to round jiffies to a full second
117 * @j: the time in (absolute) jiffies that should be rounded 117 * @j: the time in (absolute) jiffies that should be rounded
118 * @cpu: the processor number on which the timeout will happen 118 * @cpu: the processor number on which the timeout will happen
119 * 119 *
120 * __round_jiffies() rounds an absolute time in the future (in jiffies) 120 * __round_jiffies() rounds an absolute time in the future (in jiffies)
121 * up or down to (approximately) full seconds. This is useful for timers 121 * up or down to (approximately) full seconds. This is useful for timers
122 * for which the exact time they fire does not matter too much, as long as 122 * for which the exact time they fire does not matter too much, as long as
123 * they fire approximately every X seconds. 123 * they fire approximately every X seconds.
124 * 124 *
125 * By rounding these timers to whole seconds, all such timers will fire 125 * By rounding these timers to whole seconds, all such timers will fire
126 * at the same time, rather than at various times spread out. The goal 126 * at the same time, rather than at various times spread out. The goal
127 * of this is to have the CPU wake up less, which saves power. 127 * of this is to have the CPU wake up less, which saves power.
128 * 128 *
129 * The exact rounding is skewed for each processor to avoid all 129 * The exact rounding is skewed for each processor to avoid all
130 * processors firing at the exact same time, which could lead 130 * processors firing at the exact same time, which could lead
131 * to lock contention or spurious cache line bouncing. 131 * to lock contention or spurious cache line bouncing.
132 * 132 *
133 * The return value is the rounded version of the @j parameter. 133 * The return value is the rounded version of the @j parameter.
134 */ 134 */
135 unsigned long __round_jiffies(unsigned long j, int cpu) 135 unsigned long __round_jiffies(unsigned long j, int cpu)
136 { 136 {
137 int rem; 137 int rem;
138 unsigned long original = j; 138 unsigned long original = j;
139 139
140 /* 140 /*
141 * We don't want all cpus firing their timers at once hitting the 141 * We don't want all cpus firing their timers at once hitting the
142 * same lock or cachelines, so we skew each extra cpu with an extra 142 * same lock or cachelines, so we skew each extra cpu with an extra
143 * 3 jiffies. This 3 jiffies came originally from the mm/ code which 143 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
144 * already did this. 144 * already did this.
145 * The skew is done by adding 3*cpunr, then round, then subtract this 145 * The skew is done by adding 3*cpunr, then round, then subtract this
146 * extra offset again. 146 * extra offset again.
147 */ 147 */
148 j += cpu * 3; 148 j += cpu * 3;
149 149
150 rem = j % HZ; 150 rem = j % HZ;
151 151
152 /* 152 /*
153 * If the target jiffie is just after a whole second (which can happen 153 * If the target jiffie is just after a whole second (which can happen
154 * due to delays of the timer irq, long irq off times etc etc) then 154 * due to delays of the timer irq, long irq off times etc etc) then
155 * we should round down to the whole second, not up. Use 1/4th second 155 * we should round down to the whole second, not up. Use 1/4th second
156 * as cutoff for this rounding as an extreme upper bound for this. 156 * as cutoff for this rounding as an extreme upper bound for this.
157 */ 157 */
158 if (rem < HZ/4) /* round down */ 158 if (rem < HZ/4) /* round down */
159 j = j - rem; 159 j = j - rem;
160 else /* round up */ 160 else /* round up */
161 j = j - rem + HZ; 161 j = j - rem + HZ;
162 162
163 /* now that we have rounded, subtract the extra skew again */ 163 /* now that we have rounded, subtract the extra skew again */
164 j -= cpu * 3; 164 j -= cpu * 3;
165 165
166 if (j <= jiffies) /* rounding ate our timeout entirely; */ 166 if (j <= jiffies) /* rounding ate our timeout entirely; */
167 return original; 167 return original;
168 return j; 168 return j;
169 } 169 }
170 EXPORT_SYMBOL_GPL(__round_jiffies); 170 EXPORT_SYMBOL_GPL(__round_jiffies);
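
A userspace re-statement of the skew/round/unskew sequence so it can be traced with concrete numbers; HZ=1000 is assumed here, and the final refusal to round into the past (the j <= jiffies check) is omitted:

#include <stdio.h>

#define HZ 1000		/* illustrative */

static unsigned long round_to_second(unsigned long j, int cpu)
{
	unsigned long rem;

	j += cpu * 3;		/* skew so CPUs round toward different points */
	rem = j % HZ;
	if (rem < HZ / 4)	/* just past a second: round down */
		j -= rem;
	else			/* otherwise round up */
		j += HZ - rem;
	return j - cpu * 3;	/* remove the skew again */
}

int main(void)
{
	printf("%lu\n", round_to_second(1100, 0));	/* 1000: rounded down */
	printf("%lu\n", round_to_second(1400, 0));	/* 2000: rounded up */
	return 0;
}
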
171 171
172 /** 172 /**
173 * __round_jiffies_relative - function to round jiffies to a full second 173 * __round_jiffies_relative - function to round jiffies to a full second
174 * @j: the time in (relative) jiffies that should be rounded 174 * @j: the time in (relative) jiffies that should be rounded
175 * @cpu: the processor number on which the timeout will happen 175 * @cpu: the processor number on which the timeout will happen
176 * 176 *
177 * __round_jiffies_relative() rounds a time delta in the future (in jiffies) 177 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
178 * up or down to (approximately) full seconds. This is useful for timers 178 * up or down to (approximately) full seconds. This is useful for timers
179 * for which the exact time they fire does not matter too much, as long as 179 * for which the exact time they fire does not matter too much, as long as
180 * they fire approximately every X seconds. 180 * they fire approximately every X seconds.
181 * 181 *
182 * By rounding these timers to whole seconds, all such timers will fire 182 * By rounding these timers to whole seconds, all such timers will fire
183 * at the same time, rather than at various times spread out. The goal 183 * at the same time, rather than at various times spread out. The goal
184 * of this is to have the CPU wake up less, which saves power. 184 * of this is to have the CPU wake up less, which saves power.
185 * 185 *
186 * The exact rounding is skewed for each processor to avoid all 186 * The exact rounding is skewed for each processor to avoid all
187 * processors firing at the exact same time, which could lead 187 * processors firing at the exact same time, which could lead
188 * to lock contention or spurious cache line bouncing. 188 * to lock contention or spurious cache line bouncing.
189 * 189 *
190 * The return value is the rounded version of the @j parameter. 190 * The return value is the rounded version of the @j parameter.
191 */ 191 */
192 unsigned long __round_jiffies_relative(unsigned long j, int cpu) 192 unsigned long __round_jiffies_relative(unsigned long j, int cpu)
193 { 193 {
194 /* 194 /*
195 * In theory the following code can skip a jiffy in case jiffies 195 * In theory the following code can skip a jiffy in case jiffies
196 * increments right between the addition and the later subtraction. 196 * increments right between the addition and the later subtraction.
197 * However since the entire point of this function is to use approximate 197 * However since the entire point of this function is to use approximate
198 * timeouts, it's entirely ok to not handle that. 198 * timeouts, it's entirely ok to not handle that.
199 */ 199 */
200 return __round_jiffies(j + jiffies, cpu) - jiffies; 200 return __round_jiffies(j + jiffies, cpu) - jiffies;
201 } 201 }
202 EXPORT_SYMBOL_GPL(__round_jiffies_relative); 202 EXPORT_SYMBOL_GPL(__round_jiffies_relative);
203 203
204 /** 204 /**
205 * round_jiffies - function to round jiffies to a full second 205 * round_jiffies - function to round jiffies to a full second
206 * @j: the time in (absolute) jiffies that should be rounded 206 * @j: the time in (absolute) jiffies that should be rounded
207 * 207 *
208 * round_jiffies() rounds an absolute time in the future (in jiffies) 208 * round_jiffies() rounds an absolute time in the future (in jiffies)
209 * up or down to (approximately) full seconds. This is useful for timers 209 * up or down to (approximately) full seconds. This is useful for timers
210 * for which the exact time they fire does not matter too much, as long as 210 * for which the exact time they fire does not matter too much, as long as
211 * they fire approximately every X seconds. 211 * they fire approximately every X seconds.
212 * 212 *
213 * By rounding these timers to whole seconds, all such timers will fire 213 * By rounding these timers to whole seconds, all such timers will fire
214 * at the same time, rather than at various times spread out. The goal 214 * at the same time, rather than at various times spread out. The goal
215 * of this is to have the CPU wake up less, which saves power. 215 * of this is to have the CPU wake up less, which saves power.
216 * 216 *
217 * The return value is the rounded version of the @j parameter. 217 * The return value is the rounded version of the @j parameter.
218 */ 218 */
219 unsigned long round_jiffies(unsigned long j) 219 unsigned long round_jiffies(unsigned long j)
220 { 220 {
221 return __round_jiffies(j, raw_smp_processor_id()); 221 return __round_jiffies(j, raw_smp_processor_id());
222 } 222 }
223 EXPORT_SYMBOL_GPL(round_jiffies); 223 EXPORT_SYMBOL_GPL(round_jiffies);
224 224
225 /** 225 /**
226 * round_jiffies_relative - function to round jiffies to a full second 226 * round_jiffies_relative - function to round jiffies to a full second
227 * @j: the time in (relative) jiffies that should be rounded 227 * @j: the time in (relative) jiffies that should be rounded
228 * 228 *
229 * round_jiffies_relative() rounds a time delta in the future (in jiffies) 229 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
230 * up or down to (approximately) full seconds. This is useful for timers 230 * up or down to (approximately) full seconds. This is useful for timers
231 * for which the exact time they fire does not matter too much, as long as 231 * for which the exact time they fire does not matter too much, as long as
232 * they fire approximately every X seconds. 232 * they fire approximately every X seconds.
233 * 233 *
234 * By rounding these timers to whole seconds, all such timers will fire 234 * By rounding these timers to whole seconds, all such timers will fire
235 * at the same time, rather than at various times spread out. The goal 235 * at the same time, rather than at various times spread out. The goal
236 * of this is to have the CPU wake up less, which saves power. 236 * of this is to have the CPU wake up less, which saves power.
237 * 237 *
238 * The return value is the rounded version of the @j parameter. 238 * The return value is the rounded version of the @j parameter.
239 */ 239 */
240 unsigned long round_jiffies_relative(unsigned long j) 240 unsigned long round_jiffies_relative(unsigned long j)
241 { 241 {
242 return __round_jiffies_relative(j, raw_smp_processor_id()); 242 return __round_jiffies_relative(j, raw_smp_processor_id());
243 } 243 }
244 EXPORT_SYMBOL_GPL(round_jiffies_relative); 244 EXPORT_SYMBOL_GPL(round_jiffies_relative);
245 245
246 246
247 static inline void set_running_timer(struct tvec_base *base, 247 static inline void set_running_timer(struct tvec_base *base,
248 struct timer_list *timer) 248 struct timer_list *timer)
249 { 249 {
250 #ifdef CONFIG_SMP 250 #ifdef CONFIG_SMP
251 base->running_timer = timer; 251 base->running_timer = timer;
252 #endif 252 #endif
253 } 253 }
254 254
255 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) 255 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
256 { 256 {
257 unsigned long expires = timer->expires; 257 unsigned long expires = timer->expires;
258 unsigned long idx = expires - base->timer_jiffies; 258 unsigned long idx = expires - base->timer_jiffies;
259 struct list_head *vec; 259 struct list_head *vec;
260 260
261 if (idx < TVR_SIZE) { 261 if (idx < TVR_SIZE) {
262 int i = expires & TVR_MASK; 262 int i = expires & TVR_MASK;
263 vec = base->tv1.vec + i; 263 vec = base->tv1.vec + i;
264 } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { 264 } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
265 int i = (expires >> TVR_BITS) & TVN_MASK; 265 int i = (expires >> TVR_BITS) & TVN_MASK;
266 vec = base->tv2.vec + i; 266 vec = base->tv2.vec + i;
267 } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { 267 } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
268 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; 268 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
269 vec = base->tv3.vec + i; 269 vec = base->tv3.vec + i;
270 } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { 270 } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
271 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; 271 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
272 vec = base->tv4.vec + i; 272 vec = base->tv4.vec + i;
273 } else if ((signed long) idx < 0) { 273 } else if ((signed long) idx < 0) {
274 /* 274 /*
275 * Can happen if you add a timer with expires == jiffies, 275 * Can happen if you add a timer with expires == jiffies,
276 * or you set a timer to go off in the past 276 * or you set a timer to go off in the past
277 */ 277 */
278 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); 278 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
279 } else { 279 } else {
280 int i; 280 int i;
281 /* If the timeout is larger than 0xffffffff on 64-bit 281 /* If the timeout is larger than 0xffffffff on 64-bit
282 * architectures then we use the maximum timeout: 282 * architectures then we use the maximum timeout:
283 */ 283 */
284 if (idx > 0xffffffffUL) { 284 if (idx > 0xffffffffUL) {
285 idx = 0xffffffffUL; 285 idx = 0xffffffffUL;
286 expires = idx + base->timer_jiffies; 286 expires = idx + base->timer_jiffies;
287 } 287 }
288 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; 288 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
289 vec = base->tv5.vec + i; 289 vec = base->tv5.vec + i;
290 } 290 }
291 /* 291 /*
292 * Timers are FIFO: 292 * Timers are FIFO:
293 */ 293 */
294 list_add_tail(&timer->entry, vec); 294 list_add_tail(&timer->entry, vec);
295 } 295 }
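
A standalone sketch of the bucket selection above: with TVR_BITS=8 and TVN_BITS=6 (the !CONFIG_BASE_SMALL values), each cascade level covers 64 times the span of the previous one. The sample distances are illustrative:

#include <stdio.h>

#define TVN_BITS 6
#define TVR_BITS 8
#define TVR_SIZE (1 << TVR_BITS)

static int wheel_level(unsigned long idx)	/* idx: jiffies to expiry */
{
	if (idx < TVR_SIZE)
		return 1;			/* tv1: next 256 jiffies */
	if (idx < 1UL << (TVR_BITS + TVN_BITS))
		return 2;			/* tv2: < 16384 */
	if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS))
		return 3;			/* tv3: < ~1M */
	if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS))
		return 4;			/* tv4: < ~67M */
	return 5;				/* tv5: everything else */
}

int main(void)
{
	unsigned long samples[] = { 10, 300, 20000, 2000000, 300000000 };

	for (int i = 0; i < 5; i++)
		printf("idx %9lu -> tv%d\n", samples[i],
		       wheel_level(samples[i]));
	return 0;
}
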
296 296
297 #ifdef CONFIG_TIMER_STATS 297 #ifdef CONFIG_TIMER_STATS
298 void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) 298 void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
299 { 299 {
300 if (timer->start_site) 300 if (timer->start_site)
301 return; 301 return;
302 302
303 timer->start_site = addr; 303 timer->start_site = addr;
304 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); 304 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
305 timer->start_pid = current->pid; 305 timer->start_pid = current->pid;
306 } 306 }
307 307
308 static void timer_stats_account_timer(struct timer_list *timer) 308 static void timer_stats_account_timer(struct timer_list *timer)
309 { 309 {
310 unsigned int flag = 0; 310 unsigned int flag = 0;
311 311
312 if (unlikely(tbase_get_deferrable(timer->base))) 312 if (unlikely(tbase_get_deferrable(timer->base)))
313 flag |= TIMER_STATS_FLAG_DEFERRABLE; 313 flag |= TIMER_STATS_FLAG_DEFERRABLE;
314 314
315 timer_stats_update_stats(timer, timer->start_pid, timer->start_site, 315 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
316 timer->function, timer->start_comm, flag); 316 timer->function, timer->start_comm, flag);
317 } 317 }
318 318
319 #else 319 #else
320 static void timer_stats_account_timer(struct timer_list *timer) {} 320 static void timer_stats_account_timer(struct timer_list *timer) {}
321 #endif 321 #endif
322 322
323 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS 323 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
324 324
325 static struct debug_obj_descr timer_debug_descr; 325 static struct debug_obj_descr timer_debug_descr;
326 326
327 /* 327 /*
328 * fixup_init is called when: 328 * fixup_init is called when:
329 * - an active object is initialized 329 * - an active object is initialized
330 */ 330 */
331 static int timer_fixup_init(void *addr, enum debug_obj_state state) 331 static int timer_fixup_init(void *addr, enum debug_obj_state state)
332 { 332 {
333 struct timer_list *timer = addr; 333 struct timer_list *timer = addr;
334 334
335 switch (state) { 335 switch (state) {
336 case ODEBUG_STATE_ACTIVE: 336 case ODEBUG_STATE_ACTIVE:
337 del_timer_sync(timer); 337 del_timer_sync(timer);
338 debug_object_init(timer, &timer_debug_descr); 338 debug_object_init(timer, &timer_debug_descr);
339 return 1; 339 return 1;
340 default: 340 default:
341 return 0; 341 return 0;
342 } 342 }
343 } 343 }
344 344
345 /* 345 /*
346 * fixup_activate is called when: 346 * fixup_activate is called when:
347 * - an active object is activated 347 * - an active object is activated
348 * - an unknown object is activated (might be a statically initialized object) 348 * - an unknown object is activated (might be a statically initialized object)
349 */ 349 */
350 static int timer_fixup_activate(void *addr, enum debug_obj_state state) 350 static int timer_fixup_activate(void *addr, enum debug_obj_state state)
351 { 351 {
352 struct timer_list *timer = addr; 352 struct timer_list *timer = addr;
353 353
354 switch (state) { 354 switch (state) {
355 355
356 case ODEBUG_STATE_NOTAVAILABLE: 356 case ODEBUG_STATE_NOTAVAILABLE:
357 /* 357 /*
358 * This is not really a fixup. The timer was 358 * This is not really a fixup. The timer was
359 * statically initialized. We just make sure that it 359 * statically initialized. We just make sure that it
360 * is tracked in the object tracker. 360 * is tracked in the object tracker.
361 */ 361 */
362 if (timer->entry.next == NULL && 362 if (timer->entry.next == NULL &&
363 timer->entry.prev == TIMER_ENTRY_STATIC) { 363 timer->entry.prev == TIMER_ENTRY_STATIC) {
364 debug_object_init(timer, &timer_debug_descr); 364 debug_object_init(timer, &timer_debug_descr);
365 debug_object_activate(timer, &timer_debug_descr); 365 debug_object_activate(timer, &timer_debug_descr);
366 return 0; 366 return 0;
367 } else { 367 } else {
368 WARN_ON_ONCE(1); 368 WARN_ON_ONCE(1);
369 } 369 }
370 return 0; 370 return 0;
371 371
372 case ODEBUG_STATE_ACTIVE: 372 case ODEBUG_STATE_ACTIVE:
373 WARN_ON(1); 373 WARN_ON(1);
374 374
375 default: 375 default:
376 return 0; 376 return 0;
377 } 377 }
378 } 378 }
379 379
380 /* 380 /*
381 * fixup_free is called when: 381 * fixup_free is called when:
382 * - an active object is freed 382 * - an active object is freed
383 */ 383 */
384 static int timer_fixup_free(void *addr, enum debug_obj_state state) 384 static int timer_fixup_free(void *addr, enum debug_obj_state state)
385 { 385 {
386 struct timer_list *timer = addr; 386 struct timer_list *timer = addr;
387 387
388 switch (state) { 388 switch (state) {
389 case ODEBUG_STATE_ACTIVE: 389 case ODEBUG_STATE_ACTIVE:
390 del_timer_sync(timer); 390 del_timer_sync(timer);
391 debug_object_free(timer, &timer_debug_descr); 391 debug_object_free(timer, &timer_debug_descr);
392 return 1; 392 return 1;
393 default: 393 default:
394 return 0; 394 return 0;
395 } 395 }
396 } 396 }
397 397
398 static struct debug_obj_descr timer_debug_descr = { 398 static struct debug_obj_descr timer_debug_descr = {
399 .name = "timer_list", 399 .name = "timer_list",
400 .fixup_init = timer_fixup_init, 400 .fixup_init = timer_fixup_init,
401 .fixup_activate = timer_fixup_activate, 401 .fixup_activate = timer_fixup_activate,
402 .fixup_free = timer_fixup_free, 402 .fixup_free = timer_fixup_free,
403 }; 403 };
404 404
405 static inline void debug_timer_init(struct timer_list *timer) 405 static inline void debug_timer_init(struct timer_list *timer)
406 { 406 {
407 debug_object_init(timer, &timer_debug_descr); 407 debug_object_init(timer, &timer_debug_descr);
408 } 408 }
409 409
410 static inline void debug_timer_activate(struct timer_list *timer) 410 static inline void debug_timer_activate(struct timer_list *timer)
411 { 411 {
412 debug_object_activate(timer, &timer_debug_descr); 412 debug_object_activate(timer, &timer_debug_descr);
413 } 413 }
414 414
415 static inline void debug_timer_deactivate(struct timer_list *timer) 415 static inline void debug_timer_deactivate(struct timer_list *timer)
416 { 416 {
417 debug_object_deactivate(timer, &timer_debug_descr); 417 debug_object_deactivate(timer, &timer_debug_descr);
418 } 418 }
419 419
420 static inline void debug_timer_free(struct timer_list *timer) 420 static inline void debug_timer_free(struct timer_list *timer)
421 { 421 {
422 debug_object_free(timer, &timer_debug_descr); 422 debug_object_free(timer, &timer_debug_descr);
423 } 423 }
424 424
425 static void __init_timer(struct timer_list *timer); 425 static void __init_timer(struct timer_list *timer);
426 426
427 void init_timer_on_stack(struct timer_list *timer) 427 void init_timer_on_stack(struct timer_list *timer)
428 { 428 {
429 debug_object_init_on_stack(timer, &timer_debug_descr); 429 debug_object_init_on_stack(timer, &timer_debug_descr);
430 __init_timer(timer); 430 __init_timer(timer);
431 } 431 }
432 EXPORT_SYMBOL_GPL(init_timer_on_stack); 432 EXPORT_SYMBOL_GPL(init_timer_on_stack);
433 433
434 void destroy_timer_on_stack(struct timer_list *timer) 434 void destroy_timer_on_stack(struct timer_list *timer)
435 { 435 {
436 debug_object_free(timer, &timer_debug_descr); 436 debug_object_free(timer, &timer_debug_descr);
437 } 437 }
438 EXPORT_SYMBOL_GPL(destroy_timer_on_stack); 438 EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
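
[Editor's note] init_timer_on_stack()/destroy_timer_on_stack() bracket the lifetime of an on-stack timer for the object tracker; schedule_timeout() further down is the in-tree user of this pattern. A hedged usage sketch (function and callback names are hypothetical):

/* Hypothetical sketch of the on-stack timer protocol: */
static void my_callback(unsigned long data) { /* ... */ }

static void wait_a_bit(unsigned long delay)
{
	struct timer_list t;

	init_timer_on_stack(&t);	/* announce t to debugobjects */
	t.function = my_callback;
	t.data = 0;
	mod_timer(&t, jiffies + delay);
	/* ... sleep or poll ... */
	del_timer_sync(&t);
	destroy_timer_on_stack(&t);	/* release t before the frame dies */
}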
439 439
440 #else 440 #else
441 static inline void debug_timer_init(struct timer_list *timer) { } 441 static inline void debug_timer_init(struct timer_list *timer) { }
442 static inline void debug_timer_activate(struct timer_list *timer) { } 442 static inline void debug_timer_activate(struct timer_list *timer) { }
443 static inline void debug_timer_deactivate(struct timer_list *timer) { } 443 static inline void debug_timer_deactivate(struct timer_list *timer) { }
444 #endif 444 #endif
445 445
446 static void __init_timer(struct timer_list *timer) 446 static void __init_timer(struct timer_list *timer)
447 { 447 {
448 timer->entry.next = NULL; 448 timer->entry.next = NULL;
449 timer->base = __raw_get_cpu_var(tvec_bases); 449 timer->base = __raw_get_cpu_var(tvec_bases);
450 #ifdef CONFIG_TIMER_STATS 450 #ifdef CONFIG_TIMER_STATS
451 timer->start_site = NULL; 451 timer->start_site = NULL;
452 timer->start_pid = -1; 452 timer->start_pid = -1;
453 memset(timer->start_comm, 0, TASK_COMM_LEN); 453 memset(timer->start_comm, 0, TASK_COMM_LEN);
454 #endif 454 #endif
455 } 455 }
456 456
457 /** 457 /**
458 * init_timer - initialize a timer. 458 * init_timer - initialize a timer.
459 * @timer: the timer to be initialized 459 * @timer: the timer to be initialized
460 * 460 *
461 * init_timer() must be done to a timer prior to calling *any* of the 461 * init_timer() must be done to a timer prior to calling *any* of the
462 * other timer functions. 462 * other timer functions.
463 */ 463 */
464 void init_timer(struct timer_list *timer) 464 void init_timer(struct timer_list *timer)
465 { 465 {
466 debug_timer_init(timer); 466 debug_timer_init(timer);
467 __init_timer(timer); 467 __init_timer(timer);
468 } 468 }
469 EXPORT_SYMBOL(init_timer); 469 EXPORT_SYMBOL(init_timer);
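
[Editor's note] A hedged usage sketch of the init_timer()/add_timer() sequence (names and values are illustrative):

/* Hypothetical sketch: arm a one-second timer. */
static struct timer_list my_timer;

static void my_timeout(unsigned long data)
{
	printk(KERN_INFO "timer fired, data=%lu\n", data);
}

static void start_my_timer(void)
{
	init_timer(&my_timer);		/* must precede all other timer calls */
	my_timer.function = my_timeout;
	my_timer.data = 42;
	my_timer.expires = jiffies + HZ;
	add_timer(&my_timer);
}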
470 470
471 void init_timer_deferrable(struct timer_list *timer) 471 void init_timer_deferrable(struct timer_list *timer)
472 { 472 {
473 init_timer(timer); 473 init_timer(timer);
474 timer_set_deferrable(timer); 474 timer_set_deferrable(timer);
475 } 475 }
476 EXPORT_SYMBOL(init_timer_deferrable); 476 EXPORT_SYMBOL(init_timer_deferrable);
477 477
478 static inline void detach_timer(struct timer_list *timer, 478 static inline void detach_timer(struct timer_list *timer,
479 int clear_pending) 479 int clear_pending)
480 { 480 {
481 struct list_head *entry = &timer->entry; 481 struct list_head *entry = &timer->entry;
482 482
483 debug_timer_deactivate(timer); 483 debug_timer_deactivate(timer);
484 484
485 __list_del(entry->prev, entry->next); 485 __list_del(entry->prev, entry->next);
486 if (clear_pending) 486 if (clear_pending)
487 entry->next = NULL; 487 entry->next = NULL;
488 entry->prev = LIST_POISON2; 488 entry->prev = LIST_POISON2;
489 } 489 }
490 490
491 /* 491 /*
492 * We are using hashed locking: holding per_cpu(tvec_bases).lock 492 * We are using hashed locking: holding per_cpu(tvec_bases).lock
493 * means that all timers which are tied to this base via timer->base are 493 * means that all timers which are tied to this base via timer->base are
494 * locked, and the base itself is locked too. 494 * locked, and the base itself is locked too.
495 * 495 *
496 * So __run_timers/migrate_timers can safely modify all timers which could 496 * So __run_timers/migrate_timers can safely modify all timers which could
497 * be found on ->tvX lists. 497 * be found on ->tvX lists.
498 * 498 *
499 * When the timer's base is locked, and the timer removed from list, it is 499 * When the timer's base is locked, and the timer removed from list, it is
500 * possible to set timer->base = NULL and drop the lock: the timer remains 500 * possible to set timer->base = NULL and drop the lock: the timer remains
501 * locked. 501 * locked.
502 */ 502 */
503 static struct tvec_base *lock_timer_base(struct timer_list *timer, 503 static struct tvec_base *lock_timer_base(struct timer_list *timer,
504 unsigned long *flags) 504 unsigned long *flags)
505 __acquires(timer->base->lock) 505 __acquires(timer->base->lock)
506 { 506 {
507 struct tvec_base *base; 507 struct tvec_base *base;
508 508
509 for (;;) { 509 for (;;) {
510 struct tvec_base *prelock_base = timer->base; 510 struct tvec_base *prelock_base = timer->base;
511 base = tbase_get_base(prelock_base); 511 base = tbase_get_base(prelock_base);
512 if (likely(base != NULL)) { 512 if (likely(base != NULL)) {
513 spin_lock_irqsave(&base->lock, *flags); 513 spin_lock_irqsave(&base->lock, *flags);
514 if (likely(prelock_base == timer->base)) 514 if (likely(prelock_base == timer->base))
515 return base; 515 return base;
516 /* The timer has migrated to another CPU */ 516 /* The timer has migrated to another CPU */
517 spin_unlock_irqrestore(&base->lock, *flags); 517 spin_unlock_irqrestore(&base->lock, *flags);
518 } 518 }
519 cpu_relax(); 519 cpu_relax();
520 } 520 }
521 } 521 }
522 522
523 int __mod_timer(struct timer_list *timer, unsigned long expires) 523 int __mod_timer(struct timer_list *timer, unsigned long expires)
524 { 524 {
525 struct tvec_base *base, *new_base; 525 struct tvec_base *base, *new_base;
526 unsigned long flags; 526 unsigned long flags;
527 int ret = 0; 527 int ret = 0;
528 528
529 timer_stats_timer_set_start_info(timer); 529 timer_stats_timer_set_start_info(timer);
530 BUG_ON(!timer->function); 530 BUG_ON(!timer->function);
531 531
532 base = lock_timer_base(timer, &flags); 532 base = lock_timer_base(timer, &flags);
533 533
534 if (timer_pending(timer)) { 534 if (timer_pending(timer)) {
535 detach_timer(timer, 0); 535 detach_timer(timer, 0);
536 ret = 1; 536 ret = 1;
537 } 537 }
538 538
539 debug_timer_activate(timer); 539 debug_timer_activate(timer);
540 540
541 new_base = __get_cpu_var(tvec_bases); 541 new_base = __get_cpu_var(tvec_bases);
542 542
543 if (base != new_base) { 543 if (base != new_base) {
544 /* 544 /*
545 * We are trying to schedule the timer on the local CPU. 545 * We are trying to schedule the timer on the local CPU.
546 * However we can't change timer's base while it is running, 546 * However we can't change timer's base while it is running,
547 * otherwise del_timer_sync() can't detect that the timer's 547 * otherwise del_timer_sync() can't detect that the timer's
548 * handler has not yet finished. This also guarantees that 548 * handler has not yet finished. This also guarantees that
549 * the timer is serialized wrt itself. 549 * the timer is serialized wrt itself.
550 */ 550 */
551 if (likely(base->running_timer != timer)) { 551 if (likely(base->running_timer != timer)) {
552 /* See the comment in lock_timer_base() */ 552 /* See the comment in lock_timer_base() */
553 timer_set_base(timer, NULL); 553 timer_set_base(timer, NULL);
554 spin_unlock(&base->lock); 554 spin_unlock(&base->lock);
555 base = new_base; 555 base = new_base;
556 spin_lock(&base->lock); 556 spin_lock(&base->lock);
557 timer_set_base(timer, base); 557 timer_set_base(timer, base);
558 } 558 }
559 } 559 }
560 560
561 timer->expires = expires; 561 timer->expires = expires;
562 internal_add_timer(base, timer); 562 internal_add_timer(base, timer);
563 spin_unlock_irqrestore(&base->lock, flags); 563 spin_unlock_irqrestore(&base->lock, flags);
564 564
565 return ret; 565 return ret;
566 } 566 }
567 567
568 EXPORT_SYMBOL(__mod_timer); 568 EXPORT_SYMBOL(__mod_timer);
569 569
570 /** 570 /**
571 * add_timer_on - start a timer on a particular CPU 571 * add_timer_on - start a timer on a particular CPU
572 * @timer: the timer to be added 572 * @timer: the timer to be added
573 * @cpu: the CPU to start it on 573 * @cpu: the CPU to start it on
574 * 574 *
575 * This is not very scalable on SMP. Double adds are not possible. 575 * This is not very scalable on SMP. Double adds are not possible.
576 */ 576 */
577 void add_timer_on(struct timer_list *timer, int cpu) 577 void add_timer_on(struct timer_list *timer, int cpu)
578 { 578 {
579 struct tvec_base *base = per_cpu(tvec_bases, cpu); 579 struct tvec_base *base = per_cpu(tvec_bases, cpu);
580 unsigned long flags; 580 unsigned long flags;
581 581
582 timer_stats_timer_set_start_info(timer); 582 timer_stats_timer_set_start_info(timer);
583 BUG_ON(timer_pending(timer) || !timer->function); 583 BUG_ON(timer_pending(timer) || !timer->function);
584 spin_lock_irqsave(&base->lock, flags); 584 spin_lock_irqsave(&base->lock, flags);
585 timer_set_base(timer, base); 585 timer_set_base(timer, base);
586 debug_timer_activate(timer); 586 debug_timer_activate(timer);
587 internal_add_timer(base, timer); 587 internal_add_timer(base, timer);
588 /* 588 /*
589 * Check whether the other CPU is idle and needs to be 589 * Check whether the other CPU is idle and needs to be
590 * triggered to reevaluate the timer wheel when nohz is 590 * triggered to reevaluate the timer wheel when nohz is
591 * active. We are protected against the other CPU fiddling 591 * active. We are protected against the other CPU fiddling
592 * with the timer by holding the timer base lock. This also 592 * with the timer by holding the timer base lock. This also
593 * makes sure that a CPU on the way to idle cannot evaluate 593 * makes sure that a CPU on the way to idle cannot evaluate
594 * the timer wheel. 594 * the timer wheel.
595 */ 595 */
596 wake_up_idle_cpu(cpu); 596 wake_up_idle_cpu(cpu);
597 spin_unlock_irqrestore(&base->lock, flags); 597 spin_unlock_irqrestore(&base->lock, flags);
598 } 598 }
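
[Editor's note] A hedged sketch of add_timer_on(); the timer is assumed to be initialized and not pending, per the BUG_ON above (helper name is hypothetical):

/* Hypothetical sketch: fire a pre-initialized, non-pending timer on a
 * specific CPU one second from now. */
static void arm_on_cpu(struct timer_list *t, int cpu)
{
	t->expires = jiffies + HZ;
	add_timer_on(t, cpu);
}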
599 599
600 /** 600 /**
601 * mod_timer - modify a timer's timeout 601 * mod_timer - modify a timer's timeout
602 * @timer: the timer to be modified 602 * @timer: the timer to be modified
603 * @expires: new timeout in jiffies 603 * @expires: new timeout in jiffies
604 * 604 *
605 * mod_timer() is a more efficient way to update the expire field of an 605 * mod_timer() is a more efficient way to update the expire field of an
606 * active timer (if the timer is inactive it will be activated) 606 * active timer (if the timer is inactive it will be activated)
607 * 607 *
608 * mod_timer(timer, expires) is equivalent to: 608 * mod_timer(timer, expires) is equivalent to:
609 * 609 *
610 * del_timer(timer); timer->expires = expires; add_timer(timer); 610 * del_timer(timer); timer->expires = expires; add_timer(timer);
611 * 611 *
612 * Note that if there are multiple unserialized concurrent users of the 612 * Note that if there are multiple unserialized concurrent users of the
613 * same timer, then mod_timer() is the only safe way to modify the timeout, 613 * same timer, then mod_timer() is the only safe way to modify the timeout,
614 * since add_timer() cannot modify an already running timer. 614 * since add_timer() cannot modify an already running timer.
615 * 615 *
616 * The function returns whether it has modified a pending timer or not. 616 * The function returns whether it has modified a pending timer or not.
617 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an 617 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
618 * active timer returns 1.) 618 * active timer returns 1.)
619 */ 619 */
620 int mod_timer(struct timer_list *timer, unsigned long expires) 620 int mod_timer(struct timer_list *timer, unsigned long expires)
621 { 621 {
622 BUG_ON(!timer->function); 622 BUG_ON(!timer->function);
623 623
624 timer_stats_timer_set_start_info(timer); 624 timer_stats_timer_set_start_info(timer);
625 /* 625 /*
626 * This is a common optimization triggered by the 626 * This is a common optimization triggered by the
627 * networking code - if the timer is re-modified 627 * networking code - if the timer is re-modified
628 * to be the same thing then just return: 628 * to be the same thing then just return:
629 */ 629 */
630 if (timer->expires == expires && timer_pending(timer)) 630 if (timer->expires == expires && timer_pending(timer))
631 return 1; 631 return 1;
632 632
633 return __mod_timer(timer, expires); 633 return __mod_timer(timer, expires);
634 } 634 }
635 635
636 EXPORT_SYMBOL(mod_timer); 636 EXPORT_SYMBOL(mod_timer);
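
[Editor's note] A hedged sketch of the re-arm pattern the comment above refers to: each call pushes the timeout out, and a call with an unchanged pending expiry returns 1 without taking any locks (helper name is hypothetical):

/* Hypothetical sketch: refresh a connection keepalive on every packet;
 * a pending timer re-armed to the same expiry returns early above. */
static void refresh_keepalive(struct timer_list *t)
{
	mod_timer(t, jiffies + 5 * HZ);
}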
637 637
638 /** 638 /**
639 * del_timer - deactivate a timer. 639 * del_timer - deactivate a timer.
640 * @timer: the timer to be deactivated 640 * @timer: the timer to be deactivated
641 * 641 *
642 * del_timer() deactivates a timer - this works on both active and inactive 642 * del_timer() deactivates a timer - this works on both active and inactive
643 * timers. 643 * timers.
644 * 644 *
645 * The function returns whether it has deactivated a pending timer or not. 645 * The function returns whether it has deactivated a pending timer or not.
646 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an 646 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
647 * active timer returns 1.) 647 * active timer returns 1.)
648 */ 648 */
649 int del_timer(struct timer_list *timer) 649 int del_timer(struct timer_list *timer)
650 { 650 {
651 struct tvec_base *base; 651 struct tvec_base *base;
652 unsigned long flags; 652 unsigned long flags;
653 int ret = 0; 653 int ret = 0;
654 654
655 timer_stats_timer_clear_start_info(timer); 655 timer_stats_timer_clear_start_info(timer);
656 if (timer_pending(timer)) { 656 if (timer_pending(timer)) {
657 base = lock_timer_base(timer, &flags); 657 base = lock_timer_base(timer, &flags);
658 if (timer_pending(timer)) { 658 if (timer_pending(timer)) {
659 detach_timer(timer, 1); 659 detach_timer(timer, 1);
660 ret = 1; 660 ret = 1;
661 } 661 }
662 spin_unlock_irqrestore(&base->lock, flags); 662 spin_unlock_irqrestore(&base->lock, flags);
663 } 663 }
664 664
665 return ret; 665 return ret;
666 } 666 }
667 667
668 EXPORT_SYMBOL(del_timer); 668 EXPORT_SYMBOL(del_timer);
669 669
670 #ifdef CONFIG_SMP 670 #ifdef CONFIG_SMP
671 /** 671 /**
672 * try_to_del_timer_sync - Try to deactivate a timer 672 * try_to_del_timer_sync - Try to deactivate a timer
673 * @timer: the timer to deactivate 673 * @timer: the timer to deactivate
674 * 674 *
675 * This function tries to deactivate a timer. Upon successful (ret >= 0) 675 * This function tries to deactivate a timer. Upon successful (ret >= 0)
676 * exit the timer is not queued and the handler is not running on any CPU. 676 * exit the timer is not queued and the handler is not running on any CPU.
677 * 677 *
678 * It must not be called from interrupt contexts. 678 * It must not be called from interrupt contexts.
679 */ 679 */
680 int try_to_del_timer_sync(struct timer_list *timer) 680 int try_to_del_timer_sync(struct timer_list *timer)
681 { 681 {
682 struct tvec_base *base; 682 struct tvec_base *base;
683 unsigned long flags; 683 unsigned long flags;
684 int ret = -1; 684 int ret = -1;
685 685
686 base = lock_timer_base(timer, &flags); 686 base = lock_timer_base(timer, &flags);
687 687
688 if (base->running_timer == timer) 688 if (base->running_timer == timer)
689 goto out; 689 goto out;
690 690
691 ret = 0; 691 ret = 0;
692 if (timer_pending(timer)) { 692 if (timer_pending(timer)) {
693 detach_timer(timer, 1); 693 detach_timer(timer, 1);
694 ret = 1; 694 ret = 1;
695 } 695 }
696 out: 696 out:
697 spin_unlock_irqrestore(&base->lock, flags); 697 spin_unlock_irqrestore(&base->lock, flags);
698 698
699 return ret; 699 return ret;
700 } 700 }
701 701
702 EXPORT_SYMBOL(try_to_del_timer_sync); 702 EXPORT_SYMBOL(try_to_del_timer_sync);
703 703
704 /** 704 /**
705 * del_timer_sync - deactivate a timer and wait for the handler to finish. 705 * del_timer_sync - deactivate a timer and wait for the handler to finish.
706 * @timer: the timer to be deactivated 706 * @timer: the timer to be deactivated
707 * 707 *
708 * This function only differs from del_timer() on SMP: besides deactivating 708 * This function only differs from del_timer() on SMP: besides deactivating
709 * the timer it also makes sure the handler has finished executing on other 709 * the timer it also makes sure the handler has finished executing on other
710 * CPUs. 710 * CPUs.
711 * 711 *
712 * Synchronization rules: Callers must prevent restarting of the timer, 712 * Synchronization rules: Callers must prevent restarting of the timer,
713 * otherwise this function is meaningless. It must not be called from 713 * otherwise this function is meaningless. It must not be called from
714 * interrupt contexts. The caller must not hold locks which would prevent 714 * interrupt contexts. The caller must not hold locks which would prevent
715 * completion of the timer's handler. The timer's handler must not call 715 * completion of the timer's handler. The timer's handler must not call
716 * add_timer_on(). Upon exit the timer is not queued and the handler is 716 * add_timer_on(). Upon exit the timer is not queued and the handler is
717 * not running on any CPU. 717 * not running on any CPU.
718 * 718 *
719 * The function returns whether it has deactivated a pending timer or not. 719 * The function returns whether it has deactivated a pending timer or not.
720 */ 720 */
721 int del_timer_sync(struct timer_list *timer) 721 int del_timer_sync(struct timer_list *timer)
722 { 722 {
723 for (;;) { 723 for (;;) {
724 int ret = try_to_del_timer_sync(timer); 724 int ret = try_to_del_timer_sync(timer);
725 if (ret >= 0) 725 if (ret >= 0)
726 return ret; 726 return ret;
727 cpu_relax(); 727 cpu_relax();
728 } 728 }
729 } 729 }
730 730
731 EXPORT_SYMBOL(del_timer_sync); 731 EXPORT_SYMBOL(del_timer_sync);
732 #endif 732 #endif
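
[Editor's note] try_to_del_timer_sync() is the non-blocking attempt; del_timer_sync() simply spins on it until the handler is off-CPU. A hedged teardown sketch (struct and function names are hypothetical):

/* Hypothetical module-exit sketch: once del_timer_sync() returns, the
 * handler runs on no CPU and the timer is not queued, so data the
 * handler touches may be freed safely. */
static void my_driver_teardown(struct my_dev *dev)
{
	del_timer_sync(&dev->timer);
	kfree(dev);
}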
733 733
734 static int cascade(struct tvec_base *base, struct tvec *tv, int index) 734 static int cascade(struct tvec_base *base, struct tvec *tv, int index)
735 { 735 {
736 /* cascade all the timers from tv up one level */ 736 /* cascade all the timers from tv up one level */
737 struct timer_list *timer, *tmp; 737 struct timer_list *timer, *tmp;
738 struct list_head tv_list; 738 struct list_head tv_list;
739 739
740 list_replace_init(tv->vec + index, &tv_list); 740 list_replace_init(tv->vec + index, &tv_list);
741 741
742 /* 742 /*
743 * We are removing _all_ timers from the list, so we 743 * We are removing _all_ timers from the list, so we
744 * don't have to detach them individually. 744 * don't have to detach them individually.
745 */ 745 */
746 list_for_each_entry_safe(timer, tmp, &tv_list, entry) { 746 list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
747 BUG_ON(tbase_get_base(timer->base) != base); 747 BUG_ON(tbase_get_base(timer->base) != base);
748 internal_add_timer(base, timer); 748 internal_add_timer(base, timer);
749 } 749 }
750 750
751 return index; 751 return index;
752 } 752 }
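
[Editor's note] A hedged worked example of how often cascade() runs at each level, given the default wheel geometry and the INDEX() macro below:

/*
 * Worked example of the cascade cadence (TVR_BITS == 8, TVN_BITS == 6):
 * the tv1 index wraps to 0 every 256 jiffies, pulling one tv2 slot back
 * down into tv1; every 256 * 64 = 16384 jiffies the tv2 index wraps as
 * well and a tv3 slot cascades, and so on up through tv5.
 */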
753 753
754 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) 754 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
755 755
756 /** 756 /**
757 * __run_timers - run all expired timers (if any) on this CPU. 757 * __run_timers - run all expired timers (if any) on this CPU.
758 * @base: the timer vector to be processed. 758 * @base: the timer vector to be processed.
759 * 759 *
760 * This function cascades all vectors and executes all expired 760 * This function cascades all vectors and executes all expired
761 * timers. 761 * timers.
762 */ 762 */
763 static inline void __run_timers(struct tvec_base *base) 763 static inline void __run_timers(struct tvec_base *base)
764 { 764 {
765 struct timer_list *timer; 765 struct timer_list *timer;
766 766
767 spin_lock_irq(&base->lock); 767 spin_lock_irq(&base->lock);
768 while (time_after_eq(jiffies, base->timer_jiffies)) { 768 while (time_after_eq(jiffies, base->timer_jiffies)) {
769 struct list_head work_list; 769 struct list_head work_list;
770 struct list_head *head = &work_list; 770 struct list_head *head = &work_list;
771 int index = base->timer_jiffies & TVR_MASK; 771 int index = base->timer_jiffies & TVR_MASK;
772 772
773 /* 773 /*
774 * Cascade timers: 774 * Cascade timers:
775 */ 775 */
776 if (!index && 776 if (!index &&
777 (!cascade(base, &base->tv2, INDEX(0))) && 777 (!cascade(base, &base->tv2, INDEX(0))) &&
778 (!cascade(base, &base->tv3, INDEX(1))) && 778 (!cascade(base, &base->tv3, INDEX(1))) &&
779 !cascade(base, &base->tv4, INDEX(2))) 779 !cascade(base, &base->tv4, INDEX(2)))
780 cascade(base, &base->tv5, INDEX(3)); 780 cascade(base, &base->tv5, INDEX(3));
781 ++base->timer_jiffies; 781 ++base->timer_jiffies;
782 list_replace_init(base->tv1.vec + index, &work_list); 782 list_replace_init(base->tv1.vec + index, &work_list);
783 while (!list_empty(head)) { 783 while (!list_empty(head)) {
784 void (*fn)(unsigned long); 784 void (*fn)(unsigned long);
785 unsigned long data; 785 unsigned long data;
786 786
787 timer = list_first_entry(head, struct timer_list,entry); 787 timer = list_first_entry(head, struct timer_list,entry);
788 fn = timer->function; 788 fn = timer->function;
789 data = timer->data; 789 data = timer->data;
790 790
791 timer_stats_account_timer(timer); 791 timer_stats_account_timer(timer);
792 792
793 set_running_timer(base, timer); 793 set_running_timer(base, timer);
794 detach_timer(timer, 1); 794 detach_timer(timer, 1);
795 spin_unlock_irq(&base->lock); 795 spin_unlock_irq(&base->lock);
796 { 796 {
797 int preempt_count = preempt_count(); 797 int preempt_count = preempt_count();
798 fn(data); 798 fn(data);
799 if (preempt_count != preempt_count()) { 799 if (preempt_count != preempt_count()) {
800 printk(KERN_ERR "huh, entered %p " 800 printk(KERN_ERR "huh, entered %p "
801 "with preempt_count %08x, exited" 801 "with preempt_count %08x, exited"
802 " with %08x?\n", 802 " with %08x?\n",
803 fn, preempt_count, 803 fn, preempt_count,
804 preempt_count()); 804 preempt_count());
805 BUG(); 805 BUG();
806 } 806 }
807 } 807 }
808 spin_lock_irq(&base->lock); 808 spin_lock_irq(&base->lock);
809 } 809 }
810 } 810 }
811 set_running_timer(base, NULL); 811 set_running_timer(base, NULL);
812 spin_unlock_irq(&base->lock); 812 spin_unlock_irq(&base->lock);
813 } 813 }
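
[Editor's note] The preempt_count comparison around fn(data) above catches callbacks that return with a lock held or preemption disabled. A hedged sketch of a well-behaved handler (struct and field names are hypothetical):

/* Hypothetical callback: every lock taken inside must be released
 * before returning, or the preempt_count check above hits BUG(). */
static void my_timeout(unsigned long data)
{
	struct my_dev *dev = (struct my_dev *)data;	/* my_dev is assumed */

	spin_lock(&dev->lock);
	dev->ticks++;
	spin_unlock(&dev->lock);	/* balanced, so preempt_count is unchanged */
}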
814 814
815 #ifdef CONFIG_NO_HZ 815 #ifdef CONFIG_NO_HZ
816 /* 816 /*
817 * Find out when the next timer event is due to happen. This 817 * Find out when the next timer event is due to happen. This
818 * is used on S/390 to stop all activity when a cpu is idle. 818 * is used on S/390 to stop all activity when a cpu is idle.
819 * This function needs to be called with interrupts disabled. 819 * This function needs to be called with interrupts disabled.
820 */ 820 */
821 static unsigned long __next_timer_interrupt(struct tvec_base *base) 821 static unsigned long __next_timer_interrupt(struct tvec_base *base)
822 { 822 {
823 unsigned long timer_jiffies = base->timer_jiffies; 823 unsigned long timer_jiffies = base->timer_jiffies;
824 unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; 824 unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
825 int index, slot, array, found = 0; 825 int index, slot, array, found = 0;
826 struct timer_list *nte; 826 struct timer_list *nte;
827 struct tvec *varray[4]; 827 struct tvec *varray[4];
828 828
829 /* Look for timer events in tv1. */ 829 /* Look for timer events in tv1. */
830 index = slot = timer_jiffies & TVR_MASK; 830 index = slot = timer_jiffies & TVR_MASK;
831 do { 831 do {
832 list_for_each_entry(nte, base->tv1.vec + slot, entry) { 832 list_for_each_entry(nte, base->tv1.vec + slot, entry) {
833 if (tbase_get_deferrable(nte->base)) 833 if (tbase_get_deferrable(nte->base))
834 continue; 834 continue;
835 835
836 found = 1; 836 found = 1;
837 expires = nte->expires; 837 expires = nte->expires;
838 /* Look at the cascade bucket(s)? */ 838 /* Look at the cascade bucket(s)? */
839 if (!index || slot < index) 839 if (!index || slot < index)
840 goto cascade; 840 goto cascade;
841 return expires; 841 return expires;
842 } 842 }
843 slot = (slot + 1) & TVR_MASK; 843 slot = (slot + 1) & TVR_MASK;
844 } while (slot != index); 844 } while (slot != index);
845 845
846 cascade: 846 cascade:
847 /* Calculate the next cascade event */ 847 /* Calculate the next cascade event */
848 if (index) 848 if (index)
849 timer_jiffies += TVR_SIZE - index; 849 timer_jiffies += TVR_SIZE - index;
850 timer_jiffies >>= TVR_BITS; 850 timer_jiffies >>= TVR_BITS;
851 851
852 /* Check tv2-tv5. */ 852 /* Check tv2-tv5. */
853 varray[0] = &base->tv2; 853 varray[0] = &base->tv2;
854 varray[1] = &base->tv3; 854 varray[1] = &base->tv3;
855 varray[2] = &base->tv4; 855 varray[2] = &base->tv4;
856 varray[3] = &base->tv5; 856 varray[3] = &base->tv5;
857 857
858 for (array = 0; array < 4; array++) { 858 for (array = 0; array < 4; array++) {
859 struct tvec *varp = varray[array]; 859 struct tvec *varp = varray[array];
860 860
861 index = slot = timer_jiffies & TVN_MASK; 861 index = slot = timer_jiffies & TVN_MASK;
862 do { 862 do {
863 list_for_each_entry(nte, varp->vec + slot, entry) { 863 list_for_each_entry(nte, varp->vec + slot, entry) {
864 found = 1; 864 found = 1;
865 if (time_before(nte->expires, expires)) 865 if (time_before(nte->expires, expires))
866 expires = nte->expires; 866 expires = nte->expires;
867 } 867 }
868 /* 868 /*
869 * Are we still searching for the first timer, or are 869 * Are we still searching for the first timer, or are
870 * we looking up the cascade buckets? 870 * we looking up the cascade buckets?
871 */ 871 */
872 if (found) { 872 if (found) {
873 /* Look at the cascade bucket(s)? */ 873 /* Look at the cascade bucket(s)? */
874 if (!index || slot < index) 874 if (!index || slot < index)
875 break; 875 break;
876 return expires; 876 return expires;
877 } 877 }
878 slot = (slot + 1) & TVN_MASK; 878 slot = (slot + 1) & TVN_MASK;
879 } while (slot != index); 879 } while (slot != index);
880 880
881 if (index) 881 if (index)
882 timer_jiffies += TVN_SIZE - index; 882 timer_jiffies += TVN_SIZE - index;
883 timer_jiffies >>= TVN_BITS; 883 timer_jiffies >>= TVN_BITS;
884 } 884 }
885 return expires; 885 return expires;
886 } 886 }
887 887
888 /* 888 /*
889 * Check if the next hrtimer event is before the next timer wheel 889 * Check if the next hrtimer event is before the next timer wheel
890 * event: 890 * event:
891 */ 891 */
892 static unsigned long cmp_next_hrtimer_event(unsigned long now, 892 static unsigned long cmp_next_hrtimer_event(unsigned long now,
893 unsigned long expires) 893 unsigned long expires)
894 { 894 {
895 ktime_t hr_delta = hrtimer_get_next_event(); 895 ktime_t hr_delta = hrtimer_get_next_event();
896 struct timespec tsdelta; 896 struct timespec tsdelta;
897 unsigned long delta; 897 unsigned long delta;
898 898
899 if (hr_delta.tv64 == KTIME_MAX) 899 if (hr_delta.tv64 == KTIME_MAX)
900 return expires; 900 return expires;
901 901
902 /* 902 /*
903 * Expired timer available, let it expire in the next tick 903 * Expired timer available, let it expire in the next tick
904 */ 904 */
905 if (hr_delta.tv64 <= 0) 905 if (hr_delta.tv64 <= 0)
906 return now + 1; 906 return now + 1;
907 907
908 tsdelta = ktime_to_timespec(hr_delta); 908 tsdelta = ktime_to_timespec(hr_delta);
909 delta = timespec_to_jiffies(&tsdelta); 909 delta = timespec_to_jiffies(&tsdelta);
910 910
911 /* 911 /*
912 * Limit the delta to the max value, which is checked in 912 * Limit the delta to the max value, which is checked in
913 * tick_nohz_stop_sched_tick(): 913 * tick_nohz_stop_sched_tick():
914 */ 914 */
915 if (delta > NEXT_TIMER_MAX_DELTA) 915 if (delta > NEXT_TIMER_MAX_DELTA)
916 delta = NEXT_TIMER_MAX_DELTA; 916 delta = NEXT_TIMER_MAX_DELTA;
917 917
918 /* 918 /*
919 * Take rounding errors into account and make sure that it 919 * Take rounding errors into account and make sure that it
920 * expires in the next tick. Otherwise we go into an endless 920 * expires in the next tick. Otherwise we go into an endless
921 * ping pong due to tick_nohz_stop_sched_tick() retriggering 921 * ping pong due to tick_nohz_stop_sched_tick() retriggering
922 * the timer softirq 922 * the timer softirq
923 */ 923 */
924 if (delta < 1) 924 if (delta < 1)
925 delta = 1; 925 delta = 1;
926 now += delta; 926 now += delta;
927 if (time_before(now, expires)) 927 if (time_before(now, expires))
928 return now; 928 return now;
929 return expires; 929 return expires;
930 } 930 }
931 931
932 /** 932 /**
933 * get_next_timer_interrupt - return the jiffy of the next pending timer 933 * get_next_timer_interrupt - return the jiffy of the next pending timer
934 * @now: current time (in jiffies) 934 * @now: current time (in jiffies)
935 */ 935 */
936 unsigned long get_next_timer_interrupt(unsigned long now) 936 unsigned long get_next_timer_interrupt(unsigned long now)
937 { 937 {
938 struct tvec_base *base = __get_cpu_var(tvec_bases); 938 struct tvec_base *base = __get_cpu_var(tvec_bases);
939 unsigned long expires; 939 unsigned long expires;
940 940
941 spin_lock(&base->lock); 941 spin_lock(&base->lock);
942 expires = __next_timer_interrupt(base); 942 expires = __next_timer_interrupt(base);
943 spin_unlock(&base->lock); 943 spin_unlock(&base->lock);
944 944
945 if (time_before_eq(expires, now)) 945 if (time_before_eq(expires, now))
946 return now; 946 return now;
947 947
948 return cmp_next_hrtimer_event(now, expires); 948 return cmp_next_hrtimer_event(now, expires);
949 } 949 }
950 #endif 950 #endif
951 951
952 #ifndef CONFIG_VIRT_CPU_ACCOUNTING 952 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
953 void account_process_tick(struct task_struct *p, int user_tick) 953 void account_process_tick(struct task_struct *p, int user_tick)
954 { 954 {
955 cputime_t one_jiffy = jiffies_to_cputime(1); 955 cputime_t one_jiffy = jiffies_to_cputime(1);
956 956
957 if (user_tick) { 957 if (user_tick) {
958 account_user_time(p, one_jiffy); 958 account_user_time(p, one_jiffy);
959 account_user_time_scaled(p, cputime_to_scaled(one_jiffy)); 959 account_user_time_scaled(p, cputime_to_scaled(one_jiffy));
960 } else { 960 } else {
961 account_system_time(p, HARDIRQ_OFFSET, one_jiffy); 961 account_system_time(p, HARDIRQ_OFFSET, one_jiffy);
962 account_system_time_scaled(p, cputime_to_scaled(one_jiffy)); 962 account_system_time_scaled(p, cputime_to_scaled(one_jiffy));
963 } 963 }
964 } 964 }
965 #endif 965 #endif
966 966
967 /* 967 /*
968 * Called from the timer interrupt handler to charge one tick to the current 968 * Called from the timer interrupt handler to charge one tick to the current
969 * process. user_tick is 1 if the tick is user time, 0 for system. 969 * process. user_tick is 1 if the tick is user time, 0 for system.
970 */ 970 */
971 void update_process_times(int user_tick) 971 void update_process_times(int user_tick)
972 { 972 {
973 struct task_struct *p = current; 973 struct task_struct *p = current;
974 int cpu = smp_processor_id(); 974 int cpu = smp_processor_id();
975 975
976 /* Note: this timer irq context must be accounted for as well. */ 976 /* Note: this timer irq context must be accounted for as well. */
977 account_process_tick(p, user_tick); 977 account_process_tick(p, user_tick);
978 run_local_timers(); 978 run_local_timers();
979 if (rcu_pending(cpu)) 979 if (rcu_pending(cpu))
980 rcu_check_callbacks(cpu, user_tick); 980 rcu_check_callbacks(cpu, user_tick);
981 printk_tick();
981 scheduler_tick(); 982 scheduler_tick();
982 run_posix_cpu_timers(p); 983 run_posix_cpu_timers(p);
983 } 984 }
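
[Editor's note] printk_tick() is the hook this commit adds to update_process_times(): printk() no longer wakes klogd directly (which can deadlock when printk() is called with rq->lock or xtime_lock held), but merely sets a per-CPU pending flag that each timer tick polls from a context where the wakeup is safe. Roughly the shape of the hook, with names per the kernel/printk.c side of this commit (not shown in this file):

void printk_tick(void)
{
	if (__get_cpu_var(printk_pending)) {
		__get_cpu_var(printk_pending) = 0;
		wake_up_interruptible(&log_wait);
	}
}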
984 985
985 /* 986 /*
986 * Nr of active tasks - counted in fixed-point numbers 987 * Nr of active tasks - counted in fixed-point numbers
987 */ 988 */
988 static unsigned long count_active_tasks(void) 989 static unsigned long count_active_tasks(void)
989 { 990 {
990 return nr_active() * FIXED_1; 991 return nr_active() * FIXED_1;
991 } 992 }
992 993
993 /* 994 /*
994 * Hmm.. Changed this, as the GNU make sources (load.c) seem to 995 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
995 * imply that avenrun[] is the standard name for this kind of thing. 996 * imply that avenrun[] is the standard name for this kind of thing.
996 * Nothing else seems to be standardized: the fractional size etc 997 * Nothing else seems to be standardized: the fractional size etc
997 * all seem to differ on different machines. 998 * all seem to differ on different machines.
998 * 999 *
999 * Requires xtime_lock to access. 1000 * Requires xtime_lock to access.
1000 */ 1001 */
1001 unsigned long avenrun[3]; 1002 unsigned long avenrun[3];
1002 1003
1003 EXPORT_SYMBOL(avenrun); 1004 EXPORT_SYMBOL(avenrun);
1004 1005
1005 /* 1006 /*
1006 * calc_load - given tick count, update the avenrun load estimates. 1007 * calc_load - given tick count, update the avenrun load estimates.
1007 * This is called while holding a write_lock on xtime_lock. 1008 * This is called while holding a write_lock on xtime_lock.
1008 */ 1009 */
1009 static inline void calc_load(unsigned long ticks) 1010 static inline void calc_load(unsigned long ticks)
1010 { 1011 {
1011 unsigned long active_tasks; /* fixed-point */ 1012 unsigned long active_tasks; /* fixed-point */
1012 static int count = LOAD_FREQ; 1013 static int count = LOAD_FREQ;
1013 1014
1014 count -= ticks; 1015 count -= ticks;
1015 if (unlikely(count < 0)) { 1016 if (unlikely(count < 0)) {
1016 active_tasks = count_active_tasks(); 1017 active_tasks = count_active_tasks();
1017 do { 1018 do {
1018 CALC_LOAD(avenrun[0], EXP_1, active_tasks); 1019 CALC_LOAD(avenrun[0], EXP_1, active_tasks);
1019 CALC_LOAD(avenrun[1], EXP_5, active_tasks); 1020 CALC_LOAD(avenrun[1], EXP_5, active_tasks);
1020 CALC_LOAD(avenrun[2], EXP_15, active_tasks); 1021 CALC_LOAD(avenrun[2], EXP_15, active_tasks);
1021 count += LOAD_FREQ; 1022 count += LOAD_FREQ;
1022 } while (count < 0); 1023 } while (count < 0);
1023 } 1024 }
1024 } 1025 }
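
[Editor's note] CALC_LOAD is a fixed-point exponential moving average; assuming the standard definitions from include/linux/sched.h (FSHIFT == 11, FIXED_1 == 1 << FSHIFT), each invocation computes roughly:

/* Fixed-point EMA, as a sketch (assumes FSHIFT == 11, FIXED_1 == 2048):
 *
 *	load = (load * exp + n * (FIXED_1 - exp)) >> FSHIFT;
 *
 * EXP_1, EXP_5 and EXP_15 are the per-LOAD_FREQ (roughly every five
 * seconds) decay factors for the 1-, 5- and 15-minute averages.
 */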
1025 1026
1026 /* 1027 /*
1027 * This function runs timers and the timer-tq in bottom half context. 1028 * This function runs timers and the timer-tq in bottom half context.
1028 */ 1029 */
1029 static void run_timer_softirq(struct softirq_action *h) 1030 static void run_timer_softirq(struct softirq_action *h)
1030 { 1031 {
1031 struct tvec_base *base = __get_cpu_var(tvec_bases); 1032 struct tvec_base *base = __get_cpu_var(tvec_bases);
1032 1033
1033 hrtimer_run_pending(); 1034 hrtimer_run_pending();
1034 1035
1035 if (time_after_eq(jiffies, base->timer_jiffies)) 1036 if (time_after_eq(jiffies, base->timer_jiffies))
1036 __run_timers(base); 1037 __run_timers(base);
1037 } 1038 }
1038 1039
1039 /* 1040 /*
1040 * Called by the local, per-CPU timer interrupt on SMP. 1041 * Called by the local, per-CPU timer interrupt on SMP.
1041 */ 1042 */
1042 void run_local_timers(void) 1043 void run_local_timers(void)
1043 { 1044 {
1044 hrtimer_run_queues(); 1045 hrtimer_run_queues();
1045 raise_softirq(TIMER_SOFTIRQ); 1046 raise_softirq(TIMER_SOFTIRQ);
1046 softlockup_tick(); 1047 softlockup_tick();
1047 } 1048 }
1048 1049
1049 /* 1050 /*
1050 * Called by the timer interrupt. xtime_lock must already be taken 1051 * Called by the timer interrupt. xtime_lock must already be taken
1051 * by the timer IRQ! 1052 * by the timer IRQ!
1052 */ 1053 */
1053 static inline void update_times(unsigned long ticks) 1054 static inline void update_times(unsigned long ticks)
1054 { 1055 {
1055 update_wall_time(); 1056 update_wall_time();
1056 calc_load(ticks); 1057 calc_load(ticks);
1057 } 1058 }
1058 1059
1059 /* 1060 /*
1060 * The 64-bit jiffies value is not atomic - you MUST NOT read it 1061 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1061 * without sampling the sequence number in xtime_lock. 1062 * without sampling the sequence number in xtime_lock.
1062 * jiffies is defined in the linker script... 1063 * jiffies is defined in the linker script...
1063 */ 1064 */
1064 1065
1065 void do_timer(unsigned long ticks) 1066 void do_timer(unsigned long ticks)
1066 { 1067 {
1067 jiffies_64 += ticks; 1068 jiffies_64 += ticks;
1068 update_times(ticks); 1069 update_times(ticks);
1069 } 1070 }
1070 1071
1071 #ifdef __ARCH_WANT_SYS_ALARM 1072 #ifdef __ARCH_WANT_SYS_ALARM
1072 1073
1073 /* 1074 /*
1074 * For backwards compatibility? This can be done in libc so Alpha 1075 * For backwards compatibility? This can be done in libc so Alpha
1075 * and all newer ports shouldn't need it. 1076 * and all newer ports shouldn't need it.
1076 */ 1077 */
1077 asmlinkage unsigned long sys_alarm(unsigned int seconds) 1078 asmlinkage unsigned long sys_alarm(unsigned int seconds)
1078 { 1079 {
1079 return alarm_setitimer(seconds); 1080 return alarm_setitimer(seconds);
1080 } 1081 }
1081 1082
1082 #endif 1083 #endif
1083 1084
1084 #ifndef __alpha__ 1085 #ifndef __alpha__
1085 1086
1086 /* 1087 /*
1087 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this 1088 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
1088 * should be moved into arch/i386 instead? 1089 * should be moved into arch/i386 instead?
1089 */ 1090 */
1090 1091
1091 /** 1092 /**
1092 * sys_getpid - return the thread group id of the current process 1093 * sys_getpid - return the thread group id of the current process
1093 * 1094 *
1094 * Note, despite the name, this returns the tgid not the pid. The tgid and 1095 * Note, despite the name, this returns the tgid not the pid. The tgid and
1095 * the pid are identical unless CLONE_THREAD was specified on clone() in 1096 * the pid are identical unless CLONE_THREAD was specified on clone() in
1096 * which case the tgid is the same in all threads of the same group. 1097 * which case the tgid is the same in all threads of the same group.
1097 * 1098 *
1098 * This is SMP safe as current->tgid does not change. 1099 * This is SMP safe as current->tgid does not change.
1099 */ 1100 */
1100 asmlinkage long sys_getpid(void) 1101 asmlinkage long sys_getpid(void)
1101 { 1102 {
1102 return task_tgid_vnr(current); 1103 return task_tgid_vnr(current);
1103 } 1104 }
1104 1105
1105 /* 1106 /*
1106 * Accessing ->real_parent is not SMP-safe, it could 1107 * Accessing ->real_parent is not SMP-safe, it could
1107 * change from under us. However, we can use a stale 1108 * change from under us. However, we can use a stale
1108 * value of ->real_parent under rcu_read_lock(), see 1109 * value of ->real_parent under rcu_read_lock(), see
1109 * release_task()->call_rcu(delayed_put_task_struct). 1110 * release_task()->call_rcu(delayed_put_task_struct).
1110 */ 1111 */
1111 asmlinkage long sys_getppid(void) 1112 asmlinkage long sys_getppid(void)
1112 { 1113 {
1113 int pid; 1114 int pid;
1114 1115
1115 rcu_read_lock(); 1116 rcu_read_lock();
1116 pid = task_tgid_vnr(current->real_parent); 1117 pid = task_tgid_vnr(current->real_parent);
1117 rcu_read_unlock(); 1118 rcu_read_unlock();
1118 1119
1119 return pid; 1120 return pid;
1120 } 1121 }
1121 1122
1122 asmlinkage long sys_getuid(void) 1123 asmlinkage long sys_getuid(void)
1123 { 1124 {
1124 /* Only we change this so SMP safe */ 1125 /* Only we change this so SMP safe */
1125 return current->uid; 1126 return current->uid;
1126 } 1127 }
1127 1128
1128 asmlinkage long sys_geteuid(void) 1129 asmlinkage long sys_geteuid(void)
1129 { 1130 {
1130 /* Only we change this so SMP safe */ 1131 /* Only we change this so SMP safe */
1131 return current->euid; 1132 return current->euid;
1132 } 1133 }
1133 1134
1134 asmlinkage long sys_getgid(void) 1135 asmlinkage long sys_getgid(void)
1135 { 1136 {
1136 /* Only we change this so SMP safe */ 1137 /* Only we change this so SMP safe */
1137 return current->gid; 1138 return current->gid;
1138 } 1139 }
1139 1140
1140 asmlinkage long sys_getegid(void) 1141 asmlinkage long sys_getegid(void)
1141 { 1142 {
1142 /* Only we change this so SMP safe */ 1143 /* Only we change this so SMP safe */
1143 return current->egid; 1144 return current->egid;
1144 } 1145 }
1145 1146
1146 #endif 1147 #endif
1147 1148
1148 static void process_timeout(unsigned long __data) 1149 static void process_timeout(unsigned long __data)
1149 { 1150 {
1150 wake_up_process((struct task_struct *)__data); 1151 wake_up_process((struct task_struct *)__data);
1151 } 1152 }
1152 1153
1153 /** 1154 /**
1154 * schedule_timeout - sleep until timeout 1155 * schedule_timeout - sleep until timeout
1155 * @timeout: timeout value in jiffies 1156 * @timeout: timeout value in jiffies
1156 * 1157 *
1157 * Make the current task sleep until @timeout jiffies have 1158 * Make the current task sleep until @timeout jiffies have
1158 * elapsed. The routine will return immediately unless 1159 * elapsed. The routine will return immediately unless
1159 * the current task state has been set (see set_current_state()). 1160 * the current task state has been set (see set_current_state()).
1160 * 1161 *
1161 * You can set the task state as follows - 1162 * You can set the task state as follows -
1162 * 1163 *
1163 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to 1164 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1164 * pass before the routine returns. The routine will return 0 1165 * pass before the routine returns. The routine will return 0
1165 * 1166 *
1166 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is 1167 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1167 * delivered to the current task. In this case the remaining time 1168 * delivered to the current task. In this case the remaining time
1168 * in jiffies will be returned, or 0 if the timer expired in time 1169 * in jiffies will be returned, or 0 if the timer expired in time
1169 * 1170 *
1170 * The current task state is guaranteed to be TASK_RUNNING when this 1171 * The current task state is guaranteed to be TASK_RUNNING when this
1171 * routine returns. 1172 * routine returns.
1172 * 1173 *
1173 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule 1174 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1174 * the CPU away without a bound on the timeout. In this case the return 1175 * the CPU away without a bound on the timeout. In this case the return
1175 * value will be %MAX_SCHEDULE_TIMEOUT. 1176 * value will be %MAX_SCHEDULE_TIMEOUT.
1176 * 1177 *
1177 * In all cases the return value is guaranteed to be non-negative. 1178 * In all cases the return value is guaranteed to be non-negative.
1178 */ 1179 */
1179 signed long __sched schedule_timeout(signed long timeout) 1180 signed long __sched schedule_timeout(signed long timeout)
1180 { 1181 {
1181 struct timer_list timer; 1182 struct timer_list timer;
1182 unsigned long expire; 1183 unsigned long expire;
1183 1184
1184 switch (timeout) 1185 switch (timeout)
1185 { 1186 {
1186 case MAX_SCHEDULE_TIMEOUT: 1187 case MAX_SCHEDULE_TIMEOUT:
1187 /* 1188 /*
1188 * These two special cases are useful to be comfortable 1189 * These two special cases are useful to be comfortable
1189 * in the caller. Nothing more. We could take 1190 * in the caller. Nothing more. We could take
1190 * MAX_SCHEDULE_TIMEOUT from one of the negative values, 1191 * MAX_SCHEDULE_TIMEOUT from one of the negative values,
1191 * but I'd like to return a valid offset (>=0) to allow 1192 * but I'd like to return a valid offset (>=0) to allow
1192 * the caller to do everything it wants with the retval. 1193 * the caller to do everything it wants with the retval.
1193 */ 1194 */
1194 schedule(); 1195 schedule();
1195 goto out; 1196 goto out;
1196 default: 1197 default:
1197 /* 1198 /*
1198 * Another bit of paranoia. Note that the retval will be 1199 * Another bit of paranoia. Note that the retval will be
1199 * 0 since no piece of kernel is supposed to do a check 1200 * 0 since no piece of kernel is supposed to do a check
1200 * for a negative retval of schedule_timeout() (since it 1201 * for a negative retval of schedule_timeout() (since it
1201 * should never happen anyway). You just have the printk() 1202 * should never happen anyway). You just have the printk()
1202 * that will tell you if something has gone wrong and where. 1203 * that will tell you if something has gone wrong and where.
1203 */ 1204 */
1204 if (timeout < 0) { 1205 if (timeout < 0) {
1205 printk(KERN_ERR "schedule_timeout: wrong timeout " 1206 printk(KERN_ERR "schedule_timeout: wrong timeout "
1206 "value %lx\n", timeout); 1207 "value %lx\n", timeout);
1207 dump_stack(); 1208 dump_stack();
1208 current->state = TASK_RUNNING; 1209 current->state = TASK_RUNNING;
1209 goto out; 1210 goto out;
1210 } 1211 }
1211 } 1212 }
1212 1213
1213 expire = timeout + jiffies; 1214 expire = timeout + jiffies;
1214 1215
1215 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); 1216 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1216 __mod_timer(&timer, expire); 1217 __mod_timer(&timer, expire);
1217 schedule(); 1218 schedule();
1218 del_singleshot_timer_sync(&timer); 1219 del_singleshot_timer_sync(&timer);
1219 1220
1220 /* Remove the timer from the object tracker */ 1221 /* Remove the timer from the object tracker */
1221 destroy_timer_on_stack(&timer); 1222 destroy_timer_on_stack(&timer);
1222 1223
1223 timeout = expire - jiffies; 1224 timeout = expire - jiffies;
1224 1225
1225 out: 1226 out:
1226 return timeout < 0 ? 0 : timeout; 1227 return timeout < 0 ? 0 : timeout;
1227 } 1228 }
1228 EXPORT_SYMBOL(schedule_timeout); 1229 EXPORT_SYMBOL(schedule_timeout);
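
[Editor's note] A hedged usage sketch of the contract described above: set the task state first, then call schedule_timeout(); the *_interruptible/_killable/_uninterruptible wrappers below fold the two steps together (helper name is hypothetical):

/* Hypothetical sketch: sleep up to one second, waking early on a
 * signal; the return value is the unslept time in jiffies. */
static signed long nap(void)
{
	set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(HZ);
}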
1229 1230
1230 /* 1231 /*
1231 * We can use __set_current_state() here because schedule_timeout() calls 1232 * We can use __set_current_state() here because schedule_timeout() calls
1232 * schedule() unconditionally. 1233 * schedule() unconditionally.
1233 */ 1234 */
1234 signed long __sched schedule_timeout_interruptible(signed long timeout) 1235 signed long __sched schedule_timeout_interruptible(signed long timeout)
1235 { 1236 {
1236 __set_current_state(TASK_INTERRUPTIBLE); 1237 __set_current_state(TASK_INTERRUPTIBLE);
1237 return schedule_timeout(timeout); 1238 return schedule_timeout(timeout);
1238 } 1239 }
1239 EXPORT_SYMBOL(schedule_timeout_interruptible); 1240 EXPORT_SYMBOL(schedule_timeout_interruptible);
1240 1241
1241 signed long __sched schedule_timeout_killable(signed long timeout) 1242 signed long __sched schedule_timeout_killable(signed long timeout)
1242 { 1243 {
1243 __set_current_state(TASK_KILLABLE); 1244 __set_current_state(TASK_KILLABLE);
1244 return schedule_timeout(timeout); 1245 return schedule_timeout(timeout);
1245 } 1246 }
1246 EXPORT_SYMBOL(schedule_timeout_killable); 1247 EXPORT_SYMBOL(schedule_timeout_killable);
1247 1248
1248 signed long __sched schedule_timeout_uninterruptible(signed long timeout) 1249 signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1249 { 1250 {
1250 __set_current_state(TASK_UNINTERRUPTIBLE); 1251 __set_current_state(TASK_UNINTERRUPTIBLE);
1251 return schedule_timeout(timeout); 1252 return schedule_timeout(timeout);
1252 } 1253 }
1253 EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1254 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1254 1255
1255 /* Thread ID - the internal kernel "pid" */ 1256 /* Thread ID - the internal kernel "pid" */
1256 asmlinkage long sys_gettid(void) 1257 asmlinkage long sys_gettid(void)
1257 { 1258 {
1258 return task_pid_vnr(current); 1259 return task_pid_vnr(current);
1259 } 1260 }
1260 1261
1261 /** 1262 /**
1262 * do_sysinfo - fill in sysinfo struct 1263 * do_sysinfo - fill in sysinfo struct
1263 * @info: pointer to buffer to fill 1264 * @info: pointer to buffer to fill
1264 */ 1265 */
1265 int do_sysinfo(struct sysinfo *info) 1266 int do_sysinfo(struct sysinfo *info)
1266 { 1267 {
1267 unsigned long mem_total, sav_total; 1268 unsigned long mem_total, sav_total;
1268 unsigned int mem_unit, bitcount; 1269 unsigned int mem_unit, bitcount;
1269 unsigned long seq; 1270 unsigned long seq;
1270 1271
1271 memset(info, 0, sizeof(struct sysinfo)); 1272 memset(info, 0, sizeof(struct sysinfo));
1272 1273
1273 do { 1274 do {
1274 struct timespec tp; 1275 struct timespec tp;
1275 seq = read_seqbegin(&xtime_lock); 1276 seq = read_seqbegin(&xtime_lock);
1276 1277
1277 /* 1278 /*
1278 * This is annoying. The code below is the same thing 1279 * This is annoying. The code below is the same thing
1279 * posix_get_clock_monotonic() does, but that function wants 1280 * posix_get_clock_monotonic() does, but that function wants
1280 * to take the lock itself, while we want the lock to cover 1281 * to take the lock itself, while we want the lock to cover
1281 * the load-average reads too. 1282 * the load-average reads too.
1282 */ 1283 */
1283 1284
1284 getnstimeofday(&tp); 1285 getnstimeofday(&tp);
1285 tp.tv_sec += wall_to_monotonic.tv_sec; 1286 tp.tv_sec += wall_to_monotonic.tv_sec;
1286 tp.tv_nsec += wall_to_monotonic.tv_nsec; 1287 tp.tv_nsec += wall_to_monotonic.tv_nsec;
1287 monotonic_to_bootbased(&tp); 1288 monotonic_to_bootbased(&tp);
1288 if (tp.tv_nsec - NSEC_PER_SEC >= 0) { 1289 if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
1289 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; 1290 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1290 tp.tv_sec++; 1291 tp.tv_sec++;
1291 } 1292 }
1292 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); 1293 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1293 1294
1294 info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); 1295 info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
1295 info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); 1296 info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
1296 info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); 1297 info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
1297 1298
1298 info->procs = nr_threads; 1299 info->procs = nr_threads;
1299 } while (read_seqretry(&xtime_lock, seq)); 1300 } while (read_seqretry(&xtime_lock, seq));
1300 1301
1301 si_meminfo(info); 1302 si_meminfo(info);
1302 si_swapinfo(info); 1303 si_swapinfo(info);
1303 1304
1304 /* 1305 /*
1305 * If the sum of all the available memory (i.e. ram + swap) 1306 * If the sum of all the available memory (i.e. ram + swap)
1306 * is less than can be stored in a 32 bit unsigned long then 1307 * is less than can be stored in a 32 bit unsigned long then
1307 * we can be binary compatible with 2.2.x kernels. If not, 1308 * we can be binary compatible with 2.2.x kernels. If not,
1308 * well, in that case 2.2.x was broken anyway... 1309 * well, in that case 2.2.x was broken anyway...
1309 * 1310 *
1310 * -Erik Andersen <andersee@debian.org> 1311 * -Erik Andersen <andersee@debian.org>
1311 */ 1312 */
1312 1313
1313 mem_total = info->totalram + info->totalswap; 1314 mem_total = info->totalram + info->totalswap;
1314 if (mem_total < info->totalram || mem_total < info->totalswap) 1315 if (mem_total < info->totalram || mem_total < info->totalswap)
1315 goto out; 1316 goto out;
1316 bitcount = 0; 1317 bitcount = 0;
1317 mem_unit = info->mem_unit; 1318 mem_unit = info->mem_unit;
1318 while (mem_unit > 1) { 1319 while (mem_unit > 1) {
1319 bitcount++; 1320 bitcount++;
1320 mem_unit >>= 1; 1321 mem_unit >>= 1;
1321 sav_total = mem_total; 1322 sav_total = mem_total;
1322 mem_total <<= 1; 1323 mem_total <<= 1;
1323 if (mem_total < sav_total) 1324 if (mem_total < sav_total)
1324 goto out; 1325 goto out;
1325 } 1326 }
1326 1327
1327 /* 1328 /*
1328 * If mem_total did not overflow, multiply all memory values by 1329 * If mem_total did not overflow, multiply all memory values by
1329 * info->mem_unit and set it to 1. This leaves things compatible 1330 * info->mem_unit and set it to 1. This leaves things compatible
1330 * with 2.2.x, and also retains compatibility with earlier 2.4.x 1331 * with 2.2.x, and also retains compatibility with earlier 2.4.x
1331 * kernels... 1332 * kernels...
1332 */ 1333 */
1333 1334
1334 info->mem_unit = 1; 1335 info->mem_unit = 1;
1335 info->totalram <<= bitcount; 1336 info->totalram <<= bitcount;
1336 info->freeram <<= bitcount; 1337 info->freeram <<= bitcount;
1337 info->sharedram <<= bitcount; 1338 info->sharedram <<= bitcount;
1338 info->bufferram <<= bitcount; 1339 info->bufferram <<= bitcount;
1339 info->totalswap <<= bitcount; 1340 info->totalswap <<= bitcount;
1340 info->freeswap <<= bitcount; 1341 info->freeswap <<= bitcount;
1341 info->totalhigh <<= bitcount; 1342 info->totalhigh <<= bitcount;
1342 info->freehigh <<= bitcount; 1343 info->freehigh <<= bitcount;
1343 1344
1344 out: 1345 out:
1345 return 0; 1346 return 0;
1346 } 1347 }
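To make the shift loop concrete, a worked example of the normalisation it performs, with made-up numbers: for a hypothetical mem_unit of 4096 the loop computes bitcount = 12 while doubling mem_total once per bit so any overflow of the byte count is caught early, and the block above then rescales every field from 4 KiB units into plain bytes:

	unsigned long mem_unit = 4096;		/* hypothetical 4 KiB unit */
	unsigned long totalram = 131072;	/* 512 MiB in 4 KiB units */
	unsigned long bitcount = 0;

	while (mem_unit > 1) {			/* bitcount = log2(mem_unit) */
		bitcount++;
		mem_unit >>= 1;
	}
	/* bitcount == 12: 131072 << 12 == 536870912 bytes, mem_unit becomes 1 */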
1347 1348
1348 asmlinkage long sys_sysinfo(struct sysinfo __user *info) 1349 asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1349 { 1350 {
1350 struct sysinfo val; 1351 struct sysinfo val;
1351 1352
1352 do_sysinfo(&val); 1353 do_sysinfo(&val);
1353 1354
1354 if (copy_to_user(info, &val, sizeof(struct sysinfo))) 1355 if (copy_to_user(info, &val, sizeof(struct sysinfo)))
1355 return -EFAULT; 1356 return -EFAULT;
1356 1357
1357 return 0; 1358 return 0;
1358 } 1359 }
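sys_sysinfo() itself is just the snapshot plus a copy_to_user(); from userspace the same structure arrives through the glibc wrapper. A minimal caller, assuming the usual <sys/sysinfo.h> wrapper rather than a raw syscall:

	#include <stdio.h>
	#include <sys/sysinfo.h>

	int main(void)
	{
		struct sysinfo si;

		if (sysinfo(&si) != 0) {	/* the -EFAULT path surfaces as errno */
			perror("sysinfo");
			return 1;
		}
		/* mem_unit is 1 unless the totals overflowed an unsigned long */
		printf("up %ld s, ram %lu x %u bytes\n",
		       si.uptime, si.totalram, si.mem_unit);
		return 0;
	}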
1359 1360
1360 static int __cpuinit init_timers_cpu(int cpu) 1361 static int __cpuinit init_timers_cpu(int cpu)
1361 { 1362 {
1362 int j; 1363 int j;
1363 struct tvec_base *base; 1364 struct tvec_base *base;
1364 static char __cpuinitdata tvec_base_done[NR_CPUS]; 1365 static char __cpuinitdata tvec_base_done[NR_CPUS];
1365 1366
1366 if (!tvec_base_done[cpu]) { 1367 if (!tvec_base_done[cpu]) {
1367 static char boot_done; 1368 static char boot_done;
1368 1369
1369 if (boot_done) { 1370 if (boot_done) {
1370 /* 1371 /*
1371 * The APs use this path later in boot 1372 * The APs use this path later in boot
1372 */ 1373 */
1373 base = kmalloc_node(sizeof(*base), 1374 base = kmalloc_node(sizeof(*base),
1374 GFP_KERNEL | __GFP_ZERO, 1375 GFP_KERNEL | __GFP_ZERO,
1375 cpu_to_node(cpu)); 1376 cpu_to_node(cpu));
1376 if (!base) 1377 if (!base)
1377 return -ENOMEM; 1378 return -ENOMEM;
1378 1379
1379 /* Make sure that tvec_base is 2 byte aligned */ 1380 /* Make sure that tvec_base is 2 byte aligned */
1380 if (tbase_get_deferrable(base)) { 1381 if (tbase_get_deferrable(base)) {
1381 WARN_ON(1); 1382 WARN_ON(1);
1382 kfree(base); 1383 kfree(base);
1383 return -ENOMEM; 1384 return -ENOMEM;
1384 } 1385 }
1385 per_cpu(tvec_bases, cpu) = base; 1386 per_cpu(tvec_bases, cpu) = base;
1386 } else { 1387 } else {
1387 /* 1388 /*
1388 * This is for the boot CPU - we use compile-time 1389 * This is for the boot CPU - we use compile-time
1389 * static initialisation because per-cpu memory isn't 1390 * static initialisation because per-cpu memory isn't
1390 * ready yet and because the memory allocators are not 1391 * ready yet and because the memory allocators are not
1391 * initialised either. 1392 * initialised either.
1392 */ 1393 */
1393 boot_done = 1; 1394 boot_done = 1;
1394 base = &boot_tvec_bases; 1395 base = &boot_tvec_bases;
1395 } 1396 }
1396 tvec_base_done[cpu] = 1; 1397 tvec_base_done[cpu] = 1;
1397 } else { 1398 } else {
1398 base = per_cpu(tvec_bases, cpu); 1399 base = per_cpu(tvec_bases, cpu);
1399 } 1400 }
1400 1401
1401 spin_lock_init(&base->lock); 1402 spin_lock_init(&base->lock);
1402 1403
1403 for (j = 0; j < TVN_SIZE; j++) { 1404 for (j = 0; j < TVN_SIZE; j++) {
1404 INIT_LIST_HEAD(base->tv5.vec + j); 1405 INIT_LIST_HEAD(base->tv5.vec + j);
1405 INIT_LIST_HEAD(base->tv4.vec + j); 1406 INIT_LIST_HEAD(base->tv4.vec + j);
1406 INIT_LIST_HEAD(base->tv3.vec + j); 1407 INIT_LIST_HEAD(base->tv3.vec + j);
1407 INIT_LIST_HEAD(base->tv2.vec + j); 1408 INIT_LIST_HEAD(base->tv2.vec + j);
1408 } 1409 }
1409 for (j = 0; j < TVR_SIZE; j++) 1410 for (j = 0; j < TVR_SIZE; j++)
1410 INIT_LIST_HEAD(base->tv1.vec + j); 1411 INIT_LIST_HEAD(base->tv1.vec + j);
1411 1412
1412 base->timer_jiffies = jiffies; 1413 base->timer_jiffies = jiffies;
1413 return 0; 1414 return 0;
1414 } 1415 }
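The WARN_ON()/kfree() branch above exists because the deferrable flag is smuggled into bit 0 of the tvec_base pointer, so a freshly allocated base whose address has that bit set cannot be told apart from a flagged pointer. A sketch of the encoding, paraphrased from the helpers earlier in kernel/timer.c of this vintage (not part of this hunk):

	#define TBASE_DEFERRABLE_FLAG	(0x1)

	/* bit 0 carries the flag, hence tvec_base must be 2-byte aligned */
	static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
	{
		return (unsigned long)base & TBASE_DEFERRABLE_FLAG;
	}

	static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
	{
		return (struct tvec_base *)((unsigned long)base &
					    ~TBASE_DEFERRABLE_FLAG);
	}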
1415 1416
1416 #ifdef CONFIG_HOTPLUG_CPU 1417 #ifdef CONFIG_HOTPLUG_CPU
1417 static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) 1418 static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
1418 { 1419 {
1419 struct timer_list *timer; 1420 struct timer_list *timer;
1420 1421
1421 while (!list_empty(head)) { 1422 while (!list_empty(head)) {
1422 timer = list_first_entry(head, struct timer_list, entry); 1423 timer = list_first_entry(head, struct timer_list, entry);
1423 detach_timer(timer, 0); 1424 detach_timer(timer, 0);
1424 timer_set_base(timer, new_base); 1425 timer_set_base(timer, new_base);
1425 internal_add_timer(new_base, timer); 1426 internal_add_timer(new_base, timer);
1426 } 1427 }
1427 } 1428 }
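migrate_timer_list() drains head-first instead of using list_for_each_entry_safe() because internal_add_timer() relinks each timer into a different wheel slot as it goes. The underlying idiom, in isolation and with illustrative types:

	struct item {
		struct list_head entry;
	};

	static void drain_to(struct list_head *head, struct list_head *dst)
	{
		struct item *it;

		while (!list_empty(head)) {
			it = list_first_entry(head, struct item, entry);
			list_del(&it->entry);		/* unlink from old list */
			list_add_tail(&it->entry, dst);	/* relink on destination */
		}
	}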
1428 1429
1429 static void __cpuinit migrate_timers(int cpu) 1430 static void __cpuinit migrate_timers(int cpu)
1430 { 1431 {
1431 struct tvec_base *old_base; 1432 struct tvec_base *old_base;
1432 struct tvec_base *new_base; 1433 struct tvec_base *new_base;
1433 int i; 1434 int i;
1434 1435
1435 BUG_ON(cpu_online(cpu)); 1436 BUG_ON(cpu_online(cpu));
1436 old_base = per_cpu(tvec_bases, cpu); 1437 old_base = per_cpu(tvec_bases, cpu);
1437 new_base = get_cpu_var(tvec_bases); 1438 new_base = get_cpu_var(tvec_bases);
1438 1439
1439 local_irq_disable(); 1440 local_irq_disable();
1440 spin_lock(&new_base->lock); 1441 spin_lock(&new_base->lock);
1441 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); 1442 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1442 1443
1443 BUG_ON(old_base->running_timer); 1444 BUG_ON(old_base->running_timer);
1444 1445
1445 for (i = 0; i < TVR_SIZE; i++) 1446 for (i = 0; i < TVR_SIZE; i++)
1446 migrate_timer_list(new_base, old_base->tv1.vec + i); 1447 migrate_timer_list(new_base, old_base->tv1.vec + i);
1447 for (i = 0; i < TVN_SIZE; i++) { 1448 for (i = 0; i < TVN_SIZE; i++) {
1448 migrate_timer_list(new_base, old_base->tv2.vec + i); 1449 migrate_timer_list(new_base, old_base->tv2.vec + i);
1449 migrate_timer_list(new_base, old_base->tv3.vec + i); 1450 migrate_timer_list(new_base, old_base->tv3.vec + i);
1450 migrate_timer_list(new_base, old_base->tv4.vec + i); 1451 migrate_timer_list(new_base, old_base->tv4.vec + i);
1451 migrate_timer_list(new_base, old_base->tv5.vec + i); 1452 migrate_timer_list(new_base, old_base->tv5.vec + i);
1452 } 1453 }
1453 1454
1454 spin_unlock(&old_base->lock); 1455 spin_unlock(&old_base->lock);
1455 spin_unlock(&new_base->lock); 1456 spin_unlock(&new_base->lock);
1456 local_irq_enable(); 1457 local_irq_enable();
1457 put_cpu_var(tvec_bases); 1458 put_cpu_var(tvec_bases);
1458 } 1459 }
1459 #endif /* CONFIG_HOTPLUG_CPU */ 1460 #endif /* CONFIG_HOTPLUG_CPU */
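A note on the locking in migrate_timers(): both locks belong to the same lock class (every tvec_base has one), so a plain second spin_lock() would trip lockdep's recursion check. spin_lock_nested() with SINGLE_DEPTH_NESTING marks the inner acquisition as deliberate. The shape of the pattern, with dst and src as illustrative names:

	local_irq_disable();
	spin_lock(&dst->lock);
	/* same lock class: annotate the nesting for lockdep */
	spin_lock_nested(&src->lock, SINGLE_DEPTH_NESTING);

	/* ... move per-CPU state from src to dst ... */

	spin_unlock(&src->lock);
	spin_unlock(&dst->lock);
	local_irq_enable();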
1460 1461
1461 static int __cpuinit timer_cpu_notify(struct notifier_block *self, 1462 static int __cpuinit timer_cpu_notify(struct notifier_block *self,
1462 unsigned long action, void *hcpu) 1463 unsigned long action, void *hcpu)
1463 { 1464 {
1464 long cpu = (long)hcpu; 1465 long cpu = (long)hcpu;
1465 switch(action) { 1466 switch(action) {
1466 case CPU_UP_PREPARE: 1467 case CPU_UP_PREPARE:
1467 case CPU_UP_PREPARE_FROZEN: 1468 case CPU_UP_PREPARE_FROZEN:
1468 if (init_timers_cpu(cpu) < 0) 1469 if (init_timers_cpu(cpu) < 0)
1469 return NOTIFY_BAD; 1470 return NOTIFY_BAD;
1470 break; 1471 break;
1471 #ifdef CONFIG_HOTPLUG_CPU 1472 #ifdef CONFIG_HOTPLUG_CPU
1472 case CPU_DEAD: 1473 case CPU_DEAD:
1473 case CPU_DEAD_FROZEN: 1474 case CPU_DEAD_FROZEN:
1474 migrate_timers(cpu); 1475 migrate_timers(cpu);
1475 break; 1476 break;
1476 #endif 1477 #endif
1477 default: 1478 default:
1478 break; 1479 break;
1479 } 1480 }
1480 return NOTIFY_OK; 1481 return NOTIFY_OK;
1481 } 1482 }
1482 1483
1483 static struct notifier_block __cpuinitdata timers_nb = { 1484 static struct notifier_block __cpuinitdata timers_nb = {
1484 .notifier_call = timer_cpu_notify, 1485 .notifier_call = timer_cpu_notify,
1485 }; 1486 };
1486 1487
1487 1488
1488 void __init init_timers(void) 1489 void __init init_timers(void)
1489 { 1490 {
1490 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, 1491 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1491 (void *)(long)smp_processor_id()); 1492 (void *)(long)smp_processor_id());
1492 1493
1493 init_timer_stats(); 1494 init_timer_stats();
1494 1495
1495 BUG_ON(err == NOTIFY_BAD); 1496 BUG_ON(err == NOTIFY_BAD);
1496 register_cpu_notifier(&timers_nb); 1497 register_cpu_notifier(&timers_nb);
1497 open_softirq(TIMER_SOFTIRQ, run_timer_softirq); 1498 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1498 } 1499 }
1499 1500
1500 /** 1501 /**
1501 * msleep - sleep safely even with waitqueue interruptions 1502 * msleep - sleep safely even with waitqueue interruptions
1502 * @msecs: Time in milliseconds to sleep for 1503 * @msecs: Time in milliseconds to sleep for
1503 */ 1504 */
1504 void msleep(unsigned int msecs) 1505 void msleep(unsigned int msecs)
1505 { 1506 {
1506 unsigned long timeout = msecs_to_jiffies(msecs) + 1; 1507 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1507 1508
1508 while (timeout) 1509 while (timeout)
1509 timeout = schedule_timeout_uninterruptible(timeout); 1510 timeout = schedule_timeout_uninterruptible(timeout);
1510 } 1511 }
1511 1512
1512 EXPORT_SYMBOL(msleep); 1513 EXPORT_SYMBOL(msleep);
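The "+ 1" in msleep() is deliberate: msecs_to_jiffies() alone can under-sleep because the current tick may be almost over, so one extra jiffy turns the result into a guaranteed minimum. Worked numbers, assuming HZ=100 (10 ms per jiffy):

	/*
	 *   msleep(1)  -> msecs_to_jiffies(1) + 1 = 2 jiffies -> 10..20 ms
	 *   msleep(20) -> msecs_to_jiffies(20) + 1 = 3 jiffies -> 20..30 ms
	 *
	 * i.e. the caller always sleeps at least the requested time,
	 * at the cost of up to one jiffy of over-sleep.
	 */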
1513 1514
1514 /** 1515 /**
1515 * msleep_interruptible - sleep waiting for signals 1516 * msleep_interruptible - sleep waiting for signals
1516 * @msecs: Time in milliseconds to sleep for 1517 * @msecs: Time in milliseconds to sleep for
1517 */ 1518 */
1518 unsigned long msleep_interruptible(unsigned int msecs) 1519 unsigned long msleep_interruptible(unsigned int msecs)
1519 { 1520 {
1520 unsigned long timeout = msecs_to_jiffies(msecs) + 1; 1521 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1521 1522
1522 while (timeout && !signal_pending(current)) 1523 while (timeout && !signal_pending(current))
1523 timeout = schedule_timeout_interruptible(timeout); 1524 timeout = schedule_timeout_interruptible(timeout);
1524 return jiffies_to_msecs(timeout); 1525 return jiffies_to_msecs(timeout);
1525 } 1526 }
1526 1527
1527 EXPORT_SYMBOL(msleep_interruptible); 1528 EXPORT_SYMBOL(msleep_interruptible);
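Unlike msleep(), the interruptible variant bails out when a signal arrives and reports the unslept remainder in milliseconds, so a caller can tell a completed sleep from an interrupted one. A hedged caller sketch; my_wait() is illustrative, not from this file:

	#include <linux/delay.h>
	#include <linux/errno.h>

	static int my_wait(unsigned int ms)
	{
		unsigned long left = msleep_interruptible(ms);

		if (left)	/* signal pending, roughly 'left' ms unslept */
			return -EINTR;
		return 0;	/* slept the full interval */
	}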
1528 1529