Commit b845b517b5e3706a3729f6ea83b88ab85f0725b0
Committed by: Ingo Molnar
Parent: 796aadeb1b
Exists in: master and 39 other branches

printk: robustify printk

Avoid deadlocks against rq->lock and xtime_lock by deferring the klogd
wakeup by polling from the timer tick.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 4 changed files with 23 additions and 3 deletions (inline diff; only include/linux/kernel.h and the beginning of kernel/printk.c are reproduced below)
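Why this is needed: printk() can be called while rq->lock or xtime_lock is already held, and waking klogd from there can deadlock, since wake_up() takes rq->lock and the wakeup path can touch xtime_lock. The fix makes the wakeup lazy: printk() only marks a wakeup as pending, and the regular timer tick polls that mark from a safe context; printk_needs_cpu() lets the nohz code keep a CPU's tick alive while a wakeup is still pending there. The printk.c hunks implementing this are cut off in the listing below, so here is a minimal sketch of the deferred path, assuming a per-CPU pending flag — the flag name is a guess, only the two hook names come from the kernel.h diff:

	static DEFINE_PER_CPU(int, printk_pending);	/* hypothetical flag name */

	/* Called from the regular timer tick: a context where waking
	 * klogd cannot deadlock against rq->lock or xtime_lock. */
	void printk_tick(void)
	{
		if (__get_cpu_var(printk_pending)) {
			__get_cpu_var(printk_pending) = 0;
			wake_up_interruptible(&log_wait);
		}
	}

	/* Let the nohz code keep this CPU's tick running while a
	 * klogd wakeup is still pending here. */
	int printk_needs_cpu(int cpu)
	{
		return per_cpu(printk_pending, cpu);
	}

	/* wake_up_klogd() would then only mark the wakeup as pending
	 * instead of calling wake_up() directly. */
	void wake_up_klogd(void)
	{
		if (waitqueue_active(&log_wait))
			__raw_get_cpu_var(printk_pending) = 1;
	}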
include/linux/kernel.h
#ifndef _LINUX_KERNEL_H
#define _LINUX_KERNEL_H

/*
 * 'kernel.h' contains some often-used function prototypes etc
 */

#ifdef __KERNEL__

#include <stdarg.h>
#include <linux/linkage.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/typecheck.h>
#include <linux/ratelimit.h>
#include <asm/byteorder.h>
#include <asm/bug.h>

extern const char linux_banner[];
extern const char linux_proc_banner[];

#define USHORT_MAX ((u16)(~0U))
#define SHORT_MAX ((s16)(USHORT_MAX>>1))
#define SHORT_MIN (-SHORT_MAX - 1)
#define INT_MAX ((int)(~0U>>1))
#define INT_MIN (-INT_MAX - 1)
#define UINT_MAX (~0U)
#define LONG_MAX ((long)(~0UL>>1))
#define LONG_MIN (-LONG_MAX - 1)
#define ULONG_MAX (~0UL)
#define LLONG_MAX ((long long)(~0ULL>>1))
#define LLONG_MIN (-LLONG_MAX - 1)
#define ULLONG_MAX (~0ULL)

#define STACK_MAGIC 0xdeadbeef

#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1)
#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

#define _RET_IP_ (unsigned long)__builtin_return_address(0)
#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })

#ifdef CONFIG_LBD
# include <asm/div64.h>
# define sector_div(a, b) do_div(a, b)
#else
# define sector_div(n, b)( \
{ \
	int _res; \
	_res = (n) % (b); \
	(n) /= (b); \
	_res; \
} \
)
#endif

/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32-bits.
 */
#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))

/**
 * lower_32_bits - return bits 0-31 of a number
 * @n: the number we're accessing
 */
#define lower_32_bits(n) ((u32)(n))

#define KERN_EMERG "<0>"	/* system is unusable */
#define KERN_ALERT "<1>"	/* action must be taken immediately */
#define KERN_CRIT "<2>"		/* critical conditions */
#define KERN_ERR "<3>"		/* error conditions */
#define KERN_WARNING "<4>"	/* warning conditions */
#define KERN_NOTICE "<5>"	/* normal but significant condition */
#define KERN_INFO "<6>"		/* informational */
#define KERN_DEBUG "<7>"	/* debug-level messages */

/*
 * Annotation for a "continued" line of log printout (only done after a
 * line that had no enclosing \n). Only to be used by core/arch code
 * during early bootup (a continued line is not SMP-safe otherwise).
 */
#define KERN_CONT ""

extern int console_printk[];

#define console_loglevel (console_printk[0])
#define default_message_loglevel (console_printk[1])
#define minimum_console_loglevel (console_printk[2])
#define default_console_loglevel (console_printk[3])

struct completion;
struct pt_regs;
struct user;

/**
 * might_sleep - annotation for functions that can sleep
 *
 * this macro will print a stack trace if it is executed in an atomic
 * context (spinlock, irq-handler, ...).
 *
 * This is a useful debugging help to be able to catch problems early and not
 * be bitten later when the calling function happens to sleep when it is not
 * supposed to.
 */
#ifdef CONFIG_PREEMPT_VOLUNTARY
extern int _cond_resched(void);
# define might_resched() _cond_resched()
#else
# define might_resched() do { } while (0)
#endif

#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
void __might_sleep(char *file, int line);
# define might_sleep() \
	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
#else
# define might_sleep() do { might_resched(); } while (0)
#endif

#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)

#define abs(x) ({ \
	int __x = (x); \
	(__x < 0) ? -__x : __x; \
})

extern struct atomic_notifier_head panic_notifier_list;
extern long (*panic_blink)(long time);
NORET_TYPE void panic(const char * fmt, ...)
	__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
extern void oops_enter(void);
extern void oops_exit(void);
extern int oops_may_print(void);
NORET_TYPE void do_exit(long error_code)
	ATTRIB_NORET;
NORET_TYPE void complete_and_exit(struct completion *, long)
	ATTRIB_NORET;
extern unsigned long simple_strtoul(const char *,char **,unsigned int);
extern long simple_strtol(const char *,char **,unsigned int);
extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
extern long long simple_strtoll(const char *,char **,unsigned int);
extern int strict_strtoul(const char *, unsigned int, unsigned long *);
extern int strict_strtol(const char *, unsigned int, long *);
extern int strict_strtoull(const char *, unsigned int, unsigned long long *);
extern int strict_strtoll(const char *, unsigned int, long long *);
extern int sprintf(char * buf, const char * fmt, ...)
	__attribute__ ((format (printf, 2, 3)));
extern int vsprintf(char *buf, const char *, va_list)
	__attribute__ ((format (printf, 2, 0)));
extern int snprintf(char * buf, size_t size, const char * fmt, ...)
	__attribute__ ((format (printf, 3, 4)));
extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
	__attribute__ ((format (printf, 3, 0)));
extern int scnprintf(char * buf, size_t size, const char * fmt, ...)
	__attribute__ ((format (printf, 3, 4)));
extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
	__attribute__ ((format (printf, 3, 0)));
extern char *kasprintf(gfp_t gfp, const char *fmt, ...)
	__attribute__ ((format (printf, 2, 3)));
extern char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);

extern int sscanf(const char *, const char *, ...)
	__attribute__ ((format (scanf, 2, 3)));
extern int vsscanf(const char *, const char *, va_list)
	__attribute__ ((format (scanf, 2, 0)));

extern int get_option(char **str, int *pint);
extern char *get_options(const char *str, int nints, int *ints);
extern unsigned long long memparse(char *ptr, char **retptr);

extern int core_kernel_text(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr);
struct pid;
extern struct pid *session_of_pgrp(struct pid *pgrp);

#ifdef CONFIG_PRINTK
asmlinkage int vprintk(const char *fmt, va_list args)
	__attribute__ ((format (printf, 1, 0)));
asmlinkage int printk(const char * fmt, ...)
	__attribute__ ((format (printf, 1, 2))) __cold;

extern struct ratelimit_state printk_ratelimit_state;
extern int printk_ratelimit(void);
extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
	unsigned int interval_msec);
+extern void printk_tick(void);
+extern int printk_needs_cpu(int);
#else
static inline int vprintk(const char *s, va_list args)
	__attribute__ ((format (printf, 1, 0)));
static inline int vprintk(const char *s, va_list args) { return 0; }
static inline int printk(const char *s, ...)
	__attribute__ ((format (printf, 1, 2)));
static inline int __cold printk(const char *s, ...) { return 0; }
static inline int printk_ratelimit(void) { return 0; }
static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
	unsigned int interval_msec) \
	{ return false; }
+static inline void printk_tick(void) { }
+static inline int printk_needs_cpu(int cpu) { return 0; }
#endif

extern void asmlinkage __attribute__((format(printf, 1, 2)))
	early_printk(const char *fmt, ...);

unsigned long int_sqrt(unsigned long);

static inline void console_silent(void)
{
	console_loglevel = 0;
}

static inline void console_verbose(void)
{
	if (console_loglevel)
		console_loglevel = 15;
}

extern void bust_spinlocks(int yes);
extern void wake_up_klogd(void);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
extern int panic_timeout;
extern int panic_on_oops;
extern int panic_on_unrecovered_nmi;
extern int tainted;
extern const char *print_tainted(void);
extern void add_taint(unsigned);
extern int root_mountflags;

/* Values used for system_state */
extern enum system_states {
	SYSTEM_BOOTING,
	SYSTEM_RUNNING,
	SYSTEM_HALT,
	SYSTEM_POWER_OFF,
	SYSTEM_RESTART,
	SYSTEM_SUSPEND_DISK,
} system_state;

#define TAINT_PROPRIETARY_MODULE (1<<0)
#define TAINT_FORCED_MODULE (1<<1)
#define TAINT_UNSAFE_SMP (1<<2)
#define TAINT_FORCED_RMMOD (1<<3)
#define TAINT_MACHINE_CHECK (1<<4)
#define TAINT_BAD_PAGE (1<<5)
#define TAINT_USER (1<<6)
#define TAINT_DIE (1<<7)
#define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8)
#define TAINT_WARN (1<<9)

extern void dump_stack(void) __cold;

enum {
	DUMP_PREFIX_NONE,
	DUMP_PREFIX_ADDRESS,
	DUMP_PREFIX_OFFSET
};
extern void hex_dump_to_buffer(const void *buf, size_t len,
	int rowsize, int groupsize,
	char *linebuf, size_t linebuflen, bool ascii);
extern void print_hex_dump(const char *level, const char *prefix_str,
	int prefix_type, int rowsize, int groupsize,
	const void *buf, size_t len, bool ascii);
extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
	const void *buf, size_t len);

extern const char hex_asc[];
#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4]

static inline char *pack_hex_byte(char *buf, u8 byte)
{
	*buf++ = hex_asc_hi(byte);
	*buf++ = hex_asc_lo(byte);
	return buf;
}

#define pr_emerg(fmt, arg...) \
	printk(KERN_EMERG fmt, ##arg)
#define pr_alert(fmt, arg...) \
	printk(KERN_ALERT fmt, ##arg)
#define pr_crit(fmt, arg...) \
	printk(KERN_CRIT fmt, ##arg)
#define pr_err(fmt, arg...) \
	printk(KERN_ERR fmt, ##arg)
#define pr_warning(fmt, arg...) \
	printk(KERN_WARNING fmt, ##arg)
#define pr_notice(fmt, arg...) \
	printk(KERN_NOTICE fmt, ##arg)
#define pr_info(fmt, arg...) \
	printk(KERN_INFO fmt, ##arg)

#ifdef DEBUG
/* If you are writing a driver, please use dev_dbg instead */
#define pr_debug(fmt, arg...) \
	printk(KERN_DEBUG fmt, ##arg)
#else
#define pr_debug(fmt, arg...) \
	({ if (0) printk(KERN_DEBUG fmt, ##arg); 0; })
#endif

/*
 * Display an IP address in readable format.
 */

#define NIPQUAD(addr) \
	((unsigned char *)&addr)[0], \
	((unsigned char *)&addr)[1], \
	((unsigned char *)&addr)[2], \
	((unsigned char *)&addr)[3]
#define NIPQUAD_FMT "%u.%u.%u.%u"

#define NIP6(addr) \
	ntohs((addr).s6_addr16[0]), \
	ntohs((addr).s6_addr16[1]), \
	ntohs((addr).s6_addr16[2]), \
	ntohs((addr).s6_addr16[3]), \
	ntohs((addr).s6_addr16[4]), \
	ntohs((addr).s6_addr16[5]), \
	ntohs((addr).s6_addr16[6]), \
	ntohs((addr).s6_addr16[7])
#define NIP6_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x"
#define NIP6_SEQFMT "%04x%04x%04x%04x%04x%04x%04x%04x"

#if defined(__LITTLE_ENDIAN)
#define HIPQUAD(addr) \
	((unsigned char *)&addr)[3], \
	((unsigned char *)&addr)[2], \
	((unsigned char *)&addr)[1], \
	((unsigned char *)&addr)[0]
#elif defined(__BIG_ENDIAN)
#define HIPQUAD NIPQUAD
#else
#error "Please fix asm/byteorder.h"
#endif /* __LITTLE_ENDIAN */

/*
 * min()/max()/clamp() macros that also do
 * strict type-checking.. See the
 * "unnecessary" pointer comparison.
 */
#define min(x, y) ({ \
	typeof(x) _min1 = (x); \
	typeof(y) _min2 = (y); \
	(void) (&_min1 == &_min2); \
	_min1 < _min2 ? _min1 : _min2; })

#define max(x, y) ({ \
	typeof(x) _max1 = (x); \
	typeof(y) _max2 = (y); \
	(void) (&_max1 == &_max2); \
	_max1 > _max2 ? _max1 : _max2; })

/**
 * clamp - return a value clamped to a given range with strict typechecking
 * @val: current value
 * @min: minimum allowable value
 * @max: maximum allowable value
 *
 * This macro does strict typechecking of min/max to make sure they are of the
 * same type as val. See the unnecessary pointer comparisons.
 */
#define clamp(val, min, max) ({ \
	typeof(val) __val = (val); \
	typeof(min) __min = (min); \
	typeof(max) __max = (max); \
	(void) (&__val == &__min); \
	(void) (&__val == &__max); \
	__val = __val < __min ? __min: __val; \
	__val > __max ? __max: __val; })

/*
 * ..and if you can't take the strict
 * types, you can specify one yourself.
 *
 * Or not use min/max/clamp at all, of course.
 */
#define min_t(type, x, y) ({ \
	type __min1 = (x); \
	type __min2 = (y); \
	__min1 < __min2 ? __min1: __min2; })

#define max_t(type, x, y) ({ \
	type __max1 = (x); \
	type __max2 = (y); \
	__max1 > __max2 ? __max1: __max2; })

/**
 * clamp_t - return a value clamped to a given range using a given type
 * @type: the type of variable to use
 * @val: current value
 * @min: minimum allowable value
 * @max: maximum allowable value
 *
 * This macro does no typechecking and uses temporary variables of type
 * 'type' to make all the comparisons.
 */
#define clamp_t(type, val, min, max) ({ \
	type __val = (val); \
	type __min = (min); \
	type __max = (max); \
	__val = __val < __min ? __min: __val; \
	__val > __max ? __max: __val; })

/**
 * clamp_val - return a value clamped to a given range using val's type
 * @val: current value
 * @min: minimum allowable value
 * @max: maximum allowable value
 *
 * This macro does no typechecking and uses temporary variables of whatever
 * type the input argument 'val' is. This is useful when val is an unsigned
 * type and min and max are literals that will otherwise be assigned a signed
 * integer type.
 */
#define clamp_val(val, min, max) ({ \
	typeof(val) __val = (val); \
	typeof(val) __min = (min); \
	typeof(val) __max = (max); \
	__val = __val < __min ? __min: __val; \
	__val > __max ? __max: __val; })

/**
 * container_of - cast a member of a structure out to the containing structure
 * @ptr: the pointer to the member.
 * @type: the type of the container struct this is embedded in.
 * @member: the name of the member within the struct.
 *
 */
#define container_of(ptr, type, member) ({ \
	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
	(type *)( (char *)__mptr - offsetof(type,member) );})

struct sysinfo;
extern int do_sysinfo(struct sysinfo *info);

#endif /* __KERNEL__ */

#define SI_LOAD_SHIFT 16
struct sysinfo {
	long uptime;			/* Seconds since boot */
	unsigned long loads[3];		/* 1, 5, and 15 minute load averages */
	unsigned long totalram;		/* Total usable main memory size */
	unsigned long freeram;		/* Available memory size */
	unsigned long sharedram;	/* Amount of shared memory */
	unsigned long bufferram;	/* Memory used by buffers */
	unsigned long totalswap;	/* Total swap space size */
	unsigned long freeswap;		/* swap space still available */
	unsigned short procs;		/* Number of current processes */
	unsigned short pad;		/* explicit padding for m68k */
	unsigned long totalhigh;	/* Total high memory size */
	unsigned long freehigh;		/* Available high memory size */
	unsigned int mem_unit;		/* Memory unit size in bytes */
	char _f[20-2*sizeof(long)-sizeof(int)];	/* Padding: libc5 uses this.. */
};

/* Force a compilation error if condition is true */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

/* Force a compilation error if condition is true, but also produce a
   result (of value 0 and type size_t), so the expression can be used
   e.g. in a structure initializer (or where-ever else comma expressions
   aren't permitted). */
#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)

/* Trap pasters of __FUNCTION__ at compile-time */
#define __FUNCTION__ (__func__)

/* This helps us to avoid #ifdef CONFIG_NUMA */
#ifdef CONFIG_NUMA
#define NUMA_BUILD 1
#else
#define NUMA_BUILD 0
#endif

#endif
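kernel.h only gains the two declarations (plus the !CONFIG_PRINTK stubs); the hooks still need call sites in the tick code. Those hunks belong to the remaining two changed files, which this truncated inline view does not reproduce. A hedged sketch of where such calls would plausibly go — assumed placement, not hunks shown on this page:

	/* Assumed hunk in the periodic tick path (e.g. update_process_times()
	 * in kernel/timer.c): poll for a deferred klogd wakeup once per tick,
	 * from a context where taking rq->lock is no longer a problem. */
	void update_process_times(int user_tick)
	{
		struct task_struct *p = current;
		int cpu = smp_processor_id();

		account_process_tick(p, user_tick);
		run_local_timers();
		if (rcu_pending(cpu))
			rcu_check_callbacks(cpu, user_tick);
		printk_tick();			/* the new hook */
		scheduler_tick();
		run_posix_cpu_timers(p);
	}

	/* Assumed hunk in the nohz code (kernel/time/tick-sched.c): don't
	 * stop the tick while a wakeup is still pending on this CPU, or the
	 * poll above would never run. */
	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
		delta_jiffies = 1;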
kernel/printk.c
/*
 * linux/kernel/printk.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Modified to make sys_syslog() more flexible: added commands to
 * return the last 4k of kernel messages, regardless of whether
 * they've been read or not. Added option to suppress kernel printk's
 * to the console. Added hook for sending the console messages
 * elsewhere, in preparation for a serial line console (someday).
 * Ted Ts'o, 2/11/93.
 * Modified for sysctl support, 1/8/97, Chris Horn.
 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
 *     manfred@colorfullife.com
 * Rewrote bits to get rid of console_lock
 *     01Mar01 Andrew Morton <andrewm@uow.edu.au>
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>		/* For in_interrupt() */
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>

#include <asm/uaccess.h>

/*
 * Architectures can override it:
 */
void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
{
}

#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)

/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */

/* We show everything that is MORE important than this.. */
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */

DECLARE_WAIT_QUEUE_HEAD(log_wait);

int console_printk[4] = {
	DEFAULT_CONSOLE_LOGLEVEL,	/* console_loglevel */
	DEFAULT_MESSAGE_LOGLEVEL,	/* default_message_loglevel */
	MINIMUM_CONSOLE_LOGLEVEL,	/* minimum_console_loglevel */
	DEFAULT_CONSOLE_LOGLEVEL,	/* default_console_loglevel */
};

/*
 * Low level drivers may need that to know if they can schedule in
 * their unblank() callback or not. So let's export it.
 */
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);

/*
 * console_sem protects the console_drivers list, and also
 * provides serialisation for access to the entire console
 * driver system.
 */
static DECLARE_MUTEX(console_sem);
static DECLARE_MUTEX(secondary_console_sem);
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);

/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held. It's
 * definitely not the perfect debug tool (we don't know if _WE_
 * hold it are racing, but it helps tracking those weird code
 * paths in the console code where we end up in places I want
 * locked without the console semaphore held
 */
88 | static int console_locked, console_suspended; | 88 | static int console_locked, console_suspended; |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars | 91 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars |
92 | * It is also used in interesting ways to provide interlocking in | 92 | * It is also used in interesting ways to provide interlocking in |
93 | * release_console_sem(). | 93 | * release_console_sem(). |
94 | */ | 94 | */ |
95 | static DEFINE_SPINLOCK(logbuf_lock); | 95 | static DEFINE_SPINLOCK(logbuf_lock); |
96 | 96 | ||
97 | #define LOG_BUF_MASK (log_buf_len-1) | 97 | #define LOG_BUF_MASK (log_buf_len-1) |
98 | #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) | 98 | #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * The indices into log_buf are not constrained to log_buf_len - they | 101 | * The indices into log_buf are not constrained to log_buf_len - they |
102 | * must be masked before subscripting | 102 | * must be masked before subscripting |
103 | */ | 103 | */ |
104 | static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */ | 104 | static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */ |
105 | static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */ | 105 | static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */ |
106 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ | 106 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * Array of consoles built from command line options (console=) | 109 | * Array of consoles built from command line options (console=) |
110 | */ | 110 | */ |
111 | struct console_cmdline | 111 | struct console_cmdline |
112 | { | 112 | { |
113 | char name[8]; /* Name of the driver */ | 113 | char name[8]; /* Name of the driver */ |
114 | int index; /* Minor dev. to use */ | 114 | int index; /* Minor dev. to use */ |
115 | char *options; /* Options for the driver */ | 115 | char *options; /* Options for the driver */ |
116 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | 116 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE |
117 | char *brl_options; /* Options for braille driver */ | 117 | char *brl_options; /* Options for braille driver */ |
118 | #endif | 118 | #endif |
119 | }; | 119 | }; |
120 | 120 | ||
121 | #define MAX_CMDLINECONSOLES 8 | 121 | #define MAX_CMDLINECONSOLES 8 |
122 | 122 | ||
123 | static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; | 123 | static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; |
124 | static int selected_console = -1; | 124 | static int selected_console = -1; |
125 | static int preferred_console = -1; | 125 | static int preferred_console = -1; |
126 | int console_set_on_cmdline; | 126 | int console_set_on_cmdline; |
127 | EXPORT_SYMBOL(console_set_on_cmdline); | 127 | EXPORT_SYMBOL(console_set_on_cmdline); |
128 | 128 | ||
129 | /* Flag: console code may call schedule() */ | 129 | /* Flag: console code may call schedule() */ |
130 | static int console_may_schedule; | 130 | static int console_may_schedule; |
131 | 131 | ||
132 | #ifdef CONFIG_PRINTK | 132 | #ifdef CONFIG_PRINTK |
133 | 133 | ||
134 | static char __log_buf[__LOG_BUF_LEN]; | 134 | static char __log_buf[__LOG_BUF_LEN]; |
135 | static char *log_buf = __log_buf; | 135 | static char *log_buf = __log_buf; |
136 | static int log_buf_len = __LOG_BUF_LEN; | 136 | static int log_buf_len = __LOG_BUF_LEN; |
137 | static unsigned logged_chars; /* Number of chars produced since last read+clear operation */ | 137 | static unsigned logged_chars; /* Number of chars produced since last read+clear operation */ |
138 | 138 | ||
139 | static int __init log_buf_len_setup(char *str) | 139 | static int __init log_buf_len_setup(char *str) |
140 | { | 140 | { |
141 | unsigned size = memparse(str, &str); | 141 | unsigned size = memparse(str, &str); |
142 | unsigned long flags; | 142 | unsigned long flags; |
143 | 143 | ||
144 | if (size) | 144 | if (size) |
145 | size = roundup_pow_of_two(size); | 145 | size = roundup_pow_of_two(size); |
146 | if (size > log_buf_len) { | 146 | if (size > log_buf_len) { |
147 | unsigned start, dest_idx, offset; | 147 | unsigned start, dest_idx, offset; |
148 | char *new_log_buf; | 148 | char *new_log_buf; |
149 | 149 | ||
150 | new_log_buf = alloc_bootmem(size); | 150 | new_log_buf = alloc_bootmem(size); |
151 | if (!new_log_buf) { | 151 | if (!new_log_buf) { |
152 | printk(KERN_WARNING "log_buf_len: allocation failed\n"); | 152 | printk(KERN_WARNING "log_buf_len: allocation failed\n"); |
153 | goto out; | 153 | goto out; |
154 | } | 154 | } |
155 | 155 | ||
156 | spin_lock_irqsave(&logbuf_lock, flags); | 156 | spin_lock_irqsave(&logbuf_lock, flags); |
157 | log_buf_len = size; | 157 | log_buf_len = size; |
158 | log_buf = new_log_buf; | 158 | log_buf = new_log_buf; |
159 | 159 | ||
160 | offset = start = min(con_start, log_start); | 160 | offset = start = min(con_start, log_start); |
161 | dest_idx = 0; | 161 | dest_idx = 0; |
162 | while (start != log_end) { | 162 | while (start != log_end) { |
163 | log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; | 163 | log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; |
164 | start++; | 164 | start++; |
165 | dest_idx++; | 165 | dest_idx++; |
166 | } | 166 | } |
167 | log_start -= offset; | 167 | log_start -= offset; |
168 | con_start -= offset; | 168 | con_start -= offset; |
169 | log_end -= offset; | 169 | log_end -= offset; |
170 | spin_unlock_irqrestore(&logbuf_lock, flags); | 170 | spin_unlock_irqrestore(&logbuf_lock, flags); |
171 | 171 | ||
172 | printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); | 172 | printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); |
173 | } | 173 | } |
174 | out: | 174 | out: |
175 | return 1; | 175 | return 1; |
176 | } | 176 | } |
177 | 177 | ||
178 | __setup("log_buf_len=", log_buf_len_setup); | 178 | __setup("log_buf_len=", log_buf_len_setup); |
179 | 179 | ||
180 | #ifdef CONFIG_BOOT_PRINTK_DELAY | 180 | #ifdef CONFIG_BOOT_PRINTK_DELAY |
181 | 181 | ||
182 | static unsigned int boot_delay; /* msecs delay after each printk during bootup */ | 182 | static unsigned int boot_delay; /* msecs delay after each printk during bootup */ |
183 | static unsigned long long printk_delay_msec; /* per msec, based on boot_delay */ | 183 | static unsigned long long printk_delay_msec; /* per msec, based on boot_delay */ |
184 | 184 | ||
185 | static int __init boot_delay_setup(char *str) | 185 | static int __init boot_delay_setup(char *str) |
186 | { | 186 | { |
187 | unsigned long lpj; | 187 | unsigned long lpj; |
188 | unsigned long long loops_per_msec; | 188 | unsigned long long loops_per_msec; |
189 | 189 | ||
190 | lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */ | 190 | lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */ |
191 | loops_per_msec = (unsigned long long)lpj / 1000 * HZ; | 191 | loops_per_msec = (unsigned long long)lpj / 1000 * HZ; |
192 | 192 | ||
193 | get_option(&str, &boot_delay); | 193 | get_option(&str, &boot_delay); |
194 | if (boot_delay > 10 * 1000) | 194 | if (boot_delay > 10 * 1000) |
195 | boot_delay = 0; | 195 | boot_delay = 0; |
196 | 196 | ||
197 | printk_delay_msec = loops_per_msec; | 197 | printk_delay_msec = loops_per_msec; |
198 | printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %ld, lpj: %lu, " | 198 | printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %ld, lpj: %lu, " |
199 | "HZ: %d, printk_delay_msec: %llu\n", | 199 | "HZ: %d, printk_delay_msec: %llu\n", |
200 | boot_delay, preset_lpj, lpj, HZ, printk_delay_msec); | 200 | boot_delay, preset_lpj, lpj, HZ, printk_delay_msec); |
201 | return 1; | 201 | return 1; |
202 | } | 202 | } |
203 | __setup("boot_delay=", boot_delay_setup); | 203 | __setup("boot_delay=", boot_delay_setup); |
204 | 204 | ||
205 | static void boot_delay_msec(void) | 205 | static void boot_delay_msec(void) |
206 | { | 206 | { |
207 | unsigned long long k; | 207 | unsigned long long k; |
208 | unsigned long timeout; | 208 | unsigned long timeout; |
209 | 209 | ||
210 | if (boot_delay == 0 || system_state != SYSTEM_BOOTING) | 210 | if (boot_delay == 0 || system_state != SYSTEM_BOOTING) |
211 | return; | 211 | return; |
212 | 212 | ||
213 | k = (unsigned long long)printk_delay_msec * boot_delay; | 213 | k = (unsigned long long)printk_delay_msec * boot_delay; |
214 | 214 | ||
215 | timeout = jiffies + msecs_to_jiffies(boot_delay); | 215 | timeout = jiffies + msecs_to_jiffies(boot_delay); |
216 | while (k) { | 216 | while (k) { |
217 | k--; | 217 | k--; |
218 | cpu_relax(); | 218 | cpu_relax(); |
219 | /* | 219 | /* |
220 | * use (volatile) jiffies to prevent | 220 | * use (volatile) jiffies to prevent |
221 | * compiler reduction; loop termination via jiffies | 221 | * compiler reduction; loop termination via jiffies |
222 | * is secondary and may or may not happen. | 222 | * is secondary and may or may not happen. |
223 | */ | 223 | */ |
224 | if (time_after(jiffies, timeout)) | 224 | if (time_after(jiffies, timeout)) |
225 | break; | 225 | break; |
226 | touch_nmi_watchdog(); | 226 | touch_nmi_watchdog(); |
227 | } | 227 | } |
228 | } | 228 | } |
229 | #else | 229 | #else |
230 | static inline void boot_delay_msec(void) | 230 | static inline void boot_delay_msec(void) |
231 | { | 231 | { |
232 | } | 232 | } |
233 | #endif | 233 | #endif |
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Return the number of unread characters in the log buffer. | 236 | * Return the number of unread characters in the log buffer. |
237 | */ | 237 | */ |
238 | static int log_buf_get_len(void) | 238 | static int log_buf_get_len(void) |
239 | { | 239 | { |
240 | return logged_chars; | 240 | return logged_chars; |
241 | } | 241 | } |
242 | 242 | ||
243 | /* | 243 | /* |
244 | * Copy a range of characters from the log buffer. | 244 | * Copy a range of characters from the log buffer. |
245 | */ | 245 | */ |
246 | int log_buf_copy(char *dest, int idx, int len) | 246 | int log_buf_copy(char *dest, int idx, int len) |
247 | { | 247 | { |
248 | int ret, max; | 248 | int ret, max; |
249 | bool took_lock = false; | 249 | bool took_lock = false; |
250 | 250 | ||
251 | if (!oops_in_progress) { | 251 | if (!oops_in_progress) { |
252 | spin_lock_irq(&logbuf_lock); | 252 | spin_lock_irq(&logbuf_lock); |
253 | took_lock = true; | 253 | took_lock = true; |
254 | } | 254 | } |
255 | 255 | ||
256 | max = log_buf_get_len(); | 256 | max = log_buf_get_len(); |
257 | if (idx < 0 || idx >= max) { | 257 | if (idx < 0 || idx >= max) { |
258 | ret = -1; | 258 | ret = -1; |
259 | } else { | 259 | } else { |
260 | if (len > max) | 260 | if (len > max) |
261 | len = max; | 261 | len = max; |
262 | ret = len; | 262 | ret = len; |
263 | idx += (log_end - max); | 263 | idx += (log_end - max); |
264 | while (len-- > 0) | 264 | while (len-- > 0) |
265 | dest[len] = LOG_BUF(idx + len); | 265 | dest[len] = LOG_BUF(idx + len); |
266 | } | 266 | } |
267 | 267 | ||
268 | if (took_lock) | 268 | if (took_lock) |
269 | spin_unlock_irq(&logbuf_lock); | 269 | spin_unlock_irq(&logbuf_lock); |
270 | 270 | ||
271 | return ret; | 271 | return ret; |
272 | } | 272 | } |
273 | 273 | ||
274 | /* | 274 | /* |
275 | * Commands to do_syslog: | 275 | * Commands to do_syslog: |
276 | * | 276 | * |
277 | * 0 -- Close the log. Currently a NOP. | 277 | * 0 -- Close the log. Currently a NOP. |
278 | * 1 -- Open the log. Currently a NOP. | 278 | * 1 -- Open the log. Currently a NOP. |
279 | * 2 -- Read from the log. | 279 | * 2 -- Read from the log. |
280 | * 3 -- Read all messages remaining in the ring buffer. | 280 | * 3 -- Read all messages remaining in the ring buffer. |
281 | * 4 -- Read and clear all messages remaining in the ring buffer. | 281 | * 4 -- Read and clear all messages remaining in the ring buffer. |
282 | * 5 -- Clear ring buffer. | 282 | * 5 -- Clear ring buffer. |
283 | * 6 -- Disable printks to console. | 283 | * 6 -- Disable printks to console. |
284 | * 7 -- Enable printks to console. | 284 | * 7 -- Enable printks to console. |
285 | * 8 -- Set level of messages printed to console. | 285 | * 8 -- Set level of messages printed to console. |
286 | * 9 -- Return number of unread characters in the log buffer. | 286 | * 9 -- Return number of unread characters in the log buffer. |
287 | * 10 -- Return size of the log buffer. | 287 | * 10 -- Return size of the log buffer. |
288 | */ | 288 | */ |
289 | int do_syslog(int type, char __user *buf, int len) | 289 | int do_syslog(int type, char __user *buf, int len) |
290 | { | 290 | { |
291 | unsigned i, j, limit, count; | 291 | unsigned i, j, limit, count; |
292 | int do_clear = 0; | 292 | int do_clear = 0; |
293 | char c; | 293 | char c; |
294 | int error = 0; | 294 | int error = 0; |
295 | 295 | ||
296 | error = security_syslog(type); | 296 | error = security_syslog(type); |
297 | if (error) | 297 | if (error) |
298 | return error; | 298 | return error; |
299 | 299 | ||
300 | switch (type) { | 300 | switch (type) { |
301 | case 0: /* Close log */ | 301 | case 0: /* Close log */ |
302 | break; | 302 | break; |
303 | case 1: /* Open log */ | 303 | case 1: /* Open log */ |
304 | break; | 304 | break; |
305 | case 2: /* Read from log */ | 305 | case 2: /* Read from log */ |
306 | error = -EINVAL; | 306 | error = -EINVAL; |
307 | if (!buf || len < 0) | 307 | if (!buf || len < 0) |
308 | goto out; | 308 | goto out; |
309 | error = 0; | 309 | error = 0; |
310 | if (!len) | 310 | if (!len) |
311 | goto out; | 311 | goto out; |
312 | if (!access_ok(VERIFY_WRITE, buf, len)) { | 312 | if (!access_ok(VERIFY_WRITE, buf, len)) { |
313 | error = -EFAULT; | 313 | error = -EFAULT; |
314 | goto out; | 314 | goto out; |
315 | } | 315 | } |
316 | error = wait_event_interruptible(log_wait, | 316 | error = wait_event_interruptible(log_wait, |
317 | (log_start - log_end)); | 317 | (log_start - log_end)); |
318 | if (error) | 318 | if (error) |
319 | goto out; | 319 | goto out; |
320 | i = 0; | 320 | i = 0; |
321 | spin_lock_irq(&logbuf_lock); | 321 | spin_lock_irq(&logbuf_lock); |
322 | while (!error && (log_start != log_end) && i < len) { | 322 | while (!error && (log_start != log_end) && i < len) { |
323 | c = LOG_BUF(log_start); | 323 | c = LOG_BUF(log_start); |
324 | log_start++; | 324 | log_start++; |
325 | spin_unlock_irq(&logbuf_lock); | 325 | spin_unlock_irq(&logbuf_lock); |
326 | error = __put_user(c,buf); | 326 | error = __put_user(c,buf); |
327 | buf++; | 327 | buf++; |
328 | i++; | 328 | i++; |
329 | cond_resched(); | 329 | cond_resched(); |
330 | spin_lock_irq(&logbuf_lock); | 330 | spin_lock_irq(&logbuf_lock); |
331 | } | 331 | } |
332 | spin_unlock_irq(&logbuf_lock); | 332 | spin_unlock_irq(&logbuf_lock); |
333 | if (!error) | 333 | if (!error) |
334 | error = i; | 334 | error = i; |
335 | break; | 335 | break; |
336 | case 4: /* Read/clear last kernel messages */ | 336 | case 4: /* Read/clear last kernel messages */ |
337 | do_clear = 1; | 337 | do_clear = 1; |
338 | /* FALL THRU */ | 338 | /* FALL THRU */ |
339 | case 3: /* Read last kernel messages */ | 339 | case 3: /* Read last kernel messages */ |
340 | error = -EINVAL; | 340 | error = -EINVAL; |
341 | if (!buf || len < 0) | 341 | if (!buf || len < 0) |
342 | goto out; | 342 | goto out; |
343 | error = 0; | 343 | error = 0; |
344 | if (!len) | 344 | if (!len) |
345 | goto out; | 345 | goto out; |
346 | if (!access_ok(VERIFY_WRITE, buf, len)) { | 346 | if (!access_ok(VERIFY_WRITE, buf, len)) { |
347 | error = -EFAULT; | 347 | error = -EFAULT; |
348 | goto out; | 348 | goto out; |
349 | } | 349 | } |
350 | count = len; | 350 | count = len; |
351 | if (count > log_buf_len) | 351 | if (count > log_buf_len) |
352 | count = log_buf_len; | 352 | count = log_buf_len; |
353 | spin_lock_irq(&logbuf_lock); | 353 | spin_lock_irq(&logbuf_lock); |
354 | if (count > logged_chars) | 354 | if (count > logged_chars) |
355 | count = logged_chars; | 355 | count = logged_chars; |
356 | if (do_clear) | 356 | if (do_clear) |
357 | logged_chars = 0; | 357 | logged_chars = 0; |
358 | limit = log_end; | 358 | limit = log_end; |
359 | /* | 359 | /* |
360 | * __put_user() could sleep, and while we sleep | 360 | * __put_user() could sleep, and while we sleep |
361 | * printk() could overwrite the messages | 361 | * printk() could overwrite the messages |
362 | * we try to copy to user space. Therefore | 362 | * we try to copy to user space. Therefore |
363 | * the messages are copied in reverse. <manfreds> | 363 | * the messages are copied in reverse. <manfreds> |
364 | */ | 364 | */ |
365 | for (i = 0; i < count && !error; i++) { | 365 | for (i = 0; i < count && !error; i++) { |
366 | j = limit-1-i; | 366 | j = limit-1-i; |
367 | if (j + log_buf_len < log_end) | 367 | if (j + log_buf_len < log_end) |
368 | break; | 368 | break; |
369 | c = LOG_BUF(j); | 369 | c = LOG_BUF(j); |
370 | spin_unlock_irq(&logbuf_lock); | 370 | spin_unlock_irq(&logbuf_lock); |
371 | error = __put_user(c,&buf[count-1-i]); | 371 | error = __put_user(c,&buf[count-1-i]); |
372 | cond_resched(); | 372 | cond_resched(); |
373 | spin_lock_irq(&logbuf_lock); | 373 | spin_lock_irq(&logbuf_lock); |
374 | } | 374 | } |
375 | spin_unlock_irq(&logbuf_lock); | 375 | spin_unlock_irq(&logbuf_lock); |
376 | if (error) | 376 | if (error) |
377 | break; | 377 | break; |
378 | error = i; | 378 | error = i; |
379 | if (i != count) { | 379 | if (i != count) { |
380 | int offset = count-error; | 380 | int offset = count-error; |
381 | /* the log wrapped during the copy; shift what we copied to the front */ | 381 | /* the log wrapped during the copy; shift what we copied to the front */ |
382 | for (i = 0; i < error; i++) { | 382 | for (i = 0; i < error; i++) { |
383 | if (__get_user(c,&buf[i+offset]) || | 383 | if (__get_user(c,&buf[i+offset]) || |
384 | __put_user(c,&buf[i])) { | 384 | __put_user(c,&buf[i])) { |
385 | error = -EFAULT; | 385 | error = -EFAULT; |
386 | break; | 386 | break; |
387 | } | 387 | } |
388 | cond_resched(); | 388 | cond_resched(); |
389 | } | 389 | } |
390 | } | 390 | } |
391 | break; | 391 | break; |
392 | case 5: /* Clear ring buffer */ | 392 | case 5: /* Clear ring buffer */ |
393 | logged_chars = 0; | 393 | logged_chars = 0; |
394 | break; | 394 | break; |
395 | case 6: /* Disable logging to console */ | 395 | case 6: /* Disable logging to console */ |
396 | console_loglevel = minimum_console_loglevel; | 396 | console_loglevel = minimum_console_loglevel; |
397 | break; | 397 | break; |
398 | case 7: /* Enable logging to console */ | 398 | case 7: /* Enable logging to console */ |
399 | console_loglevel = default_console_loglevel; | 399 | console_loglevel = default_console_loglevel; |
400 | break; | 400 | break; |
401 | case 8: /* Set level of messages printed to console */ | 401 | case 8: /* Set level of messages printed to console */ |
402 | error = -EINVAL; | 402 | error = -EINVAL; |
403 | if (len < 1 || len > 8) | 403 | if (len < 1 || len > 8) |
404 | goto out; | 404 | goto out; |
405 | if (len < minimum_console_loglevel) | 405 | if (len < minimum_console_loglevel) |
406 | len = minimum_console_loglevel; | 406 | len = minimum_console_loglevel; |
407 | console_loglevel = len; | 407 | console_loglevel = len; |
408 | error = 0; | 408 | error = 0; |
409 | break; | 409 | break; |
410 | case 9: /* Number of chars in the log buffer */ | 410 | case 9: /* Number of chars in the log buffer */ |
411 | error = log_end - log_start; | 411 | error = log_end - log_start; |
412 | break; | 412 | break; |
413 | case 10: /* Size of the log buffer */ | 413 | case 10: /* Size of the log buffer */ |
414 | error = log_buf_len; | 414 | error = log_buf_len; |
415 | break; | 415 | break; |
416 | default: | 416 | default: |
417 | error = -EINVAL; | 417 | error = -EINVAL; |
418 | break; | 418 | break; |
419 | } | 419 | } |
420 | out: | 420 | out: |
421 | return error; | 421 | return error; |
422 | } | 422 | } |
423 | 423 | ||
424 | asmlinkage long sys_syslog(int type, char __user *buf, int len) | 424 | asmlinkage long sys_syslog(int type, char __user *buf, int len) |
425 | { | 425 | { |
426 | return do_syslog(type, buf, len); | 426 | return do_syslog(type, buf, len); |
427 | } | 427 | } |
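The command numbers above are exactly what user space passes in; glibc exposes sys_syslog as klogctl(3). A small sketch of how a dmesg-like tool might use commands 10 and 3 (it must pass the security_syslog() check, i.e. typically run as root):

        #include <stdio.h>
        #include <stdlib.h>
        #include <sys/klog.h>

        int main(void)
        {
                int size = klogctl(10, NULL, 0);        /* command 10: size of log_buf */
                char *buf;
                int n;

                if (size <= 0 || !(buf = malloc(size)))
                        return 1;
                n = klogctl(3, buf, size);              /* command 3: read all messages */
                if (n > 0)
                        fwrite(buf, 1, n, stdout);
                free(buf);
                return 0;
        }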
428 | 428 | ||
429 | /* | 429 | /* |
430 | * Call the console drivers on a range of log_buf | 430 | * Call the console drivers on a range of log_buf |
431 | */ | 431 | */ |
432 | static void __call_console_drivers(unsigned start, unsigned end) | 432 | static void __call_console_drivers(unsigned start, unsigned end) |
433 | { | 433 | { |
434 | struct console *con; | 434 | struct console *con; |
435 | 435 | ||
436 | for (con = console_drivers; con; con = con->next) { | 436 | for (con = console_drivers; con; con = con->next) { |
437 | if ((con->flags & CON_ENABLED) && con->write && | 437 | if ((con->flags & CON_ENABLED) && con->write && |
438 | (cpu_online(smp_processor_id()) || | 438 | (cpu_online(smp_processor_id()) || |
439 | (con->flags & CON_ANYTIME))) | 439 | (con->flags & CON_ANYTIME))) |
440 | con->write(con, &LOG_BUF(start), end - start); | 440 | con->write(con, &LOG_BUF(start), end - start); |
441 | } | 441 | } |
442 | } | 442 | } |
443 | 443 | ||
444 | static int __read_mostly ignore_loglevel; | 444 | static int __read_mostly ignore_loglevel; |
445 | 445 | ||
446 | static int __init ignore_loglevel_setup(char *str) | 446 | static int __init ignore_loglevel_setup(char *str) |
447 | { | 447 | { |
448 | ignore_loglevel = 1; | 448 | ignore_loglevel = 1; |
449 | printk(KERN_INFO "debug: ignoring loglevel setting.\n"); | 449 | printk(KERN_INFO "debug: ignoring loglevel setting.\n"); |
450 | 450 | ||
451 | return 0; | 451 | return 0; |
452 | } | 452 | } |
453 | 453 | ||
454 | early_param("ignore_loglevel", ignore_loglevel_setup); | 454 | early_param("ignore_loglevel", ignore_loglevel_setup); |
455 | 455 | ||
456 | /* | 456 | /* |
457 | * Write out chars from start to end - 1 inclusive | 457 | * Write out chars from start to end - 1 inclusive |
458 | */ | 458 | */ |
459 | static void _call_console_drivers(unsigned start, | 459 | static void _call_console_drivers(unsigned start, |
460 | unsigned end, int msg_log_level) | 460 | unsigned end, int msg_log_level) |
461 | { | 461 | { |
462 | if ((msg_log_level < console_loglevel || ignore_loglevel) && | 462 | if ((msg_log_level < console_loglevel || ignore_loglevel) && |
463 | console_drivers && start != end) { | 463 | console_drivers && start != end) { |
464 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { | 464 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { |
465 | /* wrapped write */ | 465 | /* wrapped write */ |
466 | __call_console_drivers(start & LOG_BUF_MASK, | 466 | __call_console_drivers(start & LOG_BUF_MASK, |
467 | log_buf_len); | 467 | log_buf_len); |
468 | __call_console_drivers(0, end & LOG_BUF_MASK); | 468 | __call_console_drivers(0, end & LOG_BUF_MASK); |
469 | } else { | 469 | } else { |
470 | __call_console_drivers(start, end); | 470 | __call_console_drivers(start, end); |
471 | } | 471 | } |
472 | } | 472 | } |
473 | } | 473 | } |
474 | 474 | ||
475 | /* | 475 | /* |
476 | * Call the console drivers, asking them to write out | 476 | * Call the console drivers, asking them to write out |
477 | * log_buf[start] to log_buf[end - 1]. | 477 | * log_buf[start] to log_buf[end - 1]. |
478 | * The console_sem must be held. | 478 | * The console_sem must be held. |
479 | */ | 479 | */ |
480 | static void call_console_drivers(unsigned start, unsigned end) | 480 | static void call_console_drivers(unsigned start, unsigned end) |
481 | { | 481 | { |
482 | unsigned cur_index, start_print; | 482 | unsigned cur_index, start_print; |
483 | static int msg_level = -1; | 483 | static int msg_level = -1; |
484 | 484 | ||
485 | BUG_ON(((int)(start - end)) > 0); | 485 | BUG_ON(((int)(start - end)) > 0); |
486 | 486 | ||
487 | cur_index = start; | 487 | cur_index = start; |
488 | start_print = start; | 488 | start_print = start; |
489 | while (cur_index != end) { | 489 | while (cur_index != end) { |
490 | if (msg_level < 0 && ((end - cur_index) > 2) && | 490 | if (msg_level < 0 && ((end - cur_index) > 2) && |
491 | LOG_BUF(cur_index + 0) == '<' && | 491 | LOG_BUF(cur_index + 0) == '<' && |
492 | LOG_BUF(cur_index + 1) >= '0' && | 492 | LOG_BUF(cur_index + 1) >= '0' && |
493 | LOG_BUF(cur_index + 1) <= '7' && | 493 | LOG_BUF(cur_index + 1) <= '7' && |
494 | LOG_BUF(cur_index + 2) == '>') { | 494 | LOG_BUF(cur_index + 2) == '>') { |
495 | msg_level = LOG_BUF(cur_index + 1) - '0'; | 495 | msg_level = LOG_BUF(cur_index + 1) - '0'; |
496 | cur_index += 3; | 496 | cur_index += 3; |
497 | start_print = cur_index; | 497 | start_print = cur_index; |
498 | } | 498 | } |
499 | while (cur_index != end) { | 499 | while (cur_index != end) { |
500 | char c = LOG_BUF(cur_index); | 500 | char c = LOG_BUF(cur_index); |
501 | 501 | ||
502 | cur_index++; | 502 | cur_index++; |
503 | if (c == '\n') { | 503 | if (c == '\n') { |
504 | if (msg_level < 0) { | 504 | if (msg_level < 0) { |
505 | /* | 505 | /* |
506 | * printk() has already given us loglevel tags in | 506 | * printk() has already given us loglevel tags in |
507 | * the buffer. This code is here in case the | 507 | * the buffer. This code is here in case the |
508 | * log buffer has wrapped right round and scribbled | 508 | * log buffer has wrapped right round and scribbled |
509 | * on those tags | 509 | * on those tags |
510 | */ | 510 | */ |
511 | msg_level = default_message_loglevel; | 511 | msg_level = default_message_loglevel; |
512 | } | 512 | } |
513 | _call_console_drivers(start_print, cur_index, msg_level); | 513 | _call_console_drivers(start_print, cur_index, msg_level); |
514 | msg_level = -1; | 514 | msg_level = -1; |
515 | start_print = cur_index; | 515 | start_print = cur_index; |
516 | break; | 516 | break; |
517 | } | 517 | } |
518 | } | 518 | } |
519 | } | 519 | } |
520 | _call_console_drivers(start_print, end, msg_level); | 520 | _call_console_drivers(start_print, end, msg_level); |
521 | } | 521 | } |
522 | 522 | ||
523 | static void emit_log_char(char c) | 523 | static void emit_log_char(char c) |
524 | { | 524 | { |
525 | LOG_BUF(log_end) = c; | 525 | LOG_BUF(log_end) = c; |
526 | log_end++; | 526 | log_end++; |
527 | if (log_end - log_start > log_buf_len) | 527 | if (log_end - log_start > log_buf_len) |
528 | log_start = log_end - log_buf_len; | 528 | log_start = log_end - log_buf_len; |
529 | if (log_end - con_start > log_buf_len) | 529 | if (log_end - con_start > log_buf_len) |
530 | con_start = log_end - log_buf_len; | 530 | con_start = log_end - log_buf_len; |
531 | if (logged_chars < log_buf_len) | 531 | if (logged_chars < log_buf_len) |
532 | logged_chars++; | 532 | logged_chars++; |
533 | } | 533 | } |
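emit_log_char() and the wrapped-write split in _call_console_drivers() above rely on the same trick: log_start, con_start and log_end are free-running unsigned counters, reduced modulo the power-of-two buffer size only at access time via LOG_BUF(), so "end - start" stays correct even across integer wraparound. A stand-alone user-space illustration of the scheme (all names below are local to the demo, not kernel symbols):

        #include <stdio.h>

        #define BUF_LEN  16                     /* power of two, like log_buf_len */
        #define BUF_MASK (BUF_LEN - 1)
        #define BUF(idx) (buf[(idx) & BUF_MASK])

        static char buf[BUF_LEN];
        static unsigned int start, end;         /* free-running, like log_start/log_end */

        static void emit(char c)                /* cf. emit_log_char() */
        {
                BUF(end) = c;
                end++;
                if (end - start > BUF_LEN)      /* oldest characters overwritten */
                        start = end - BUF_LEN;
        }

        static void flush(unsigned int s, unsigned int e) /* cf. _call_console_drivers() */
        {
                if ((s & BUF_MASK) > (e & BUF_MASK)) {    /* range wraps: two writes */
                        fwrite(&BUF(s), 1, BUF_LEN - (s & BUF_MASK), stdout);
                        fwrite(buf, 1, e & BUF_MASK, stdout);
                } else {
                        fwrite(&BUF(s), 1, e - s, stdout);
                }
        }

        int main(void)
        {
                const char *msg = "abcdefghijklmnopqrstu"; /* longer than BUF_LEN */

                while (*msg)
                        emit(*msg++);
                flush(end - 10, end);   /* prints "lmnopqrstu", split across the wrap */
                putchar('\n');
                return 0;
        }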
534 | 534 | ||
535 | /* | 535 | /* |
536 | * Zap console related locks when oopsing. Only zap at most once | 536 | * Zap console related locks when oopsing. Only zap at most once |
537 | * every 30 seconds (the 30 * HZ below), to leave time for slow | 537 | * every 30 seconds (the 30 * HZ below), to leave time for slow |
538 | * consoles to print a full oops. | 538 | * consoles to print a full oops. |
539 | */ | 539 | */ |
540 | static void zap_locks(void) | 540 | static void zap_locks(void) |
541 | { | 541 | { |
542 | static unsigned long oops_timestamp; | 542 | static unsigned long oops_timestamp; |
543 | 543 | ||
544 | if (time_after_eq(jiffies, oops_timestamp) && | 544 | if (time_after_eq(jiffies, oops_timestamp) && |
545 | !time_after(jiffies, oops_timestamp + 30 * HZ)) | 545 | !time_after(jiffies, oops_timestamp + 30 * HZ)) |
546 | return; | 546 | return; |
547 | 547 | ||
548 | oops_timestamp = jiffies; | 548 | oops_timestamp = jiffies; |
549 | 549 | ||
550 | /* If a crash is occurring, make sure we can't deadlock */ | 550 | /* If a crash is occurring, make sure we can't deadlock */ |
551 | spin_lock_init(&logbuf_lock); | 551 | spin_lock_init(&logbuf_lock); |
552 | /* And make sure that we print immediately */ | 552 | /* And make sure that we print immediately */ |
553 | init_MUTEX(&console_sem); | 553 | init_MUTEX(&console_sem); |
554 | } | 554 | } |
555 | 555 | ||
556 | #if defined(CONFIG_PRINTK_TIME) | 556 | #if defined(CONFIG_PRINTK_TIME) |
557 | static int printk_time = 1; | 557 | static int printk_time = 1; |
558 | #else | 558 | #else |
559 | static int printk_time = 0; | 559 | static int printk_time = 0; |
560 | #endif | 560 | #endif |
561 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); | 561 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); |
562 | 562 | ||
563 | /* Check if we have any console registered that can be called early in boot. */ | 563 | /* Check if we have any console registered that can be called early in boot. */ |
564 | static int have_callable_console(void) | 564 | static int have_callable_console(void) |
565 | { | 565 | { |
566 | struct console *con; | 566 | struct console *con; |
567 | 567 | ||
568 | for (con = console_drivers; con; con = con->next) | 568 | for (con = console_drivers; con; con = con->next) |
569 | if (con->flags & CON_ANYTIME) | 569 | if (con->flags & CON_ANYTIME) |
570 | return 1; | 570 | return 1; |
571 | 571 | ||
572 | return 0; | 572 | return 0; |
573 | } | 573 | } |
574 | 574 | ||
575 | /** | 575 | /** |
576 | * printk - print a kernel message | 576 | * printk - print a kernel message |
577 | * @fmt: format string | 577 | * @fmt: format string |
578 | * | 578 | * |
579 | * This is printk(). It can be called from any context. We want it to work. | 579 | * This is printk(). It can be called from any context. We want it to work. |
580 | * The klogd wakeup is deferred from printk() itself to the timer tick | 580 | * The klogd wakeup is deferred from printk() itself to the timer tick |
581 | * (see printk_tick() and wake_up_klogd() below), so calling printk() | 581 | * (see printk_tick() and wake_up_klogd() below), so calling printk() |
582 | * from scheduler code can no longer deadlock on the runqueue lock. | 582 | * from scheduler code can no longer deadlock on the runqueue lock. |
583 | * | 583 | * |
584 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and | 584 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and |
585 | * call the console drivers. If we fail to get the semaphore we place the output | 585 | * call the console drivers. If we fail to get the semaphore we place the output |
586 | * into the log buffer and return. The current holder of the console_sem will | 586 | * into the log buffer and return. The current holder of the console_sem will |
587 | * notice the new output in release_console_sem() and will send it to the | 587 | * notice the new output in release_console_sem() and will send it to the |
588 | * consoles before releasing the semaphore. | 588 | * consoles before releasing the semaphore. |
589 | * | 589 | * |
590 | * One effect of this deferred printing is that code which calls printk() and | 590 | * One effect of this deferred printing is that code which calls printk() and |
591 | * then changes console_loglevel may break. This is because console_loglevel | 591 | * then changes console_loglevel may break. This is because console_loglevel |
592 | * is inspected when the actual printing occurs. | 592 | * is inspected when the actual printing occurs. |
593 | * | 593 | * |
594 | * See also: | 594 | * See also: |
595 | * printf(3) | 595 | * printf(3) |
596 | */ | 596 | */ |
597 | 597 | ||
598 | asmlinkage int printk(const char *fmt, ...) | 598 | asmlinkage int printk(const char *fmt, ...) |
599 | { | 599 | { |
600 | va_list args; | 600 | va_list args; |
601 | int r; | 601 | int r; |
602 | 602 | ||
603 | va_start(args, fmt); | 603 | va_start(args, fmt); |
604 | r = vprintk(fmt, args); | 604 | r = vprintk(fmt, args); |
605 | va_end(args); | 605 | va_end(args); |
606 | 606 | ||
607 | return r; | 607 | return r; |
608 | } | 608 | } |
609 | 609 | ||
610 | /* cpu currently holding logbuf_lock */ | 610 | /* cpu currently holding logbuf_lock */ |
611 | static volatile unsigned int printk_cpu = UINT_MAX; | 611 | static volatile unsigned int printk_cpu = UINT_MAX; |
612 | 612 | ||
613 | /* | 613 | /* |
614 | * Can we actually use the console at this time on this cpu? | 614 | * Can we actually use the console at this time on this cpu? |
615 | * | 615 | * |
616 | * Console drivers may assume that per-cpu resources have | 616 | * Console drivers may assume that per-cpu resources have |
617 | * been allocated. So unless they're explicitly marked as | 617 | * been allocated. So unless they're explicitly marked as |
618 | * being able to cope (CON_ANYTIME) don't call them until | 618 | * being able to cope (CON_ANYTIME) don't call them until |
619 | * this CPU is officially up. | 619 | * this CPU is officially up. |
620 | */ | 620 | */ |
621 | static inline int can_use_console(unsigned int cpu) | 621 | static inline int can_use_console(unsigned int cpu) |
622 | { | 622 | { |
623 | return cpu_online(cpu) || have_callable_console(); | 623 | return cpu_online(cpu) || have_callable_console(); |
624 | } | 624 | } |
625 | 625 | ||
626 | /* | 626 | /* |
627 | * Try to get console ownership to actually show the kernel | 627 | * Try to get console ownership to actually show the kernel |
628 | * messages from a 'printk'. Return true (and with the | 628 | * messages from a 'printk'. Return true (and with the |
629 | * console_semaphore held, and 'console_locked' set) if it | 629 | * console_semaphore held, and 'console_locked' set) if it |
630 | * is successful, false otherwise. | 630 | * is successful, false otherwise. |
631 | * | 631 | * |
632 | * This gets called with the 'logbuf_lock' spinlock held and | 632 | * This gets called with the 'logbuf_lock' spinlock held and |
633 | * interrupts disabled. It should return with 'logbuf_lock' | 633 | * interrupts disabled. It should return with 'logbuf_lock' |
634 | * released but interrupts still disabled. | 634 | * released but interrupts still disabled. |
635 | */ | 635 | */ |
636 | static int acquire_console_semaphore_for_printk(unsigned int cpu) | 636 | static int acquire_console_semaphore_for_printk(unsigned int cpu) |
637 | { | 637 | { |
638 | int retval = 0; | 638 | int retval = 0; |
639 | 639 | ||
640 | if (!try_acquire_console_sem()) { | 640 | if (!try_acquire_console_sem()) { |
641 | retval = 1; | 641 | retval = 1; |
642 | 642 | ||
643 | /* | 643 | /* |
644 | * If we can't use the console, we need to release | 644 | * If we can't use the console, we need to release |
645 | * the console semaphore by hand to avoid flushing | 645 | * the console semaphore by hand to avoid flushing |
646 | * the buffer. We need to hold the console semaphore | 646 | * the buffer. We need to hold the console semaphore |
647 | * in order to do this test safely. | 647 | * in order to do this test safely. |
648 | */ | 648 | */ |
649 | if (!can_use_console(cpu)) { | 649 | if (!can_use_console(cpu)) { |
650 | console_locked = 0; | 650 | console_locked = 0; |
651 | up(&console_sem); | 651 | up(&console_sem); |
652 | retval = 0; | 652 | retval = 0; |
653 | } | 653 | } |
654 | } | 654 | } |
655 | printk_cpu = UINT_MAX; | 655 | printk_cpu = UINT_MAX; |
656 | spin_unlock(&logbuf_lock); | 656 | spin_unlock(&logbuf_lock); |
657 | return retval; | 657 | return retval; |
658 | } | 658 | } |
659 | static const char recursion_bug_msg [] = | 659 | static const char recursion_bug_msg [] = |
660 | KERN_CRIT "BUG: recent printk recursion!\n"; | 660 | KERN_CRIT "BUG: recent printk recursion!\n"; |
661 | static int recursion_bug; | 661 | static int recursion_bug; |
662 | static int new_text_line = 1; | 662 | static int new_text_line = 1; |
663 | static char printk_buf[1024]; | 663 | static char printk_buf[1024]; |
664 | 664 | ||
665 | asmlinkage int vprintk(const char *fmt, va_list args) | 665 | asmlinkage int vprintk(const char *fmt, va_list args) |
666 | { | 666 | { |
667 | int printed_len = 0; | 667 | int printed_len = 0; |
668 | int current_log_level = default_message_loglevel; | 668 | int current_log_level = default_message_loglevel; |
669 | unsigned long flags; | 669 | unsigned long flags; |
670 | int this_cpu; | 670 | int this_cpu; |
671 | char *p; | 671 | char *p; |
672 | 672 | ||
673 | boot_delay_msec(); | 673 | boot_delay_msec(); |
674 | 674 | ||
675 | preempt_disable(); | 675 | preempt_disable(); |
676 | /* This stops the holder of console_sem just where we want him */ | 676 | /* This stops the holder of console_sem just where we want him */ |
677 | raw_local_irq_save(flags); | 677 | raw_local_irq_save(flags); |
678 | this_cpu = smp_processor_id(); | 678 | this_cpu = smp_processor_id(); |
679 | 679 | ||
680 | /* | 680 | /* |
681 | * Ouch, printk recursed into itself! | 681 | * Ouch, printk recursed into itself! |
682 | */ | 682 | */ |
683 | if (unlikely(printk_cpu == this_cpu)) { | 683 | if (unlikely(printk_cpu == this_cpu)) { |
684 | /* | 684 | /* |
685 | * If a crash is occurring during printk() on this CPU, | 685 | * If a crash is occurring during printk() on this CPU, |
686 | * then try to get the crash message out but make sure | 686 | * then try to get the crash message out but make sure |
687 | * we can't deadlock. Otherwise just return to avoid the | 687 | * we can't deadlock. Otherwise just return to avoid the |
688 | * recursion - but flag the recursion so that it can be | 688 | * recursion - but flag the recursion so that it can be |
689 | * printed at the next appropriate moment: | 689 | * printed at the next appropriate moment: |
690 | */ | 690 | */ |
691 | if (!oops_in_progress) { | 691 | if (!oops_in_progress) { |
692 | recursion_bug = 1; | 692 | recursion_bug = 1; |
693 | goto out_restore_irqs; | 693 | goto out_restore_irqs; |
694 | } | 694 | } |
695 | zap_locks(); | 695 | zap_locks(); |
696 | } | 696 | } |
697 | 697 | ||
698 | lockdep_off(); | 698 | lockdep_off(); |
699 | spin_lock(&logbuf_lock); | 699 | spin_lock(&logbuf_lock); |
700 | printk_cpu = this_cpu; | 700 | printk_cpu = this_cpu; |
701 | 701 | ||
702 | if (recursion_bug) { | 702 | if (recursion_bug) { |
703 | recursion_bug = 0; | 703 | recursion_bug = 0; |
704 | strcpy(printk_buf, recursion_bug_msg); | 704 | strcpy(printk_buf, recursion_bug_msg); |
705 | printed_len = strlen(recursion_bug_msg); /* exclude the '\0' */ | 705 | printed_len = strlen(recursion_bug_msg); /* exclude the '\0' */ |
706 | } | 706 | } |
707 | /* Emit the output into the temporary buffer */ | 707 | /* Emit the output into the temporary buffer */ |
708 | printed_len += vscnprintf(printk_buf + printed_len, | 708 | printed_len += vscnprintf(printk_buf + printed_len, |
709 | sizeof(printk_buf) - printed_len, fmt, args); | 709 | sizeof(printk_buf) - printed_len, fmt, args); |
710 | 710 | ||
711 | 711 | ||
712 | /* | 712 | /* |
713 | * Copy the output into log_buf. If the caller didn't provide | 713 | * Copy the output into log_buf. If the caller didn't provide |
714 | * appropriate log level tags, we insert them here | 714 | * appropriate log level tags, we insert them here |
715 | */ | 715 | */ |
716 | for (p = printk_buf; *p; p++) { | 716 | for (p = printk_buf; *p; p++) { |
717 | if (new_text_line) { | 717 | if (new_text_line) { |
718 | /* If a token, set current_log_level and skip over */ | 718 | /* If a token, set current_log_level and skip over */ |
719 | if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' && | 719 | if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' && |
720 | p[2] == '>') { | 720 | p[2] == '>') { |
721 | current_log_level = p[1] - '0'; | 721 | current_log_level = p[1] - '0'; |
722 | p += 3; | 722 | p += 3; |
723 | printed_len -= 3; | 723 | printed_len -= 3; |
724 | } | 724 | } |
725 | 725 | ||
726 | /* Always output the token */ | 726 | /* Always output the token */ |
727 | emit_log_char('<'); | 727 | emit_log_char('<'); |
728 | emit_log_char(current_log_level + '0'); | 728 | emit_log_char(current_log_level + '0'); |
729 | emit_log_char('>'); | 729 | emit_log_char('>'); |
730 | printed_len += 3; | 730 | printed_len += 3; |
731 | new_text_line = 0; | 731 | new_text_line = 0; |
732 | 732 | ||
733 | if (printk_time) { | 733 | if (printk_time) { |
734 | /* Follow the token with the time */ | 734 | /* Follow the token with the time */ |
735 | char tbuf[50], *tp; | 735 | char tbuf[50], *tp; |
736 | unsigned tlen; | 736 | unsigned tlen; |
737 | unsigned long long t; | 737 | unsigned long long t; |
738 | unsigned long nanosec_rem; | 738 | unsigned long nanosec_rem; |
739 | 739 | ||
740 | t = cpu_clock(printk_cpu); | 740 | t = cpu_clock(printk_cpu); |
741 | nanosec_rem = do_div(t, 1000000000); | 741 | nanosec_rem = do_div(t, 1000000000); |
742 | tlen = sprintf(tbuf, "[%5lu.%06lu] ", | 742 | tlen = sprintf(tbuf, "[%5lu.%06lu] ", |
743 | (unsigned long) t, | 743 | (unsigned long) t, |
744 | nanosec_rem / 1000); | 744 | nanosec_rem / 1000); |
745 | 745 | ||
746 | for (tp = tbuf; tp < tbuf + tlen; tp++) | 746 | for (tp = tbuf; tp < tbuf + tlen; tp++) |
747 | emit_log_char(*tp); | 747 | emit_log_char(*tp); |
748 | printed_len += tlen; | 748 | printed_len += tlen; |
749 | } | 749 | } |
750 | 750 | ||
751 | if (!*p) | 751 | if (!*p) |
752 | break; | 752 | break; |
753 | } | 753 | } |
754 | 754 | ||
755 | emit_log_char(*p); | 755 | emit_log_char(*p); |
756 | if (*p == '\n') | 756 | if (*p == '\n') |
757 | new_text_line = 1; | 757 | new_text_line = 1; |
758 | } | 758 | } |
759 | 759 | ||
760 | /* | 760 | /* |
761 | * Try to acquire and then immediately release the | 761 | * Try to acquire and then immediately release the |
762 | * console semaphore. The release will do all the | 762 | * console semaphore. The release will do all the |
763 | * actual magic (print out buffers, wake up klogd, | 763 | * actual magic (print out buffers, wake up klogd, |
764 | * etc). | 764 | * etc). |
765 | * | 765 | * |
766 | * The acquire_console_semaphore_for_printk() function | 766 | * The acquire_console_semaphore_for_printk() function |
767 | * will release 'logbuf_lock' regardless of whether it | 767 | * will release 'logbuf_lock' regardless of whether it |
768 | * actually gets the semaphore or not. | 768 | * actually gets the semaphore or not. |
769 | */ | 769 | */ |
770 | if (acquire_console_semaphore_for_printk(this_cpu)) | 770 | if (acquire_console_semaphore_for_printk(this_cpu)) |
771 | release_console_sem(); | 771 | release_console_sem(); |
772 | 772 | ||
773 | lockdep_on(); | 773 | lockdep_on(); |
774 | out_restore_irqs: | 774 | out_restore_irqs: |
775 | raw_local_irq_restore(flags); | 775 | raw_local_irq_restore(flags); |
776 | 776 | ||
777 | preempt_enable(); | 777 | preempt_enable(); |
778 | return printed_len; | 778 | return printed_len; |
779 | } | 779 | } |
780 | EXPORT_SYMBOL(printk); | 780 | EXPORT_SYMBOL(printk); |
781 | EXPORT_SYMBOL(vprintk); | 781 | EXPORT_SYMBOL(vprintk); |
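For reference, with printk_time enabled the copy loop in vprintk() above turns a call like printk(KERN_INFO "nr_cpus = %d\n", 4) into the following bytes in log_buf (timestamp illustrative):

        <6>[   42.103218] nr_cpus = 4

KERN_INFO is the string "<6>", so the level token is three characters; the timestamp is seconds and microseconds from cpu_clock(); and because new_text_line is tracked across calls, a printk() without a trailing newline is continued, not re-tagged, by the next call.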
782 | 782 | ||
783 | #else | 783 | #else |
784 | 784 | ||
785 | asmlinkage long sys_syslog(int type, char __user *buf, int len) | 785 | asmlinkage long sys_syslog(int type, char __user *buf, int len) |
786 | { | 786 | { |
787 | return -ENOSYS; | 787 | return -ENOSYS; |
788 | } | 788 | } |
789 | 789 | ||
790 | static void call_console_drivers(unsigned start, unsigned end) | 790 | static void call_console_drivers(unsigned start, unsigned end) |
791 | { | 791 | { |
792 | } | 792 | } |
793 | 793 | ||
794 | #endif | 794 | #endif |
795 | 795 | ||
796 | static int __add_preferred_console(char *name, int idx, char *options, | 796 | static int __add_preferred_console(char *name, int idx, char *options, |
797 | char *brl_options) | 797 | char *brl_options) |
798 | { | 798 | { |
799 | struct console_cmdline *c; | 799 | struct console_cmdline *c; |
800 | int i; | 800 | int i; |
801 | 801 | ||
802 | /* | 802 | /* |
803 | * See if this tty is not yet registered, and | 803 | * See if this tty is not yet registered, and |
804 | * if we have a slot free. | 804 | * if we have a slot free. |
805 | */ | 805 | */ |
806 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | 806 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) |
807 | if (strcmp(console_cmdline[i].name, name) == 0 && | 807 | if (strcmp(console_cmdline[i].name, name) == 0 && |
808 | console_cmdline[i].index == idx) { | 808 | console_cmdline[i].index == idx) { |
809 | if (!brl_options) | 809 | if (!brl_options) |
810 | selected_console = i; | 810 | selected_console = i; |
811 | return 0; | 811 | return 0; |
812 | } | 812 | } |
813 | if (i == MAX_CMDLINECONSOLES) | 813 | if (i == MAX_CMDLINECONSOLES) |
814 | return -E2BIG; | 814 | return -E2BIG; |
815 | if (!brl_options) | 815 | if (!brl_options) |
816 | selected_console = i; | 816 | selected_console = i; |
817 | c = &console_cmdline[i]; | 817 | c = &console_cmdline[i]; |
818 | strlcpy(c->name, name, sizeof(c->name)); | 818 | strlcpy(c->name, name, sizeof(c->name)); |
819 | c->options = options; | 819 | c->options = options; |
820 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | 820 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE |
821 | c->brl_options = brl_options; | 821 | c->brl_options = brl_options; |
822 | #endif | 822 | #endif |
823 | c->index = idx; | 823 | c->index = idx; |
824 | return 0; | 824 | return 0; |
825 | } | 825 | } |
826 | /* | 826 | /* |
827 | * Set up a list of consoles. Called from init/main.c | 827 | * Set up a list of consoles. Called from init/main.c |
828 | */ | 828 | */ |
829 | static int __init console_setup(char *str) | 829 | static int __init console_setup(char *str) |
830 | { | 830 | { |
831 | char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ | 831 | char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ |
832 | char *s, *options, *brl_options = NULL; | 832 | char *s, *options, *brl_options = NULL; |
833 | int idx; | 833 | int idx; |
834 | 834 | ||
835 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | 835 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE |
836 | if (!memcmp(str, "brl,", 4)) { | 836 | if (!memcmp(str, "brl,", 4)) { |
837 | brl_options = ""; | 837 | brl_options = ""; |
838 | str += 4; | 838 | str += 4; |
839 | } else if (!memcmp(str, "brl=", 4)) { | 839 | } else if (!memcmp(str, "brl=", 4)) { |
840 | brl_options = str + 4; | 840 | brl_options = str + 4; |
841 | str = strchr(brl_options, ','); | 841 | str = strchr(brl_options, ','); |
842 | if (!str) { | 842 | if (!str) { |
843 | printk(KERN_ERR "need port name after brl=\n"); | 843 | printk(KERN_ERR "need port name after brl=\n"); |
844 | return 1; | 844 | return 1; |
845 | } | 845 | } |
846 | *(str++) = 0; | 846 | *(str++) = 0; |
847 | } | 847 | } |
848 | #endif | 848 | #endif |
849 | 849 | ||
850 | /* | 850 | /* |
851 | * Decode str into name, index, options. | 851 | * Decode str into name, index, options. |
852 | */ | 852 | */ |
853 | if (str[0] >= '0' && str[0] <= '9') { | 853 | if (str[0] >= '0' && str[0] <= '9') { |
854 | strcpy(buf, "ttyS"); | 854 | strcpy(buf, "ttyS"); |
855 | strncpy(buf + 4, str, sizeof(buf) - 5); | 855 | strncpy(buf + 4, str, sizeof(buf) - 5); |
856 | } else { | 856 | } else { |
857 | strncpy(buf, str, sizeof(buf) - 1); | 857 | strncpy(buf, str, sizeof(buf) - 1); |
858 | } | 858 | } |
859 | buf[sizeof(buf) - 1] = 0; | 859 | buf[sizeof(buf) - 1] = 0; |
860 | if ((options = strchr(str, ',')) != NULL) | 860 | if ((options = strchr(str, ',')) != NULL) |
861 | *(options++) = 0; | 861 | *(options++) = 0; |
862 | #ifdef __sparc__ | 862 | #ifdef __sparc__ |
863 | if (!strcmp(str, "ttya")) | 863 | if (!strcmp(str, "ttya")) |
864 | strcpy(buf, "ttyS0"); | 864 | strcpy(buf, "ttyS0"); |
865 | if (!strcmp(str, "ttyb")) | 865 | if (!strcmp(str, "ttyb")) |
866 | strcpy(buf, "ttyS1"); | 866 | strcpy(buf, "ttyS1"); |
867 | #endif | 867 | #endif |
868 | for (s = buf; *s; s++) | 868 | for (s = buf; *s; s++) |
869 | if ((*s >= '0' && *s <= '9') || *s == ',') | 869 | if ((*s >= '0' && *s <= '9') || *s == ',') |
870 | break; | 870 | break; |
871 | idx = simple_strtoul(s, NULL, 10); | 871 | idx = simple_strtoul(s, NULL, 10); |
872 | *s = 0; | 872 | *s = 0; |
873 | 873 | ||
874 | __add_preferred_console(buf, idx, options, brl_options); | 874 | __add_preferred_console(buf, idx, options, brl_options); |
875 | console_set_on_cmdline = 1; | 875 | console_set_on_cmdline = 1; |
876 | return 1; | 876 | return 1; |
877 | } | 877 | } |
878 | __setup("console=", console_setup); | 878 | __setup("console=", console_setup); |
879 | 879 | ||
880 | /** | 880 | /** |
881 | * add_preferred_console - add a device to the list of preferred consoles. | 881 | * add_preferred_console - add a device to the list of preferred consoles. |
882 | * @name: device name | 882 | * @name: device name |
883 | * @idx: device index | 883 | * @idx: device index |
884 | * @options: options for this console | 884 | * @options: options for this console |
885 | * | 885 | * |
886 | * The last preferred console added will be used for kernel messages | 886 | * The last preferred console added will be used for kernel messages |
887 | * and stdin/out/err for init. Normally this is used by console_setup | 887 | * and stdin/out/err for init. Normally this is used by console_setup |
888 | * above to handle user-supplied console arguments; however it can also | 888 | * above to handle user-supplied console arguments; however it can also |
889 | * be used by arch-specific code either to override the user or more | 889 | * be used by arch-specific code either to override the user or more |
890 | * commonly to provide a default console (e.g. from PROM variables) when | 890 | * commonly to provide a default console (e.g. from PROM variables) when |
891 | * the user has not supplied one. | 891 | * the user has not supplied one. |
892 | */ | 892 | */ |
893 | int add_preferred_console(char *name, int idx, char *options) | 893 | int add_preferred_console(char *name, int idx, char *options) |
894 | { | 894 | { |
895 | return __add_preferred_console(name, idx, options, NULL); | 895 | return __add_preferred_console(name, idx, options, NULL); |
896 | } | 896 | } |
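A sketch of the arch-side use described above; the guard placement and option string are illustrative, not taken from any particular arch:

        /* e.g. in arch setup code, when firmware names the boot console */
        if (!console_set_on_cmdline)
                add_preferred_console("ttyS", 0, "9600n8");

Guarding on console_set_on_cmdline keeps the documented behaviour that an explicit console= from the user wins over the firmware default.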
897 | 897 | ||
898 | int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) | 898 | int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) |
899 | { | 899 | { |
900 | struct console_cmdline *c; | 900 | struct console_cmdline *c; |
901 | int i; | 901 | int i; |
902 | 902 | ||
903 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) | 903 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) |
904 | if (strcmp(console_cmdline[i].name, name) == 0 && | 904 | if (strcmp(console_cmdline[i].name, name) == 0 && |
905 | console_cmdline[i].index == idx) { | 905 | console_cmdline[i].index == idx) { |
906 | c = &console_cmdline[i]; | 906 | c = &console_cmdline[i]; |
907 | strlcpy(c->name, name_new, sizeof(c->name)); | 907 | strlcpy(c->name, name_new, sizeof(c->name)); |
908 | c->name[sizeof(c->name) - 1] = 0; | 908 | c->name[sizeof(c->name) - 1] = 0; |
909 | c->options = options; | 909 | c->options = options; |
910 | c->index = idx_new; | 910 | c->index = idx_new; |
911 | return i; | 911 | return i; |
912 | } | 912 | } |
913 | /* not found */ | 913 | /* not found */ |
914 | return -1; | 914 | return -1; |
915 | } | 915 | } |
916 | 916 | ||
917 | int console_suspend_enabled = 1; | 917 | int console_suspend_enabled = 1; |
918 | EXPORT_SYMBOL(console_suspend_enabled); | 918 | EXPORT_SYMBOL(console_suspend_enabled); |
919 | 919 | ||
920 | static int __init console_suspend_disable(char *str) | 920 | static int __init console_suspend_disable(char *str) |
921 | { | 921 | { |
922 | console_suspend_enabled = 0; | 922 | console_suspend_enabled = 0; |
923 | return 1; | 923 | return 1; |
924 | } | 924 | } |
925 | __setup("no_console_suspend", console_suspend_disable); | 925 | __setup("no_console_suspend", console_suspend_disable); |
926 | 926 | ||
927 | /** | 927 | /** |
928 | * suspend_console - suspend the console subsystem | 928 | * suspend_console - suspend the console subsystem |
929 | * | 929 | * |
930 | * This disables printk() while we go into suspend states | 930 | * This disables printk() while we go into suspend states |
931 | */ | 931 | */ |
932 | void suspend_console(void) | 932 | void suspend_console(void) |
933 | { | 933 | { |
934 | if (!console_suspend_enabled) | 934 | if (!console_suspend_enabled) |
935 | return; | 935 | return; |
936 | printk("Suspending console(s) (use no_console_suspend to debug)\n"); | 936 | printk("Suspending console(s) (use no_console_suspend to debug)\n"); |
937 | acquire_console_sem(); | 937 | acquire_console_sem(); |
938 | console_suspended = 1; | 938 | console_suspended = 1; |
939 | } | 939 | } |
940 | 940 | ||
941 | void resume_console(void) | 941 | void resume_console(void) |
942 | { | 942 | { |
943 | if (!console_suspend_enabled) | 943 | if (!console_suspend_enabled) |
944 | return; | 944 | return; |
945 | console_suspended = 0; | 945 | console_suspended = 0; |
946 | release_console_sem(); | 946 | release_console_sem(); |
947 | } | 947 | } |
948 | 948 | ||
949 | /** | 949 | /** |
950 | * acquire_console_sem - lock the console system for exclusive use. | 950 | * acquire_console_sem - lock the console system for exclusive use. |
951 | * | 951 | * |
952 | * Acquires a semaphore which guarantees that the caller has | 952 | * Acquires a semaphore which guarantees that the caller has |
953 | * exclusive access to the console system and the console_drivers list. | 953 | * exclusive access to the console system and the console_drivers list. |
954 | * | 954 | * |
955 | * Can sleep, returns nothing. | 955 | * Can sleep, returns nothing. |
956 | */ | 956 | */ |
957 | void acquire_console_sem(void) | 957 | void acquire_console_sem(void) |
958 | { | 958 | { |
959 | BUG_ON(in_interrupt()); | 959 | BUG_ON(in_interrupt()); |
960 | if (console_suspended) { | 960 | if (console_suspended) { |
961 | down(&secondary_console_sem); | 961 | down(&secondary_console_sem); |
962 | return; | 962 | return; |
963 | } | 963 | } |
964 | down(&console_sem); | 964 | down(&console_sem); |
965 | console_locked = 1; | 965 | console_locked = 1; |
966 | console_may_schedule = 1; | 966 | console_may_schedule = 1; |
967 | } | 967 | } |
968 | EXPORT_SYMBOL(acquire_console_sem); | 968 | EXPORT_SYMBOL(acquire_console_sem); |
969 | 969 | ||
970 | int try_acquire_console_sem(void) | 970 | int try_acquire_console_sem(void) |
971 | { | 971 | { |
972 | if (down_trylock(&console_sem)) | 972 | if (down_trylock(&console_sem)) |
973 | return -1; | 973 | return -1; |
974 | console_locked = 1; | 974 | console_locked = 1; |
975 | console_may_schedule = 0; | 975 | console_may_schedule = 0; |
976 | return 0; | 976 | return 0; |
977 | } | 977 | } |
978 | EXPORT_SYMBOL(try_acquire_console_sem); | 978 | EXPORT_SYMBOL(try_acquire_console_sem); |
979 | 979 | ||
980 | int is_console_locked(void) | 980 | int is_console_locked(void) |
981 | { | 981 | { |
982 | return console_locked; | 982 | return console_locked; |
983 | } | 983 | } |
984 | 984 | ||
985 | void wake_up_klogd(void) | 985 | static DEFINE_PER_CPU(int, printk_pending); |
| | 986 | |
| | 987 | void printk_tick(void) |
986 | { | 988 | { |
987 | if (!oops_in_progress && waitqueue_active(&log_wait)) | 989 | if (__get_cpu_var(printk_pending)) { |
| | 990 | __get_cpu_var(printk_pending) = 0; |
988 | wake_up_interruptible(&log_wait); | 991 | wake_up_interruptible(&log_wait); |
| | 992 | } |
| | 993 | } |
| | 994 | |
| | 995 | int printk_needs_cpu(int cpu) |
| | 996 | { |
| | 997 | return per_cpu(printk_pending, cpu); |
| | 998 | } |
| | 999 | |
| | 1000 | void wake_up_klogd(void) |
| | 1001 | { |
| | 1002 | if (waitqueue_active(&log_wait)) |
| | 1003 | __get_cpu_var(printk_pending) = 1; |
989 | } | 1004 | } |
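These three functions are the core of the change: printk() paths now merely set the per-CPU printk_pending flag (taking no wait-queue or runqueue locks), and the real wake_up_interruptible() happens later from the timer tick. The timer code touched by this commit wires that up, roughly as follows (abbreviated; the declarations live in include/linux/kernel.h):

        /* kernel/timer.c: update_process_times() polls for deferred wakeups */
        printk_tick();

        /* kernel/time/tick-sched.c: don't enter nohz while a wakeup is pending */
        if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
                delta_jiffies = 1;

printk_needs_cpu() keeps the tick alive on a CPU with a pending klogd wakeup, so the deferred wakeup cannot be lost when that CPU would otherwise stop its tick.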
990 | 1005 | ||
991 | /** | 1006 | /** |
992 | * release_console_sem - unlock the console system | 1007 | * release_console_sem - unlock the console system |
993 | * | 1008 | * |
994 | * Releases the semaphore which the caller holds on the console system | 1009 | * Releases the semaphore which the caller holds on the console system |
995 | * and the console driver list. | 1010 | * and the console driver list. |
996 | * | 1011 | * |
997 | * While the semaphore was held, console output may have been buffered | 1012 | * While the semaphore was held, console output may have been buffered |
998 | * by printk(). If this is the case, release_console_sem() emits | 1013 | * by printk(). If this is the case, release_console_sem() emits |
999 | * the output prior to releasing the semaphore. | 1014 | * the output prior to releasing the semaphore. |
1000 | * | 1015 | * |
1001 | * If there is output waiting for klogd, we wake it up. | 1016 | * If there is output waiting for klogd, we wake it up. |
1002 | * | 1017 | * |
1003 | * release_console_sem() may be called from any context. | 1018 | * release_console_sem() may be called from any context. |
1004 | */ | 1019 | */ |
1005 | void release_console_sem(void) | 1020 | void release_console_sem(void) |
1006 | { | 1021 | { |
1007 | unsigned long flags; | 1022 | unsigned long flags; |
1008 | unsigned _con_start, _log_end; | 1023 | unsigned _con_start, _log_end; |
1009 | unsigned wake_klogd = 0; | 1024 | unsigned wake_klogd = 0; |
1010 | 1025 | ||
1011 | if (console_suspended) { | 1026 | if (console_suspended) { |
1012 | up(&secondary_console_sem); | 1027 | up(&secondary_console_sem); |
1013 | return; | 1028 | return; |
1014 | } | 1029 | } |
1015 | 1030 | ||
1016 | console_may_schedule = 0; | 1031 | console_may_schedule = 0; |
1017 | 1032 | ||
1018 | for ( ; ; ) { | 1033 | for ( ; ; ) { |
1019 | spin_lock_irqsave(&logbuf_lock, flags); | 1034 | spin_lock_irqsave(&logbuf_lock, flags); |
1020 | wake_klogd |= log_start - log_end; | 1035 | wake_klogd |= log_start - log_end; |
1021 | if (con_start == log_end) | 1036 | if (con_start == log_end) |
1022 | break; /* Nothing to print */ | 1037 | break; /* Nothing to print */ |
1023 | _con_start = con_start; | 1038 | _con_start = con_start; |
1024 | _log_end = log_end; | 1039 | _log_end = log_end; |
1025 | con_start = log_end; /* Flush */ | 1040 | con_start = log_end; /* Flush */ |
1026 | spin_unlock(&logbuf_lock); | 1041 | spin_unlock(&logbuf_lock); |
1027 | stop_critical_timings(); /* don't trace print latency */ | 1042 | stop_critical_timings(); /* don't trace print latency */ |
1028 | call_console_drivers(_con_start, _log_end); | 1043 | call_console_drivers(_con_start, _log_end); |
1029 | start_critical_timings(); | 1044 | start_critical_timings(); |
1030 | local_irq_restore(flags); | 1045 | local_irq_restore(flags); |
1031 | } | 1046 | } |
1032 | console_locked = 0; | 1047 | console_locked = 0; |
1033 | up(&console_sem); | 1048 | up(&console_sem); |
1034 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1049 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1035 | if (wake_klogd) | 1050 | if (wake_klogd) |
1036 | wake_up_klogd(); | 1051 | wake_up_klogd(); |
1037 | } | 1052 | } |
1038 | EXPORT_SYMBOL(release_console_sem); | 1053 | EXPORT_SYMBOL(release_console_sem); |
1039 | 1054 | ||
1040 | /** | 1055 | /** |
1041 | * console_conditional_schedule - yield the CPU if required | 1056 | * console_conditional_schedule - yield the CPU if required |
1042 | * | 1057 | * |
1043 | * If the console code is currently allowed to sleep, and | 1058 | * If the console code is currently allowed to sleep, and |
1044 | * if this CPU should yield the CPU to another task, do | 1059 | * if this CPU should yield the CPU to another task, do |
1045 | * so here. | 1060 | * so here. |
1046 | * | 1061 | * |
1047 | * Must be called within acquire_console_sem(). | 1062 | * Must be called within acquire_console_sem(). |
1048 | */ | 1063 | */ |
1049 | void __sched console_conditional_schedule(void) | 1064 | void __sched console_conditional_schedule(void) |
1050 | { | 1065 | { |
1051 | if (console_may_schedule) | 1066 | if (console_may_schedule) |
1052 | cond_resched(); | 1067 | cond_resched(); |
1053 | } | 1068 | } |
1054 | EXPORT_SYMBOL(console_conditional_schedule); | 1069 | EXPORT_SYMBOL(console_conditional_schedule); |
1055 | 1070 | ||
1056 | void console_print(const char *s) | 1071 | void console_print(const char *s) |
1057 | { | 1072 | { |
1058 | printk(KERN_EMERG "%s", s); | 1073 | printk(KERN_EMERG "%s", s); |
1059 | } | 1074 | } |
1060 | EXPORT_SYMBOL(console_print); | 1075 | EXPORT_SYMBOL(console_print); |
1061 | 1076 | ||
1062 | void console_unblank(void) | 1077 | void console_unblank(void) |
1063 | { | 1078 | { |
1064 | struct console *c; | 1079 | struct console *c; |
1065 | 1080 | ||
1066 | /* | 1081 | /* |
1067 | * console_unblank can no longer be called in interrupt context unless | 1082 | * console_unblank can no longer be called in interrupt context unless |
1068 | * oops_in_progress is set to 1. | 1083 | * oops_in_progress is set to 1. |
1069 | */ | 1084 | */ |
1070 | if (oops_in_progress) { | 1085 | if (oops_in_progress) { |
1071 | if (down_trylock(&console_sem) != 0) | 1086 | if (down_trylock(&console_sem) != 0) |
1072 | return; | 1087 | return; |
1073 | } else | 1088 | } else |
1074 | acquire_console_sem(); | 1089 | acquire_console_sem(); |
1075 | 1090 | ||
1076 | console_locked = 1; | 1091 | console_locked = 1; |
1077 | console_may_schedule = 0; | 1092 | console_may_schedule = 0; |
1078 | for (c = console_drivers; c != NULL; c = c->next) | 1093 | for (c = console_drivers; c != NULL; c = c->next) |
1079 | if ((c->flags & CON_ENABLED) && c->unblank) | 1094 | if ((c->flags & CON_ENABLED) && c->unblank) |
1080 | c->unblank(); | 1095 | c->unblank(); |
1081 | release_console_sem(); | 1096 | release_console_sem(); |
1082 | } | 1097 | } |
1083 | 1098 | ||
1084 | /* | 1099 | /* |
1085 | * Return the console tty driver structure and its associated index | 1100 | * Return the console tty driver structure and its associated index |
1086 | */ | 1101 | */ |
1087 | struct tty_driver *console_device(int *index) | 1102 | struct tty_driver *console_device(int *index) |
1088 | { | 1103 | { |
1089 | struct console *c; | 1104 | struct console *c; |
1090 | struct tty_driver *driver = NULL; | 1105 | struct tty_driver *driver = NULL; |
1091 | 1106 | ||
1092 | acquire_console_sem(); | 1107 | acquire_console_sem(); |
1093 | for (c = console_drivers; c != NULL; c = c->next) { | 1108 | for (c = console_drivers; c != NULL; c = c->next) { |
1094 | if (!c->device) | 1109 | if (!c->device) |
1095 | continue; | 1110 | continue; |
1096 | driver = c->device(c, index); | 1111 | driver = c->device(c, index); |
1097 | if (driver) | 1112 | if (driver) |
1098 | break; | 1113 | break; |
1099 | } | 1114 | } |
1100 | release_console_sem(); | 1115 | release_console_sem(); |
1101 | return driver; | 1116 | return driver; |
1102 | } | 1117 | } |
1103 | 1118 | ||
1104 | /* | 1119 | /* |
1105 | * Prevent further output on the passed console device so that (for example) | 1120 | * Prevent further output on the passed console device so that (for example) |
1106 | * serial drivers can disable console output before suspending a port, and can | 1121 | * serial drivers can disable console output before suspending a port, and can |
1107 | * re-enable output afterwards. | 1122 | * re-enable output afterwards. |
1108 | */ | 1123 | */ |
1109 | void console_stop(struct console *console) | 1124 | void console_stop(struct console *console) |
1110 | { | 1125 | { |
1111 | acquire_console_sem(); | 1126 | acquire_console_sem(); |
1112 | console->flags &= ~CON_ENABLED; | 1127 | console->flags &= ~CON_ENABLED; |
1113 | release_console_sem(); | 1128 | release_console_sem(); |
1114 | } | 1129 | } |
1115 | EXPORT_SYMBOL(console_stop); | 1130 | EXPORT_SYMBOL(console_stop); |
1116 | 1131 | ||
1117 | void console_start(struct console *console) | 1132 | void console_start(struct console *console) |
1118 | { | 1133 | { |
1119 | acquire_console_sem(); | 1134 | acquire_console_sem(); |
1120 | console->flags |= CON_ENABLED; | 1135 | console->flags |= CON_ENABLED; |
1121 | release_console_sem(); | 1136 | release_console_sem(); |
1122 | } | 1137 | } |
1123 | EXPORT_SYMBOL(console_start); | 1138 | EXPORT_SYMBOL(console_start); |
1124 | 1139 | ||
1125 | /* | 1140 | /* |
1126 | * The console driver calls this routine during kernel initialization | 1141 | * The console driver calls this routine during kernel initialization |
1127 | * to register the console printing procedure with printk() and to | 1142 | * to register the console printing procedure with printk() and to |
1128 | * print any messages that were printed by the kernel before the | 1143 | * print any messages that were printed by the kernel before the |
1129 | * console driver was initialized. | 1144 | * console driver was initialized. |
1130 | */ | 1145 | */ |
1131 | void register_console(struct console *console) | 1146 | void register_console(struct console *console) |
1132 | { | 1147 | { |
1133 | int i; | 1148 | int i; |
1134 | unsigned long flags; | 1149 | unsigned long flags; |
1135 | struct console *bootconsole = NULL; | 1150 | struct console *bootconsole = NULL; |
1136 | 1151 | ||
1137 | if (console_drivers) { | 1152 | if (console_drivers) { |
1138 | if (console->flags & CON_BOOT) | 1153 | if (console->flags & CON_BOOT) |
1139 | return; | 1154 | return; |
1140 | if (console_drivers->flags & CON_BOOT) | 1155 | if (console_drivers->flags & CON_BOOT) |
1141 | bootconsole = console_drivers; | 1156 | bootconsole = console_drivers; |
1142 | } | 1157 | } |
1143 | 1158 | ||
1144 | if (preferred_console < 0 || bootconsole || !console_drivers) | 1159 | if (preferred_console < 0 || bootconsole || !console_drivers) |
1145 | preferred_console = selected_console; | 1160 | preferred_console = selected_console; |
1146 | 1161 | ||
1147 | if (console->early_setup) | 1162 | if (console->early_setup) |
1148 | console->early_setup(); | 1163 | console->early_setup(); |
1149 | 1164 | ||
1150 | /* | 1165 | /* |
1151 | * See if we want to use this console driver. If we | 1166 | * See if we want to use this console driver. If we |
1152 | * didn't select a console we take the first one | 1167 | * didn't select a console we take the first one |
1153 | * that registers here. | 1168 | * that registers here. |
1154 | */ | 1169 | */ |
1155 | if (preferred_console < 0) { | 1170 | if (preferred_console < 0) { |
1156 | if (console->index < 0) | 1171 | if (console->index < 0) |
1157 | console->index = 0; | 1172 | console->index = 0; |
1158 | if (console->setup == NULL || | 1173 | if (console->setup == NULL || |
1159 | console->setup(console, NULL) == 0) { | 1174 | console->setup(console, NULL) == 0) { |
1160 | console->flags |= CON_ENABLED; | 1175 | console->flags |= CON_ENABLED; |
1161 | if (console->device) { | 1176 | if (console->device) { |
1162 | console->flags |= CON_CONSDEV; | 1177 | console->flags |= CON_CONSDEV; |
1163 | preferred_console = 0; | 1178 | preferred_console = 0; |
1164 | } | 1179 | } |
1165 | } | 1180 | } |
1166 | } | 1181 | } |
1167 | 1182 | ||
1168 | /* | 1183 | /* |
1169 | * See if this console matches one we selected on | 1184 | * See if this console matches one we selected on |
1170 | * the command line. | 1185 | * the command line. |
1171 | */ | 1186 | */ |
1172 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; | 1187 | for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; |
1173 | i++) { | 1188 | i++) { |
1174 | if (strcmp(console_cmdline[i].name, console->name) != 0) | 1189 | if (strcmp(console_cmdline[i].name, console->name) != 0) |
1175 | continue; | 1190 | continue; |
1176 | if (console->index >= 0 && | 1191 | if (console->index >= 0 && |
1177 | console->index != console_cmdline[i].index) | 1192 | console->index != console_cmdline[i].index) |
1178 | continue; | 1193 | continue; |
1179 | if (console->index < 0) | 1194 | if (console->index < 0) |
1180 | console->index = console_cmdline[i].index; | 1195 | console->index = console_cmdline[i].index; |
1181 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | 1196 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE |
1182 | if (console_cmdline[i].brl_options) { | 1197 | if (console_cmdline[i].brl_options) { |
1183 | console->flags |= CON_BRL; | 1198 | console->flags |= CON_BRL; |
1184 | braille_register_console(console, | 1199 | braille_register_console(console, |
1185 | console_cmdline[i].index, | 1200 | console_cmdline[i].index, |
1186 | console_cmdline[i].options, | 1201 | console_cmdline[i].options, |
1187 | console_cmdline[i].brl_options); | 1202 | console_cmdline[i].brl_options); |
1188 | return; | 1203 | return; |
1189 | } | 1204 | } |
1190 | #endif | 1205 | #endif |
1191 | if (console->setup && | 1206 | if (console->setup && |
1192 | console->setup(console, console_cmdline[i].options) != 0) | 1207 | console->setup(console, console_cmdline[i].options) != 0) |
1193 | break; | 1208 | break; |
1194 | console->flags |= CON_ENABLED; | 1209 | console->flags |= CON_ENABLED; |
1195 | console->index = console_cmdline[i].index; | 1210 | console->index = console_cmdline[i].index; |
1196 | if (i == selected_console) { | 1211 | if (i == selected_console) { |
1197 | console->flags |= CON_CONSDEV; | 1212 | console->flags |= CON_CONSDEV; |
1198 | preferred_console = selected_console; | 1213 | preferred_console = selected_console; |
1199 | } | 1214 | } |
1200 | break; | 1215 | break; |
1201 | } | 1216 | } |
1202 | 1217 | ||
1203 | if (!(console->flags & CON_ENABLED)) | 1218 | if (!(console->flags & CON_ENABLED)) |
1204 | return; | 1219 | return; |
1205 | 1220 | ||
1206 | if (bootconsole && (console->flags & CON_CONSDEV)) { | 1221 | if (bootconsole && (console->flags & CON_CONSDEV)) { |
1207 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", | 1222 | printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n", |
1208 | bootconsole->name, bootconsole->index, | 1223 | bootconsole->name, bootconsole->index, |
1209 | console->name, console->index); | 1224 | console->name, console->index); |
1210 | unregister_console(bootconsole); | 1225 | unregister_console(bootconsole); |
1211 | console->flags &= ~CON_PRINTBUFFER; | 1226 | console->flags &= ~CON_PRINTBUFFER; |
1212 | } else { | 1227 | } else { |
1213 | printk(KERN_INFO "console [%s%d] enabled\n", | 1228 | printk(KERN_INFO "console [%s%d] enabled\n", |
1214 | console->name, console->index); | 1229 | console->name, console->index); |
1215 | } | 1230 | } |
1216 | 1231 | ||
1217 | /* | 1232 | /* |
1218 | * Put this console in the list - keep the | 1233 | * Put this console in the list - keep the |
1219 | * preferred driver at the head of the list. | 1234 | * preferred driver at the head of the list. |
1220 | */ | 1235 | */ |
1221 | acquire_console_sem(); | 1236 | acquire_console_sem(); |
1222 | if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { | 1237 | if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { |
1223 | console->next = console_drivers; | 1238 | console->next = console_drivers; |
1224 | console_drivers = console; | 1239 | console_drivers = console; |
1225 | if (console->next) | 1240 | if (console->next) |
1226 | console->next->flags &= ~CON_CONSDEV; | 1241 | console->next->flags &= ~CON_CONSDEV; |
1227 | } else { | 1242 | } else { |
1228 | console->next = console_drivers->next; | 1243 | console->next = console_drivers->next; |
1229 | console_drivers->next = console; | 1244 | console_drivers->next = console; |
1230 | } | 1245 | } |
1231 | if (console->flags & CON_PRINTBUFFER) { | 1246 | if (console->flags & CON_PRINTBUFFER) { |
1232 | /* | 1247 | /* |
1233 | * release_console_sem() will print out the buffered messages | 1248 | * release_console_sem() will print out the buffered messages |
1234 | * for us. | 1249 | * for us. |
1235 | */ | 1250 | */ |
1236 | spin_lock_irqsave(&logbuf_lock, flags); | 1251 | spin_lock_irqsave(&logbuf_lock, flags); |
1237 | con_start = log_start; | 1252 | con_start = log_start; |
1238 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1253 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1239 | } | 1254 | } |
1240 | release_console_sem(); | 1255 | release_console_sem(); |
1241 | } | 1256 | } |
1242 | EXPORT_SYMBOL(register_console); | 1257 | EXPORT_SYMBOL(register_console); |
1243 | 1258 | ||
1244 | int unregister_console(struct console *console) | 1259 | int unregister_console(struct console *console) |
1245 | { | 1260 | { |
1246 | struct console *a, *b; | 1261 | struct console *a, *b; |
1247 | int res = 1; | 1262 | int res = 1; |
1248 | 1263 | ||
1249 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE | 1264 | #ifdef CONFIG_A11Y_BRAILLE_CONSOLE |
1250 | if (console->flags & CON_BRL) | 1265 | if (console->flags & CON_BRL) |
1251 | return braille_unregister_console(console); | 1266 | return braille_unregister_console(console); |
1252 | #endif | 1267 | #endif |
1253 | 1268 | ||
1254 | acquire_console_sem(); | 1269 | acquire_console_sem(); |
1255 | if (console_drivers == console) { | 1270 | if (console_drivers == console) { |
1256 | console_drivers = console->next; | 1271 | console_drivers = console->next; |
1257 | res = 0; | 1272 | res = 0; |
1258 | } else if (console_drivers) { | 1273 | } else if (console_drivers) { |
1259 | for (a = console_drivers->next, b = console_drivers; | 1274 | for (a = console_drivers->next, b = console_drivers; |
1260 | a; b = a, a = b->next) { | 1275 | a; b = a, a = b->next) { |
1261 | if (a == console) { | 1276 | if (a == console) { |
1262 | b->next = a->next; | 1277 | b->next = a->next; |
1263 | res = 0; | 1278 | res = 0; |
1264 | break; | 1279 | break; |
1265 | } | 1280 | } |
1266 | } | 1281 | } |
1267 | } | 1282 | } |
1268 | 1283 | ||
1269 | /* | 1284 | /* |
1270 | * If this isn't the last console and it has CON_CONSDEV set, we | 1285 | * If this isn't the last console and it has CON_CONSDEV set, we |
1271 | * need to set it on the next preferred console. | 1286 | * need to set it on the next preferred console. |
1272 | */ | 1287 | */ |
1273 | if (console_drivers != NULL && console->flags & CON_CONSDEV) | 1288 | if (console_drivers != NULL && console->flags & CON_CONSDEV) |
1274 | console_drivers->flags |= CON_CONSDEV; | 1289 | console_drivers->flags |= CON_CONSDEV; |
1275 | 1290 | ||
1276 | release_console_sem(); | 1291 | release_console_sem(); |
1277 | return res; | 1292 | return res; |
1278 | } | 1293 | } |
1279 | EXPORT_SYMBOL(unregister_console); | 1294 | EXPORT_SYMBOL(unregister_console); |
1280 | 1295 | ||
1281 | static int __init disable_boot_consoles(void) | 1296 | static int __init disable_boot_consoles(void) |
1282 | { | 1297 | { |
1283 | if (console_drivers != NULL) { | 1298 | if (console_drivers != NULL) { |
1284 | if (console_drivers->flags & CON_BOOT) { | 1299 | if (console_drivers->flags & CON_BOOT) { |
1285 | printk(KERN_INFO "turn off boot console %s%d\n", | 1300 | printk(KERN_INFO "turn off boot console %s%d\n", |
1286 | console_drivers->name, console_drivers->index); | 1301 | console_drivers->name, console_drivers->index); |
1287 | return unregister_console(console_drivers); | 1302 | return unregister_console(console_drivers); |
1288 | } | 1303 | } |
1289 | } | 1304 | } |
1290 | return 0; | 1305 | return 0; |
1291 | } | 1306 | } |
1292 | late_initcall(disable_boot_consoles); | 1307 | late_initcall(disable_boot_consoles); |
1293 | 1308 | ||
1294 | /** | 1309 | /** |
1295 | * tty_write_message - write a message to a certain tty, not just the console. | 1310 | * tty_write_message - write a message to a certain tty, not just the console. |
1296 | * @tty: the destination tty_struct | 1311 | * @tty: the destination tty_struct |
1297 | * @msg: the message to write | 1312 | * @msg: the message to write |
1298 | * | 1313 | * |
1299 | * This is used for messages that need to be redirected to a specific tty. | 1314 | * This is used for messages that need to be redirected to a specific tty. |
1300 | * We don't put it into the syslog queue right now; maybe in the future, if | 1315 | * We don't put it into the syslog queue right now; maybe in the future, if |
1301 | * really needed. | 1316 | * really needed. |
1302 | */ | 1317 | */ |
1303 | void tty_write_message(struct tty_struct *tty, char *msg) | 1318 | void tty_write_message(struct tty_struct *tty, char *msg) |
1304 | { | 1319 | { |
1305 | if (tty && tty->ops->write) | 1320 | if (tty && tty->ops->write) |
1306 | tty->ops->write(tty, msg, strlen(msg)); | 1321 | tty->ops->write(tty, msg, strlen(msg)); |
1307 | return; | 1322 | return; |
1308 | } | 1323 | } |
1309 | 1324 | ||
1310 | #if defined CONFIG_PRINTK | 1325 | #if defined CONFIG_PRINTK |
1311 | 1326 | ||
1312 | /* | 1327 | /* |
1313 | * printk rate limiting, lifted from the networking subsystem. | 1328 | * printk rate limiting, lifted from the networking subsystem. |
1314 | * | 1329 | * |
1315 | * This enforces a rate limit: not more than 10 kernel messages | 1330 | * This enforces a rate limit: not more than 10 kernel messages |
1316 | * every 5s, which makes a denial-of-service attack impractical. | 1331 | * every 5s, which makes a denial-of-service attack impractical. |
1317 | */ | 1332 | */ |
1318 | DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); | 1333 | DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); |
1319 | 1334 | ||
1320 | int printk_ratelimit(void) | 1335 | int printk_ratelimit(void) |
1321 | { | 1336 | { |
1322 | return __ratelimit(&printk_ratelimit_state); | 1337 | return __ratelimit(&printk_ratelimit_state); |
1323 | } | 1338 | } |
1324 | EXPORT_SYMBOL(printk_ratelimit); | 1339 | EXPORT_SYMBOL(printk_ratelimit); |
1325 | 1340 | ||
1326 | /** | 1341 | /** |
1327 | * printk_timed_ratelimit - caller-controlled printk ratelimiting | 1342 | * printk_timed_ratelimit - caller-controlled printk ratelimiting |
1328 | * @caller_jiffies: pointer to caller's state | 1343 | * @caller_jiffies: pointer to caller's state |
1329 | * @interval_msecs: minimum interval between prints | 1344 | * @interval_msecs: minimum interval between prints |
1330 | * | 1345 | * |
1331 | * printk_timed_ratelimit() returns true if more than @interval_msecs | 1346 | * printk_timed_ratelimit() returns true if more than @interval_msecs |
1332 | * milliseconds have elapsed since the last time printk_timed_ratelimit() | 1347 | * milliseconds have elapsed since the last time printk_timed_ratelimit() |
1333 | * returned true. | 1348 | * returned true. |
1334 | */ | 1349 | */ |
1335 | bool printk_timed_ratelimit(unsigned long *caller_jiffies, | 1350 | bool printk_timed_ratelimit(unsigned long *caller_jiffies, |
1336 | unsigned int interval_msecs) | 1351 | unsigned int interval_msecs) |
1337 | { | 1352 | { |
1338 | if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) { | 1353 | if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) { |
1339 | *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs); | 1354 | *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs); |
1340 | return true; | 1355 | return true; |
1341 | } | 1356 | } |
1342 | return false; | 1357 | return false; |
1343 | } | 1358 | } |
1344 | EXPORT_SYMBOL(printk_timed_ratelimit); | 1359 | EXPORT_SYMBOL(printk_timed_ratelimit); |
1345 | #endif | 1360 | #endif |
1346 | 1361 |
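The printk.c hunks shown above are unchanged context; the substance of this patch is the deferred klogd wakeup that the tick-side hunks below poll for. As a minimal sketch of that mechanism (illustrative, not the verbatim hunk: the printk_pending flag and printk_tick() helper follow the patch's scheme, while printk_needs_cpu() is the hook visible in the tick-sched.c change below), wake_up_klogd() no longer calls wake_up_interruptible() directly — which can deadlock when printk() is issued with rq->lock or xtime_lock held — but only records that a wakeup is pending and lets the next timer tick perform it:

/*
 * Sketch: deferred klogd wakeup. wake_up_klogd() may run with
 * scheduler or timekeeping locks held, so it only sets a per-CPU
 * flag; printk_tick(), called from the regular timer tick, does
 * the actual wakeup from a safe context.
 */
static DEFINE_PER_CPU(int, printk_pending);

void printk_tick(void)
{
	if (__get_cpu_var(printk_pending)) {
		__get_cpu_var(printk_pending) = 0;
		wake_up_interruptible(&log_wait);
	}
}

/* Tell the nohz code not to stop the tick while a wakeup is pending. */
int printk_needs_cpu(int cpu)
{
	return per_cpu(printk_pending, cpu);
}

void wake_up_klogd(void)
{
	if (waitqueue_active(&log_wait))
		__raw_get_cpu_var(printk_pending) = 1;
}

Because the wakeup is now tick-driven, a CPU with a pending wakeup must not stop its tick; that is exactly what the printk_needs_cpu() check added to tick_nohz_stop_sched_tick() below enforces.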
kernel/time/tick-sched.c
1 | /* | 1 | /* |
2 | * linux/kernel/time/tick-sched.c | 2 | * linux/kernel/time/tick-sched.c |
3 | * | 3 | * |
4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | 5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | 6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner |
7 | * | 7 | * |
8 | * No idle tick implementation for low and high resolution timers | 8 | * No idle tick implementation for low and high resolution timers |
9 | * | 9 | * |
10 | * Started by: Thomas Gleixner and Ingo Molnar | 10 | * Started by: Thomas Gleixner and Ingo Molnar |
11 | * | 11 | * |
12 | * Distribute under GPLv2. | 12 | * Distribute under GPLv2. |
13 | */ | 13 | */ |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/kernel_stat.h> | 18 | #include <linux/kernel_stat.h> |
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/profile.h> | 20 | #include <linux/profile.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/tick.h> | 22 | #include <linux/tick.h> |
23 | 23 | ||
24 | #include <asm/irq_regs.h> | 24 | #include <asm/irq_regs.h> |
25 | 25 | ||
26 | #include "tick-internal.h" | 26 | #include "tick-internal.h" |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Per cpu nohz control structure | 29 | * Per cpu nohz control structure |
30 | */ | 30 | */ |
31 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | 31 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * The time when the last jiffy update happened. Protected by xtime_lock. | 34 | * The time when the last jiffy update happened. Protected by xtime_lock. |
35 | */ | 35 | */ |
36 | static ktime_t last_jiffies_update; | 36 | static ktime_t last_jiffies_update; |
37 | 37 | ||
38 | struct tick_sched *tick_get_tick_sched(int cpu) | 38 | struct tick_sched *tick_get_tick_sched(int cpu) |
39 | { | 39 | { |
40 | return &per_cpu(tick_cpu_sched, cpu); | 40 | return &per_cpu(tick_cpu_sched, cpu); |
41 | } | 41 | } |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Must be called with interrupts disabled! | 44 | * Must be called with interrupts disabled! |
45 | */ | 45 | */ |
46 | static void tick_do_update_jiffies64(ktime_t now) | 46 | static void tick_do_update_jiffies64(ktime_t now) |
47 | { | 47 | { |
48 | unsigned long ticks = 0; | 48 | unsigned long ticks = 0; |
49 | ktime_t delta; | 49 | ktime_t delta; |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Do a quick check without holding xtime_lock: | 52 | * Do a quick check without holding xtime_lock: |
53 | */ | 53 | */ |
54 | delta = ktime_sub(now, last_jiffies_update); | 54 | delta = ktime_sub(now, last_jiffies_update); |
55 | if (delta.tv64 < tick_period.tv64) | 55 | if (delta.tv64 < tick_period.tv64) |
56 | return; | 56 | return; |
57 | 57 | ||
58 | /* Reevaluate with xtime_lock held */ | 58 | /* Reevaluate with xtime_lock held */ |
59 | write_seqlock(&xtime_lock); | 59 | write_seqlock(&xtime_lock); |
60 | 60 | ||
61 | delta = ktime_sub(now, last_jiffies_update); | 61 | delta = ktime_sub(now, last_jiffies_update); |
62 | if (delta.tv64 >= tick_period.tv64) { | 62 | if (delta.tv64 >= tick_period.tv64) { |
63 | 63 | ||
64 | delta = ktime_sub(delta, tick_period); | 64 | delta = ktime_sub(delta, tick_period); |
65 | last_jiffies_update = ktime_add(last_jiffies_update, | 65 | last_jiffies_update = ktime_add(last_jiffies_update, |
66 | tick_period); | 66 | tick_period); |
67 | 67 | ||
68 | /* Slow path for long timeouts */ | 68 | /* Slow path for long timeouts */ |
69 | if (unlikely(delta.tv64 >= tick_period.tv64)) { | 69 | if (unlikely(delta.tv64 >= tick_period.tv64)) { |
70 | s64 incr = ktime_to_ns(tick_period); | 70 | s64 incr = ktime_to_ns(tick_period); |
71 | 71 | ||
72 | ticks = ktime_divns(delta, incr); | 72 | ticks = ktime_divns(delta, incr); |
73 | 73 | ||
74 | last_jiffies_update = ktime_add_ns(last_jiffies_update, | 74 | last_jiffies_update = ktime_add_ns(last_jiffies_update, |
75 | incr * ticks); | 75 | incr * ticks); |
76 | } | 76 | } |
77 | do_timer(++ticks); | 77 | do_timer(++ticks); |
78 | } | 78 | } |
79 | write_sequnlock(&xtime_lock); | 79 | write_sequnlock(&xtime_lock); |
80 | } | 80 | } |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Initialize and return the last jiffies update. | 83 | * Initialize and return the last jiffies update. |
84 | */ | 84 | */ |
85 | static ktime_t tick_init_jiffy_update(void) | 85 | static ktime_t tick_init_jiffy_update(void) |
86 | { | 86 | { |
87 | ktime_t period; | 87 | ktime_t period; |
88 | 88 | ||
89 | write_seqlock(&xtime_lock); | 89 | write_seqlock(&xtime_lock); |
90 | /* Did we start the jiffies update yet? */ | 90 | /* Did we start the jiffies update yet? */ |
91 | if (last_jiffies_update.tv64 == 0) | 91 | if (last_jiffies_update.tv64 == 0) |
92 | last_jiffies_update = tick_next_period; | 92 | last_jiffies_update = tick_next_period; |
93 | period = last_jiffies_update; | 93 | period = last_jiffies_update; |
94 | write_sequnlock(&xtime_lock); | 94 | write_sequnlock(&xtime_lock); |
95 | return period; | 95 | return period; |
96 | } | 96 | } |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * NOHZ - aka dynamic tick functionality | 99 | * NOHZ - aka dynamic tick functionality |
100 | */ | 100 | */ |
101 | #ifdef CONFIG_NO_HZ | 101 | #ifdef CONFIG_NO_HZ |
102 | /* | 102 | /* |
103 | * NO HZ enabled? | 103 | * NO HZ enabled? |
104 | */ | 104 | */ |
105 | static int tick_nohz_enabled __read_mostly = 1; | 105 | static int tick_nohz_enabled __read_mostly = 1; |
106 | 106 | ||
107 | /* | 107 | /* |
108 | * Enable / Disable tickless mode | 108 | * Enable / Disable tickless mode |
109 | */ | 109 | */ |
110 | static int __init setup_tick_nohz(char *str) | 110 | static int __init setup_tick_nohz(char *str) |
111 | { | 111 | { |
112 | if (!strcmp(str, "off")) | 112 | if (!strcmp(str, "off")) |
113 | tick_nohz_enabled = 0; | 113 | tick_nohz_enabled = 0; |
114 | else if (!strcmp(str, "on")) | 114 | else if (!strcmp(str, "on")) |
115 | tick_nohz_enabled = 1; | 115 | tick_nohz_enabled = 1; |
116 | else | 116 | else |
117 | return 0; | 117 | return 0; |
118 | return 1; | 118 | return 1; |
119 | } | 119 | } |
120 | 120 | ||
121 | __setup("nohz=", setup_tick_nohz); | 121 | __setup("nohz=", setup_tick_nohz); |
122 | 122 | ||
123 | /** | 123 | /** |
124 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted | 124 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted |
125 | * | 125 | * |
126 | * Called from interrupt entry when the CPU was idle | 126 | * Called from interrupt entry when the CPU was idle |
127 | * | 127 | * |
128 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies | 128 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies |
129 | * must be updated. Otherwise an interrupt handler could use a stale jiffy | 129 | * must be updated. Otherwise an interrupt handler could use a stale jiffy |
130 | * value. We do this unconditionally on any cpu, as we don't know whether the | 130 | * value. We do this unconditionally on any cpu, as we don't know whether the |
131 | * cpu which has the update task assigned is in a long sleep. | 131 | * cpu which has the update task assigned is in a long sleep. |
132 | */ | 132 | */ |
133 | void tick_nohz_update_jiffies(void) | 133 | void tick_nohz_update_jiffies(void) |
134 | { | 134 | { |
135 | int cpu = smp_processor_id(); | 135 | int cpu = smp_processor_id(); |
136 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 136 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
137 | unsigned long flags; | 137 | unsigned long flags; |
138 | ktime_t now; | 138 | ktime_t now; |
139 | 139 | ||
140 | if (!ts->tick_stopped) | 140 | if (!ts->tick_stopped) |
141 | return; | 141 | return; |
142 | 142 | ||
143 | cpu_clear(cpu, nohz_cpu_mask); | 143 | cpu_clear(cpu, nohz_cpu_mask); |
144 | now = ktime_get(); | 144 | now = ktime_get(); |
145 | ts->idle_waketime = now; | 145 | ts->idle_waketime = now; |
146 | 146 | ||
147 | local_irq_save(flags); | 147 | local_irq_save(flags); |
148 | tick_do_update_jiffies64(now); | 148 | tick_do_update_jiffies64(now); |
149 | local_irq_restore(flags); | 149 | local_irq_restore(flags); |
150 | 150 | ||
151 | touch_softlockup_watchdog(); | 151 | touch_softlockup_watchdog(); |
152 | } | 152 | } |
153 | 153 | ||
154 | void tick_nohz_stop_idle(int cpu) | 154 | void tick_nohz_stop_idle(int cpu) |
155 | { | 155 | { |
156 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 156 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
157 | 157 | ||
158 | if (ts->idle_active) { | 158 | if (ts->idle_active) { |
159 | ktime_t now, delta; | 159 | ktime_t now, delta; |
160 | now = ktime_get(); | 160 | now = ktime_get(); |
161 | delta = ktime_sub(now, ts->idle_entrytime); | 161 | delta = ktime_sub(now, ts->idle_entrytime); |
162 | ts->idle_lastupdate = now; | 162 | ts->idle_lastupdate = now; |
163 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | 163 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
164 | ts->idle_active = 0; | 164 | ts->idle_active = 0; |
165 | } | 165 | } |
166 | } | 166 | } |
167 | 167 | ||
168 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) | 168 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) |
169 | { | 169 | { |
170 | ktime_t now, delta; | 170 | ktime_t now, delta; |
171 | 171 | ||
172 | now = ktime_get(); | 172 | now = ktime_get(); |
173 | if (ts->idle_active) { | 173 | if (ts->idle_active) { |
174 | delta = ktime_sub(now, ts->idle_entrytime); | 174 | delta = ktime_sub(now, ts->idle_entrytime); |
175 | ts->idle_lastupdate = now; | 175 | ts->idle_lastupdate = now; |
176 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | 176 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
177 | } | 177 | } |
178 | ts->idle_entrytime = now; | 178 | ts->idle_entrytime = now; |
179 | ts->idle_active = 1; | 179 | ts->idle_active = 1; |
180 | return now; | 180 | return now; |
181 | } | 181 | } |
182 | 182 | ||
183 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | 183 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) |
184 | { | 184 | { |
185 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 185 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
186 | 186 | ||
187 | *last_update_time = ktime_to_us(ts->idle_lastupdate); | 187 | *last_update_time = ktime_to_us(ts->idle_lastupdate); |
188 | return ktime_to_us(ts->idle_sleeptime); | 188 | return ktime_to_us(ts->idle_sleeptime); |
189 | } | 189 | } |
190 | 190 | ||
191 | /** | 191 | /** |
192 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task | 192 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task |
193 | * | 193 | * |
194 | * When the next event is more than a tick into the future, stop the idle tick. | 194 | * When the next event is more than a tick into the future, stop the idle tick. |
195 | * Called either from the idle loop or from irq_exit() when an idle period was | 195 | * Called either from the idle loop or from irq_exit() when an idle period was |
196 | * just interrupted by an interrupt which did not cause a reschedule. | 196 | * just interrupted by an interrupt which did not cause a reschedule. |
197 | */ | 197 | */ |
198 | void tick_nohz_stop_sched_tick(int inidle) | 198 | void tick_nohz_stop_sched_tick(int inidle) |
199 | { | 199 | { |
200 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; | 200 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; |
201 | struct tick_sched *ts; | 201 | struct tick_sched *ts; |
202 | ktime_t last_update, expires, now; | 202 | ktime_t last_update, expires, now; |
203 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 203 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
204 | int cpu; | 204 | int cpu; |
205 | 205 | ||
206 | local_irq_save(flags); | 206 | local_irq_save(flags); |
207 | 207 | ||
208 | cpu = smp_processor_id(); | 208 | cpu = smp_processor_id(); |
209 | ts = &per_cpu(tick_cpu_sched, cpu); | 209 | ts = &per_cpu(tick_cpu_sched, cpu); |
210 | now = tick_nohz_start_idle(ts); | 210 | now = tick_nohz_start_idle(ts); |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * If this cpu is offline and it is the one which updates | 213 | * If this cpu is offline and it is the one which updates |
214 | * jiffies, then give up the assignment and let it be taken by | 214 | * jiffies, then give up the assignment and let it be taken by |
215 | * the cpu which runs the tick timer next. If we don't drop | 215 | * the cpu which runs the tick timer next. If we don't drop |
216 | * this here the jiffies might be stale and do_timer() never | 216 | * this here the jiffies might be stale and do_timer() never |
217 | * invoked. | 217 | * invoked. |
218 | */ | 218 | */ |
219 | if (unlikely(!cpu_online(cpu))) { | 219 | if (unlikely(!cpu_online(cpu))) { |
220 | if (cpu == tick_do_timer_cpu) | 220 | if (cpu == tick_do_timer_cpu) |
221 | tick_do_timer_cpu = -1; | 221 | tick_do_timer_cpu = -1; |
222 | } | 222 | } |
223 | 223 | ||
224 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 224 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
225 | goto end; | 225 | goto end; |
226 | 226 | ||
227 | if (!inidle && !ts->inidle) | 227 | if (!inidle && !ts->inidle) |
228 | goto end; | 228 | goto end; |
229 | 229 | ||
230 | ts->inidle = 1; | 230 | ts->inidle = 1; |
231 | 231 | ||
232 | if (need_resched()) | 232 | if (need_resched()) |
233 | goto end; | 233 | goto end; |
234 | 234 | ||
235 | if (unlikely(local_softirq_pending())) { | 235 | if (unlikely(local_softirq_pending())) { |
236 | static int ratelimit; | 236 | static int ratelimit; |
237 | 237 | ||
238 | if (ratelimit < 10) { | 238 | if (ratelimit < 10) { |
239 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | 239 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", |
240 | local_softirq_pending()); | 240 | local_softirq_pending()); |
241 | ratelimit++; | 241 | ratelimit++; |
242 | } | 242 | } |
243 | goto end; | 243 | goto end; |
244 | } | 244 | } |
245 | 245 | ||
246 | ts->idle_calls++; | 246 | ts->idle_calls++; |
247 | /* Read jiffies and the time when jiffies were updated last */ | 247 | /* Read jiffies and the time when jiffies were updated last */ |
248 | do { | 248 | do { |
249 | seq = read_seqbegin(&xtime_lock); | 249 | seq = read_seqbegin(&xtime_lock); |
250 | last_update = last_jiffies_update; | 250 | last_update = last_jiffies_update; |
251 | last_jiffies = jiffies; | 251 | last_jiffies = jiffies; |
252 | } while (read_seqretry(&xtime_lock, seq)); | 252 | } while (read_seqretry(&xtime_lock, seq)); |
253 | 253 | ||
254 | /* Get the next timer wheel timer */ | 254 | /* Get the next timer wheel timer */ |
255 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 255 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
256 | delta_jiffies = next_jiffies - last_jiffies; | 256 | delta_jiffies = next_jiffies - last_jiffies; |
257 | 257 | ||
258 | if (rcu_needs_cpu(cpu)) | 258 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) |
259 | delta_jiffies = 1; | 259 | delta_jiffies = 1; |
260 | /* | 260 | /* |
261 | * Do not stop the tick if we are only one off | 261 | * Do not stop the tick if we are only one off |
262 | * or if the cpu is required for rcu | 262 | * or if the cpu is required for rcu or printk |
263 | */ | 263 | */ |
264 | if (!ts->tick_stopped && delta_jiffies == 1) | 264 | if (!ts->tick_stopped && delta_jiffies == 1) |
265 | goto out; | 265 | goto out; |
266 | 266 | ||
267 | /* Schedule the tick if we are at least one jiffy off */ | 267 | /* Schedule the tick if we are at least one jiffy off */ |
268 | if ((long)delta_jiffies >= 1) { | 268 | if ((long)delta_jiffies >= 1) { |
269 | 269 | ||
270 | if (delta_jiffies > 1) | 270 | if (delta_jiffies > 1) |
271 | cpu_set(cpu, nohz_cpu_mask); | 271 | cpu_set(cpu, nohz_cpu_mask); |
272 | /* | 272 | /* |
273 | * nohz_stop_sched_tick can be called several times before | 273 | * nohz_stop_sched_tick can be called several times before |
274 | * the nohz_restart_sched_tick is called. This happens when | 274 | * the nohz_restart_sched_tick is called. This happens when |
275 | * interrupts arrive which do not cause a reschedule. In the | 275 | * interrupts arrive which do not cause a reschedule. In the |
276 | * first call we save the current tick time, so we can restart | 276 | * first call we save the current tick time, so we can restart |
277 | * the scheduler tick in nohz_restart_sched_tick. | 277 | * the scheduler tick in nohz_restart_sched_tick. |
278 | */ | 278 | */ |
279 | if (!ts->tick_stopped) { | 279 | if (!ts->tick_stopped) { |
280 | if (select_nohz_load_balancer(1)) { | 280 | if (select_nohz_load_balancer(1)) { |
281 | /* | 281 | /* |
282 | * sched tick not stopped! | 282 | * sched tick not stopped! |
283 | */ | 283 | */ |
284 | cpu_clear(cpu, nohz_cpu_mask); | 284 | cpu_clear(cpu, nohz_cpu_mask); |
285 | goto out; | 285 | goto out; |
286 | } | 286 | } |
287 | 287 | ||
288 | ts->idle_tick = ts->sched_timer.expires; | 288 | ts->idle_tick = ts->sched_timer.expires; |
289 | ts->tick_stopped = 1; | 289 | ts->tick_stopped = 1; |
290 | ts->idle_jiffies = last_jiffies; | 290 | ts->idle_jiffies = last_jiffies; |
291 | rcu_enter_nohz(); | 291 | rcu_enter_nohz(); |
292 | sched_clock_tick_stop(cpu); | 292 | sched_clock_tick_stop(cpu); |
293 | } | 293 | } |
294 | 294 | ||
295 | /* | 295 | /* |
296 | * If this cpu is the one which updates jiffies, then | 296 | * If this cpu is the one which updates jiffies, then |
297 | * give up the assignment and let it be taken by the | 297 | * give up the assignment and let it be taken by the |
298 | * cpu which runs the tick timer next, which might be | 298 | * cpu which runs the tick timer next, which might be |
299 | * this cpu as well. If we don't drop this here the | 299 | * this cpu as well. If we don't drop this here the |
300 | * jiffies might be stale and do_timer() never | 300 | * jiffies might be stale and do_timer() never |
301 | * invoked. | 301 | * invoked. |
302 | */ | 302 | */ |
303 | if (cpu == tick_do_timer_cpu) | 303 | if (cpu == tick_do_timer_cpu) |
304 | tick_do_timer_cpu = -1; | 304 | tick_do_timer_cpu = -1; |
305 | 305 | ||
306 | ts->idle_sleeps++; | 306 | ts->idle_sleeps++; |
307 | 307 | ||
308 | /* | 308 | /* |
309 | * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that | 309 | * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that |
310 | * there is no timer pending or at least extremely far | 310 | * there is no timer pending or at least extremely far |
311 | * into the future (12 days for HZ=1000). In this case | 311 | * into the future (12 days for HZ=1000). In this case |
312 | * we simply stop the tick timer: | 312 | * we simply stop the tick timer: |
313 | */ | 313 | */ |
314 | if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { | 314 | if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { |
315 | ts->idle_expires.tv64 = KTIME_MAX; | 315 | ts->idle_expires.tv64 = KTIME_MAX; |
316 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 316 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
317 | hrtimer_cancel(&ts->sched_timer); | 317 | hrtimer_cancel(&ts->sched_timer); |
318 | goto out; | 318 | goto out; |
319 | } | 319 | } |
320 | 320 | ||
321 | /* | 321 | /* |
322 | * calculate the expiry time for the next timer wheel | 322 | * calculate the expiry time for the next timer wheel |
323 | * timer | 323 | * timer |
324 | */ | 324 | */ |
325 | expires = ktime_add_ns(last_update, tick_period.tv64 * | 325 | expires = ktime_add_ns(last_update, tick_period.tv64 * |
326 | delta_jiffies); | 326 | delta_jiffies); |
327 | ts->idle_expires = expires; | 327 | ts->idle_expires = expires; |
328 | 328 | ||
329 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 329 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
330 | hrtimer_start(&ts->sched_timer, expires, | 330 | hrtimer_start(&ts->sched_timer, expires, |
331 | HRTIMER_MODE_ABS); | 331 | HRTIMER_MODE_ABS); |
332 | /* Check if the timer was already in the past */ | 332 | /* Check if the timer was already in the past */ |
333 | if (hrtimer_active(&ts->sched_timer)) | 333 | if (hrtimer_active(&ts->sched_timer)) |
334 | goto out; | 334 | goto out; |
335 | } else if (!tick_program_event(expires, 0)) | 335 | } else if (!tick_program_event(expires, 0)) |
336 | goto out; | 336 | goto out; |
337 | /* | 337 | /* |
338 | * We are past the event already. So we crossed a | 338 | * We are past the event already. So we crossed a |
339 | * jiffy boundary. Update jiffies and raise the | 339 | * jiffy boundary. Update jiffies and raise the |
340 | * softirq. | 340 | * softirq. |
341 | */ | 341 | */ |
342 | tick_do_update_jiffies64(ktime_get()); | 342 | tick_do_update_jiffies64(ktime_get()); |
343 | cpu_clear(cpu, nohz_cpu_mask); | 343 | cpu_clear(cpu, nohz_cpu_mask); |
344 | } | 344 | } |
345 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 345 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
346 | out: | 346 | out: |
347 | ts->next_jiffies = next_jiffies; | 347 | ts->next_jiffies = next_jiffies; |
348 | ts->last_jiffies = last_jiffies; | 348 | ts->last_jiffies = last_jiffies; |
349 | ts->sleep_length = ktime_sub(dev->next_event, now); | 349 | ts->sleep_length = ktime_sub(dev->next_event, now); |
350 | end: | 350 | end: |
351 | local_irq_restore(flags); | 351 | local_irq_restore(flags); |
352 | } | 352 | } |
353 | 353 | ||
354 | /** | 354 | /** |
355 | * tick_nohz_get_sleep_length - return the length of the current sleep | 355 | * tick_nohz_get_sleep_length - return the length of the current sleep |
356 | * | 356 | * |
357 | * Called from power state control code with interrupts disabled | 357 | * Called from power state control code with interrupts disabled |
358 | */ | 358 | */ |
359 | ktime_t tick_nohz_get_sleep_length(void) | 359 | ktime_t tick_nohz_get_sleep_length(void) |
360 | { | 360 | { |
361 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 361 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
362 | 362 | ||
363 | return ts->sleep_length; | 363 | return ts->sleep_length; |
364 | } | 364 | } |
365 | 365 | ||
366 | /** | 366 | /** |
367 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | 367 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task |
368 | * | 368 | * |
369 | * Restart the idle tick when the CPU is woken up from idle | 369 | * Restart the idle tick when the CPU is woken up from idle |
370 | */ | 370 | */ |
371 | void tick_nohz_restart_sched_tick(void) | 371 | void tick_nohz_restart_sched_tick(void) |
372 | { | 372 | { |
373 | int cpu = smp_processor_id(); | 373 | int cpu = smp_processor_id(); |
374 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 374 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
375 | unsigned long ticks; | 375 | unsigned long ticks; |
376 | ktime_t now; | 376 | ktime_t now; |
377 | 377 | ||
378 | local_irq_disable(); | 378 | local_irq_disable(); |
379 | tick_nohz_stop_idle(cpu); | 379 | tick_nohz_stop_idle(cpu); |
380 | 380 | ||
381 | if (!ts->inidle || !ts->tick_stopped) { | 381 | if (!ts->inidle || !ts->tick_stopped) { |
382 | ts->inidle = 0; | 382 | ts->inidle = 0; |
383 | local_irq_enable(); | 383 | local_irq_enable(); |
384 | return; | 384 | return; |
385 | } | 385 | } |
386 | 386 | ||
387 | ts->inidle = 0; | 387 | ts->inidle = 0; |
388 | 388 | ||
389 | rcu_exit_nohz(); | 389 | rcu_exit_nohz(); |
390 | 390 | ||
391 | /* Update jiffies first */ | 391 | /* Update jiffies first */ |
392 | select_nohz_load_balancer(0); | 392 | select_nohz_load_balancer(0); |
393 | now = ktime_get(); | 393 | now = ktime_get(); |
394 | tick_do_update_jiffies64(now); | 394 | tick_do_update_jiffies64(now); |
395 | sched_clock_tick_start(cpu); | 395 | sched_clock_tick_start(cpu); |
396 | cpu_clear(cpu, nohz_cpu_mask); | 396 | cpu_clear(cpu, nohz_cpu_mask); |
397 | 397 | ||
398 | /* | 398 | /* |
399 | * We stopped the tick in idle. update_process_times() would miss the | 399 | * We stopped the tick in idle. update_process_times() would miss the |
400 | * time we slept, as it only does one tick of | 400 | * time we slept, as it only does one tick of |
401 | * accounting. Enforce that this is accounted to idle! | 401 | * accounting. Enforce that this is accounted to idle! |
402 | */ | 402 | */ |
403 | ticks = jiffies - ts->idle_jiffies; | 403 | ticks = jiffies - ts->idle_jiffies; |
404 | /* | 404 | /* |
405 | * We might be one off. Do not randomly account a huge number of ticks! | 405 | * We might be one off. Do not randomly account a huge number of ticks! |
406 | */ | 406 | */ |
407 | if (ticks && ticks < LONG_MAX) { | 407 | if (ticks && ticks < LONG_MAX) { |
408 | add_preempt_count(HARDIRQ_OFFSET); | 408 | add_preempt_count(HARDIRQ_OFFSET); |
409 | account_system_time(current, HARDIRQ_OFFSET, | 409 | account_system_time(current, HARDIRQ_OFFSET, |
410 | jiffies_to_cputime(ticks)); | 410 | jiffies_to_cputime(ticks)); |
411 | sub_preempt_count(HARDIRQ_OFFSET); | 411 | sub_preempt_count(HARDIRQ_OFFSET); |
412 | } | 412 | } |
413 | 413 | ||
414 | touch_softlockup_watchdog(); | 414 | touch_softlockup_watchdog(); |
415 | /* | 415 | /* |
416 | * Cancel the scheduled timer and restore the tick | 416 | * Cancel the scheduled timer and restore the tick |
417 | */ | 417 | */ |
418 | ts->tick_stopped = 0; | 418 | ts->tick_stopped = 0; |
419 | ts->idle_exittime = now; | 419 | ts->idle_exittime = now; |
420 | hrtimer_cancel(&ts->sched_timer); | 420 | hrtimer_cancel(&ts->sched_timer); |
421 | ts->sched_timer.expires = ts->idle_tick; | 421 | ts->sched_timer.expires = ts->idle_tick; |
422 | 422 | ||
423 | while (1) { | 423 | while (1) { |
424 | /* Forward the time to expire in the future */ | 424 | /* Forward the time to expire in the future */ |
425 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 425 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
426 | 426 | ||
427 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 427 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
428 | hrtimer_start(&ts->sched_timer, | 428 | hrtimer_start(&ts->sched_timer, |
429 | ts->sched_timer.expires, | 429 | ts->sched_timer.expires, |
430 | HRTIMER_MODE_ABS); | 430 | HRTIMER_MODE_ABS); |
431 | /* Check if the timer was already in the past */ | 431 | /* Check if the timer was already in the past */ |
432 | if (hrtimer_active(&ts->sched_timer)) | 432 | if (hrtimer_active(&ts->sched_timer)) |
433 | break; | 433 | break; |
434 | } else { | 434 | } else { |
435 | if (!tick_program_event(ts->sched_timer.expires, 0)) | 435 | if (!tick_program_event(ts->sched_timer.expires, 0)) |
436 | break; | 436 | break; |
437 | } | 437 | } |
438 | /* Update jiffies and reread time */ | 438 | /* Update jiffies and reread time */ |
439 | tick_do_update_jiffies64(now); | 439 | tick_do_update_jiffies64(now); |
440 | now = ktime_get(); | 440 | now = ktime_get(); |
441 | } | 441 | } |
442 | local_irq_enable(); | 442 | local_irq_enable(); |
443 | } | 443 | } |
444 | 444 | ||
445 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | 445 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) |
446 | { | 446 | { |
447 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 447 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
448 | return tick_program_event(ts->sched_timer.expires, 0); | 448 | return tick_program_event(ts->sched_timer.expires, 0); |
449 | } | 449 | } |
450 | 450 | ||
451 | /* | 451 | /* |
452 | * The nohz low res interrupt handler | 452 | * The nohz low res interrupt handler |
453 | */ | 453 | */ |
454 | static void tick_nohz_handler(struct clock_event_device *dev) | 454 | static void tick_nohz_handler(struct clock_event_device *dev) |
455 | { | 455 | { |
456 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 456 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
457 | struct pt_regs *regs = get_irq_regs(); | 457 | struct pt_regs *regs = get_irq_regs(); |
458 | int cpu = smp_processor_id(); | 458 | int cpu = smp_processor_id(); |
459 | ktime_t now = ktime_get(); | 459 | ktime_t now = ktime_get(); |
460 | 460 | ||
461 | dev->next_event.tv64 = KTIME_MAX; | 461 | dev->next_event.tv64 = KTIME_MAX; |
462 | 462 | ||
463 | /* | 463 | /* |
464 | * Check if the do_timer duty was dropped. We don't care about | 464 | * Check if the do_timer duty was dropped. We don't care about |
465 | * concurrency: This happens only when the cpu in charge went | 465 | * concurrency: This happens only when the cpu in charge went |
466 | * into a long sleep. If two cpus happen to assign themselves to | 466 | * into a long sleep. If two cpus happen to assign themselves to |
467 | * this duty, then the jiffies update is still serialized by | 467 | * this duty, then the jiffies update is still serialized by |
468 | * xtime_lock. | 468 | * xtime_lock. |
469 | */ | 469 | */ |
470 | if (unlikely(tick_do_timer_cpu == -1)) | 470 | if (unlikely(tick_do_timer_cpu == -1)) |
471 | tick_do_timer_cpu = cpu; | 471 | tick_do_timer_cpu = cpu; |
472 | 472 | ||
473 | /* Check if the jiffies need an update */ | 473 | /* Check if the jiffies need an update */ |
474 | if (tick_do_timer_cpu == cpu) | 474 | if (tick_do_timer_cpu == cpu) |
475 | tick_do_update_jiffies64(now); | 475 | tick_do_update_jiffies64(now); |
476 | 476 | ||
477 | /* | 477 | /* |
478 | * When we are idle and the tick is stopped, we have to touch | 478 | * When we are idle and the tick is stopped, we have to touch |
479 | * the watchdog as we might not schedule for a really long | 479 | * the watchdog as we might not schedule for a really long |
480 | * time. This happens on complete idle SMP systems while | 480 | * time. This happens on complete idle SMP systems while |
481 | * waiting on the login prompt. We also increment the "start | 481 | * waiting on the login prompt. We also increment the "start |
482 | * of idle" jiffy stamp so the idle accounting adjustment we | 482 | * of idle" jiffy stamp so the idle accounting adjustment we |
483 | * do when we go busy again does not account too many ticks. | 483 | * do when we go busy again does not account too many ticks. |
484 | */ | 484 | */ |
485 | if (ts->tick_stopped) { | 485 | if (ts->tick_stopped) { |
486 | touch_softlockup_watchdog(); | 486 | touch_softlockup_watchdog(); |
487 | ts->idle_jiffies++; | 487 | ts->idle_jiffies++; |
488 | } | 488 | } |
489 | 489 | ||
490 | update_process_times(user_mode(regs)); | 490 | update_process_times(user_mode(regs)); |
491 | profile_tick(CPU_PROFILING); | 491 | profile_tick(CPU_PROFILING); |
492 | 492 | ||
493 | /* Do not restart when we are in the idle loop */ | 493 | /* Do not restart when we are in the idle loop */ |
494 | if (ts->tick_stopped) | 494 | if (ts->tick_stopped) |
495 | return; | 495 | return; |
496 | 496 | ||
497 | while (tick_nohz_reprogram(ts, now)) { | 497 | while (tick_nohz_reprogram(ts, now)) { |
498 | now = ktime_get(); | 498 | now = ktime_get(); |
499 | tick_do_update_jiffies64(now); | 499 | tick_do_update_jiffies64(now); |
500 | } | 500 | } |
501 | } | 501 | } |
502 | 502 | ||
503 | /** | 503 | /** |
504 | * tick_nohz_switch_to_nohz - switch to nohz mode | 504 | * tick_nohz_switch_to_nohz - switch to nohz mode |
505 | */ | 505 | */ |
506 | static void tick_nohz_switch_to_nohz(void) | 506 | static void tick_nohz_switch_to_nohz(void) |
507 | { | 507 | { |
508 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 508 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
509 | ktime_t next; | 509 | ktime_t next; |
510 | 510 | ||
511 | if (!tick_nohz_enabled) | 511 | if (!tick_nohz_enabled) |
512 | return; | 512 | return; |
513 | 513 | ||
514 | local_irq_disable(); | 514 | local_irq_disable(); |
515 | if (tick_switch_to_oneshot(tick_nohz_handler)) { | 515 | if (tick_switch_to_oneshot(tick_nohz_handler)) { |
516 | local_irq_enable(); | 516 | local_irq_enable(); |
517 | return; | 517 | return; |
518 | } | 518 | } |
519 | 519 | ||
520 | ts->nohz_mode = NOHZ_MODE_LOWRES; | 520 | ts->nohz_mode = NOHZ_MODE_LOWRES; |
521 | 521 | ||
522 | /* | 522 | /* |
523 | * Recycle the hrtimer in ts, so we can share the | 523 | * Recycle the hrtimer in ts, so we can share the |
524 | * hrtimer_forward with the highres code. | 524 | * hrtimer_forward with the highres code. |
525 | */ | 525 | */ |
526 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 526 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
527 | /* Get the next period */ | 527 | /* Get the next period */ |
528 | next = tick_init_jiffy_update(); | 528 | next = tick_init_jiffy_update(); |
529 | 529 | ||
530 | for (;;) { | 530 | for (;;) { |
531 | ts->sched_timer.expires = next; | 531 | ts->sched_timer.expires = next; |
532 | if (!tick_program_event(next, 0)) | 532 | if (!tick_program_event(next, 0)) |
533 | break; | 533 | break; |
534 | next = ktime_add(next, tick_period); | 534 | next = ktime_add(next, tick_period); |
535 | } | 535 | } |
536 | local_irq_enable(); | 536 | local_irq_enable(); |
537 | 537 | ||
538 | printk(KERN_INFO "Switched to NOHZ mode on CPU #%d\n", | 538 | printk(KERN_INFO "Switched to NOHZ mode on CPU #%d\n", |
539 | smp_processor_id()); | 539 | smp_processor_id()); |
540 | } | 540 | } |
541 | 541 | ||
542 | #else | 542 | #else |
543 | 543 | ||
544 | static inline void tick_nohz_switch_to_nohz(void) { } | 544 | static inline void tick_nohz_switch_to_nohz(void) { } |
545 | 545 | ||
546 | #endif /* NO_HZ */ | 546 | #endif /* NO_HZ */ |
547 | 547 | ||
548 | /* | 548 | /* |
549 | * High resolution timer specific code | 549 | * High resolution timer specific code |
550 | */ | 550 | */ |
551 | #ifdef CONFIG_HIGH_RES_TIMERS | 551 | #ifdef CONFIG_HIGH_RES_TIMERS |
552 | /* | 552 | /* |
553 | * We rearm the timer until we get disabled by the idle code. | 553 | * We rearm the timer until we get disabled by the idle code. |
554 | * Called with interrupts disabled and timer->base->cpu_base->lock held. | 554 | * Called with interrupts disabled and timer->base->cpu_base->lock held. |
555 | */ | 555 | */ |
556 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | 556 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) |
557 | { | 557 | { |
558 | struct tick_sched *ts = | 558 | struct tick_sched *ts = |
559 | container_of(timer, struct tick_sched, sched_timer); | 559 | container_of(timer, struct tick_sched, sched_timer); |
560 | struct pt_regs *regs = get_irq_regs(); | 560 | struct pt_regs *regs = get_irq_regs(); |
561 | ktime_t now = ktime_get(); | 561 | ktime_t now = ktime_get(); |
562 | int cpu = smp_processor_id(); | 562 | int cpu = smp_processor_id(); |
563 | 563 | ||
564 | #ifdef CONFIG_NO_HZ | 564 | #ifdef CONFIG_NO_HZ |
565 | /* | 565 | /* |
566 | * Check if the do_timer duty was dropped. We don't care about | 566 | * Check if the do_timer duty was dropped. We don't care about |
567 | * concurrency: This happens only when the cpu in charge went | 567 | * concurrency: This happens only when the cpu in charge went |
568 | * into a long sleep. If two cpus happen to assign themselves to | 568 | * into a long sleep. If two cpus happen to assign themselves to |
569 | * this duty, then the jiffies update is still serialized by | 569 | * this duty, then the jiffies update is still serialized by |
570 | * xtime_lock. | 570 | * xtime_lock. |
571 | */ | 571 | */ |
572 | if (unlikely(tick_do_timer_cpu == -1)) | 572 | if (unlikely(tick_do_timer_cpu == -1)) |
573 | tick_do_timer_cpu = cpu; | 573 | tick_do_timer_cpu = cpu; |
574 | #endif | 574 | #endif |
575 | 575 | ||
576 | /* Check if the jiffies need an update */ | 576 | /* Check if the jiffies need an update */ |
577 | if (tick_do_timer_cpu == cpu) | 577 | if (tick_do_timer_cpu == cpu) |
578 | tick_do_update_jiffies64(now); | 578 | tick_do_update_jiffies64(now); |
579 | 579 | ||
580 | /* | 580 | /* |
581 | * Do not call when we are not in irq context and have | 581 | * Do not call when we are not in irq context and have |
582 | * no valid regs pointer | 582 | * no valid regs pointer |
583 | */ | 583 | */ |
584 | if (regs) { | 584 | if (regs) { |
585 | /* | 585 | /* |
586 | * When we are idle and the tick is stopped, we have to touch | 586 | * When we are idle and the tick is stopped, we have to touch |
587 | * the watchdog as we might not schedule for a really long | 587 | * the watchdog as we might not schedule for a really long |
588 | * time. This happens on complete idle SMP systems while | 588 | * time. This happens on complete idle SMP systems while |
589 | * waiting on the login prompt. We also increment the "start of | 589 | * waiting on the login prompt. We also increment the "start of |
590 | * idle" jiffy stamp so the idle accounting adjustment we do | 590 | * idle" jiffy stamp so the idle accounting adjustment we do |
591 | * when we go busy again does not account too many ticks. | 591 | * when we go busy again does not account too many ticks. |
592 | */ | 592 | */ |
593 | if (ts->tick_stopped) { | 593 | if (ts->tick_stopped) { |
594 | touch_softlockup_watchdog(); | 594 | touch_softlockup_watchdog(); |
595 | ts->idle_jiffies++; | 595 | ts->idle_jiffies++; |
596 | } | 596 | } |
597 | update_process_times(user_mode(regs)); | 597 | update_process_times(user_mode(regs)); |
598 | profile_tick(CPU_PROFILING); | 598 | profile_tick(CPU_PROFILING); |
599 | } | 599 | } |
600 | 600 | ||
601 | /* Do not restart when we are in the idle loop */ | 601 | /* Do not restart when we are in the idle loop */ |
602 | if (ts->tick_stopped) | 602 | if (ts->tick_stopped) |
603 | return HRTIMER_NORESTART; | 603 | return HRTIMER_NORESTART; |
604 | 604 | ||
605 | hrtimer_forward(timer, now, tick_period); | 605 | hrtimer_forward(timer, now, tick_period); |
606 | 606 | ||
607 | return HRTIMER_RESTART; | 607 | return HRTIMER_RESTART; |
608 | } | 608 | } |
609 | 609 | ||
610 | /** | 610 | /** |
611 | * tick_setup_sched_timer - setup the tick emulation timer | 611 | * tick_setup_sched_timer - setup the tick emulation timer |
612 | */ | 612 | */ |
613 | void tick_setup_sched_timer(void) | 613 | void tick_setup_sched_timer(void) |
614 | { | 614 | { |
615 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 615 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
616 | ktime_t now = ktime_get(); | 616 | ktime_t now = ktime_get(); |
617 | u64 offset; | 617 | u64 offset; |
618 | 618 | ||
619 | /* | 619 | /* |
620 | * Emulate tick processing via per-CPU hrtimers: | 620 | * Emulate tick processing via per-CPU hrtimers: |
621 | */ | 621 | */ |
622 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 622 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
623 | ts->sched_timer.function = tick_sched_timer; | 623 | ts->sched_timer.function = tick_sched_timer; |
624 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | 624 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; |
625 | 625 | ||
626 | /* Get the next period (per cpu) */ | 626 | /* Get the next period (per cpu) */ |
627 | ts->sched_timer.expires = tick_init_jiffy_update(); | 627 | ts->sched_timer.expires = tick_init_jiffy_update(); |
628 | offset = ktime_to_ns(tick_period) >> 1; | 628 | offset = ktime_to_ns(tick_period) >> 1; |
629 | do_div(offset, num_possible_cpus()); | 629 | do_div(offset, num_possible_cpus()); |
630 | offset *= smp_processor_id(); | 630 | offset *= smp_processor_id(); |
631 | ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); | 631 | ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); |
632 | 632 | ||
633 | for (;;) { | 633 | for (;;) { |
634 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 634 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
635 | hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, | 635 | hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, |
636 | HRTIMER_MODE_ABS); | 636 | HRTIMER_MODE_ABS); |
637 | /* Check if the timer was already in the past */ | 637 | /* Check if the timer was already in the past */ |
638 | if (hrtimer_active(&ts->sched_timer)) | 638 | if (hrtimer_active(&ts->sched_timer)) |
639 | break; | 639 | break; |
640 | now = ktime_get(); | 640 | now = ktime_get(); |
641 | } | 641 | } |
642 | 642 | ||
643 | #ifdef CONFIG_NO_HZ | 643 | #ifdef CONFIG_NO_HZ |
644 | if (tick_nohz_enabled) | 644 | if (tick_nohz_enabled) |
645 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 645 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
646 | #endif | 646 | #endif |
647 | } | 647 | } |
648 | 648 | ||
649 | void tick_cancel_sched_timer(int cpu) | 649 | void tick_cancel_sched_timer(int cpu) |
650 | { | 650 | { |
651 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 651 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
652 | 652 | ||
653 | if (ts->sched_timer.base) | 653 | if (ts->sched_timer.base) |
654 | hrtimer_cancel(&ts->sched_timer); | 654 | hrtimer_cancel(&ts->sched_timer); |
655 | 655 | ||
656 | ts->nohz_mode = NOHZ_MODE_INACTIVE; | 656 | ts->nohz_mode = NOHZ_MODE_INACTIVE; |
657 | } | 657 | } |
658 | #endif /* HIGH_RES_TIMERS */ | 658 | #endif /* HIGH_RES_TIMERS */ |
659 | 659 | ||
660 | /** | 660 | /** |
661 | * Async notification about clocksource changes | 661 | * Async notification about clocksource changes |
662 | */ | 662 | */ |
663 | void tick_clock_notify(void) | 663 | void tick_clock_notify(void) |
664 | { | 664 | { |
665 | int cpu; | 665 | int cpu; |
666 | 666 | ||
667 | for_each_possible_cpu(cpu) | 667 | for_each_possible_cpu(cpu) |
668 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); | 668 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); |
669 | } | 669 | } |
670 | 670 | ||
671 | /* | 671 | /* |
672 | * Async notification about clock event changes | 672 | * Async notification about clock event changes |
673 | */ | 673 | */ |
674 | void tick_oneshot_notify(void) | 674 | void tick_oneshot_notify(void) |
675 | { | 675 | { |
676 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 676 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
677 | 677 | ||
678 | set_bit(0, &ts->check_clocks); | 678 | set_bit(0, &ts->check_clocks); |
679 | } | 679 | } |
680 | 680 | ||
681 | /** | 681 | /** |
682 | * Check if a change happened which makes oneshot possible. | 682 | * Check if a change happened which makes oneshot possible. |
683 | * | 683 | * |
684 | * Called cyclically from the hrtimer softirq (driven by the timer | 684 | * Called cyclically from the hrtimer softirq (driven by the timer |
685 | * softirq). allow_nohz signals that we can switch into low-res nohz | 685 | * softirq). allow_nohz signals that we can switch into low-res nohz |
686 | * mode, because high resolution timers are disabled (either at | 686 | * mode, because high resolution timers are disabled (either at |
687 | * compile time or at runtime). | 687 | * compile time or at runtime). |
688 | */ | 688 | */ |
689 | int tick_check_oneshot_change(int allow_nohz) | 689 | int tick_check_oneshot_change(int allow_nohz) |
690 | { | 690 | { |
691 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 691 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
692 | 692 | ||
693 | if (!test_and_clear_bit(0, &ts->check_clocks)) | 693 | if (!test_and_clear_bit(0, &ts->check_clocks)) |
694 | return 0; | 694 | return 0; |
695 | 695 | ||
696 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) | 696 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) |
697 | return 0; | 697 | return 0; |
698 | 698 | ||
699 | if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) | 699 | if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) |
700 | return 0; | 700 | return 0; |
701 | 701 | ||
702 | if (!allow_nohz) | 702 | if (!allow_nohz) |
703 | return 1; | 703 | return 1; |
704 | 704 | ||
705 | tick_nohz_switch_to_nohz(); | 705 | tick_nohz_switch_to_nohz(); |
706 | return 0; | 706 | return 0; |
707 | } | 707 | } |
708 | 708 |
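The kernel/timer.c hunk that follows is truncated here, but the remaining piece of the scheme is the polling call itself: printk_tick() has to run from the periodic tick so a deferred wakeup is never lost. A sketch of how that plugs into update_process_times(), assuming the patch hooks it there (the exact placement is not visible in this excerpt):

/*
 * Sketch: the per-tick housekeeping path, with the deferred klogd
 * wakeup polled alongside the other periodic work. This runs in
 * timer interrupt context, where the caller holds neither rq->lock
 * nor xtime_lock, so performing the wakeup here is safe.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id();

	account_process_tick(p, user_tick);
	run_local_timers();
	if (rcu_pending(cpu))
		rcu_check_callbacks(cpu, user_tick);
	printk_tick();		/* flush a deferred klogd wakeup, if any */
	scheduler_tick();
	run_posix_cpu_timers(p);
}

Note the trade-off this polling design accepts: a message printed while the tick is about to be stopped may wait up to one jiffy for its klogd wakeup — the tick-sched.c change above keeps the next tick one jiffy away whenever printk_needs_cpu() is true — in exchange for removing the lock-ordering hazards of waking klogd from inside printk().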
kernel/timer.c
1 | /* | 1 | /* |
2 | * linux/kernel/timer.c | 2 | * linux/kernel/timer.c |
3 | * | 3 | * |
4 | * Kernel internal timers, basic process system calls | 4 | * Kernel internal timers, basic process system calls |
5 | * | 5 | * |
6 | * Copyright (C) 1991, 1992 Linus Torvalds | 6 | * Copyright (C) 1991, 1992 Linus Torvalds |
7 | * | 7 | * |
8 | * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. | 8 | * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. |
9 | * | 9 | * |
10 | * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 | 10 | * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 |
11 | * "A Kernel Model for Precision Timekeeping" by Dave Mills | 11 | * "A Kernel Model for Precision Timekeeping" by Dave Mills |
12 | * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to | 12 | * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to |
13 | * serialize accesses to xtime/lost_ticks). | 13 | * serialize accesses to xtime/lost_ticks). |
14 | * Copyright (C) 1998 Andrea Arcangeli | 14 | * Copyright (C) 1998 Andrea Arcangeli |
15 | * 1999-03-10 Improved NTP compatibility by Ulrich Windl | 15 | * 1999-03-10 Improved NTP compatibility by Ulrich Windl |
16 | * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love | 16 | * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love |
17 | * 2000-10-05 Implemented scalable SMP per-CPU timer handling. | 17 | * 2000-10-05 Implemented scalable SMP per-CPU timer handling. |
18 | * Copyright (C) 2000, 2001, 2002 Ingo Molnar | 18 | * Copyright (C) 2000, 2001, 2002 Ingo Molnar |
19 | * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar | 19 | * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/kernel_stat.h> | 22 | #include <linux/kernel_stat.h> |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/percpu.h> | 25 | #include <linux/percpu.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
28 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
29 | #include <linux/pid_namespace.h> | 29 | #include <linux/pid_namespace.h> |
30 | #include <linux/notifier.h> | 30 | #include <linux/notifier.h> |
31 | #include <linux/thread_info.h> | 31 | #include <linux/thread_info.h> |
32 | #include <linux/time.h> | 32 | #include <linux/time.h> |
33 | #include <linux/jiffies.h> | 33 | #include <linux/jiffies.h> |
34 | #include <linux/posix-timers.h> | 34 | #include <linux/posix-timers.h> |
35 | #include <linux/cpu.h> | 35 | #include <linux/cpu.h> |
36 | #include <linux/syscalls.h> | 36 | #include <linux/syscalls.h> |
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
39 | #include <linux/kallsyms.h> | 39 | #include <linux/kallsyms.h> |
40 | 40 | ||
41 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | #include <asm/unistd.h> | 42 | #include <asm/unistd.h> |
43 | #include <asm/div64.h> | 43 | #include <asm/div64.h> |
44 | #include <asm/timex.h> | 44 | #include <asm/timex.h> |
45 | #include <asm/io.h> | 45 | #include <asm/io.h> |
46 | 46 | ||
47 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; | 47 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; |
48 | 48 | ||
49 | EXPORT_SYMBOL(jiffies_64); | 49 | EXPORT_SYMBOL(jiffies_64); |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * per-CPU timer vector definitions: | 52 | * per-CPU timer vector definitions: |
53 | */ | 53 | */ |
54 | #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) | 54 | #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) |
55 | #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) | 55 | #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) |
56 | #define TVN_SIZE (1 << TVN_BITS) | 56 | #define TVN_SIZE (1 << TVN_BITS) |
57 | #define TVR_SIZE (1 << TVR_BITS) | 57 | #define TVR_SIZE (1 << TVR_BITS) |
58 | #define TVN_MASK (TVN_SIZE - 1) | 58 | #define TVN_MASK (TVN_SIZE - 1) |
59 | #define TVR_MASK (TVR_SIZE - 1) | 59 | #define TVR_MASK (TVR_SIZE - 1) |
60 | 60 | ||
61 | struct tvec { | 61 | struct tvec { |
62 | struct list_head vec[TVN_SIZE]; | 62 | struct list_head vec[TVN_SIZE]; |
63 | }; | 63 | }; |
64 | 64 | ||
65 | struct tvec_root { | 65 | struct tvec_root { |
66 | struct list_head vec[TVR_SIZE]; | 66 | struct list_head vec[TVR_SIZE]; |
67 | }; | 67 | }; |
68 | 68 | ||
69 | struct tvec_base { | 69 | struct tvec_base { |
70 | spinlock_t lock; | 70 | spinlock_t lock; |
71 | struct timer_list *running_timer; | 71 | struct timer_list *running_timer; |
72 | unsigned long timer_jiffies; | 72 | unsigned long timer_jiffies; |
73 | struct tvec_root tv1; | 73 | struct tvec_root tv1; |
74 | struct tvec tv2; | 74 | struct tvec tv2; |
75 | struct tvec tv3; | 75 | struct tvec tv3; |
76 | struct tvec tv4; | 76 | struct tvec tv4; |
77 | struct tvec tv5; | 77 | struct tvec tv5; |
78 | } ____cacheline_aligned; | 78 | } ____cacheline_aligned; |
79 | 79 | ||
80 | struct tvec_base boot_tvec_bases; | 80 | struct tvec_base boot_tvec_bases; |
81 | EXPORT_SYMBOL(boot_tvec_bases); | 81 | EXPORT_SYMBOL(boot_tvec_bases); |
82 | static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; | 82 | static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * Note that all tvec_bases are 2-byte aligned and the lower bit of | 85 | * Note that all tvec_bases are 2-byte aligned and the lower bit of |
86 | * the base pointer in timer_list is guaranteed to be zero. Use the | 86 | * the base pointer in timer_list is guaranteed to be zero. Use the |
87 | * LSB for the new flag to indicate whether the timer is deferrable. | 87 | * LSB for the new flag to indicate whether the timer is deferrable. |
88 | */ | 88 | */ |
89 | #define TBASE_DEFERRABLE_FLAG (0x1) | 89 | #define TBASE_DEFERRABLE_FLAG (0x1) |
90 | 90 | ||
91 | /* Functions below help us manage 'deferrable' flag */ | 91 | /* Functions below help us manage 'deferrable' flag */ |
92 | static inline unsigned int tbase_get_deferrable(struct tvec_base *base) | 92 | static inline unsigned int tbase_get_deferrable(struct tvec_base *base) |
93 | { | 93 | { |
94 | return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); | 94 | return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); |
95 | } | 95 | } |
96 | 96 | ||
97 | static inline struct tvec_base *tbase_get_base(struct tvec_base *base) | 97 | static inline struct tvec_base *tbase_get_base(struct tvec_base *base) |
98 | { | 98 | { |
99 | return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); | 99 | return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); |
100 | } | 100 | } |
101 | 101 | ||
102 | static inline void timer_set_deferrable(struct timer_list *timer) | 102 | static inline void timer_set_deferrable(struct timer_list *timer) |
103 | { | 103 | { |
104 | timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | | 104 | timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | |
105 | TBASE_DEFERRABLE_FLAG)); | 105 | TBASE_DEFERRABLE_FLAG)); |
106 | } | 106 | } |
107 | 107 | ||
108 | static inline void | 108 | static inline void |
109 | timer_set_base(struct timer_list *timer, struct tvec_base *new_base) | 109 | timer_set_base(struct timer_list *timer, struct tvec_base *new_base) |
110 | { | 110 | { |
111 | timer->base = (struct tvec_base *)((unsigned long)(new_base) | | 111 | timer->base = (struct tvec_base *)((unsigned long)(new_base) | |
112 | tbase_get_deferrable(timer->base)); | 112 | tbase_get_deferrable(timer->base)); |
113 | } | 113 | } |
114 | 114 | ||
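These helpers work because struct tvec_base is aligned, so bit 0 of any real base pointer is always zero and is free to carry the deferrable flag. A minimal, self-contained userspace sketch of the same pointer-tagging trick (names are illustrative, not kernel API):

	#include <assert.h>
	#include <stdio.h>

	struct base { long pad; };	/* naturally aligned: bit 0 is always 0 */

	#define DEFER_FLAG 0x1UL

	static struct base *tag(struct base *p)
	{
		return (struct base *)((unsigned long)p | DEFER_FLAG);
	}

	static unsigned long get_flag(struct base *p)
	{
		return (unsigned long)p & DEFER_FLAG;
	}

	static struct base *untag(struct base *p)
	{
		return (struct base *)((unsigned long)p & ~DEFER_FLAG);
	}

	int main(void)
	{
		struct base b;
		struct base *p = tag(&b);

		assert(get_flag(p) == 1);	/* flag survives */
		assert(untag(p) == &b);		/* real pointer recoverable */
		printf("tagged=%p base=%p flag=%lu\n",
		       (void *)p, (void *)untag(p), get_flag(p));
		return 0;
	}

timer_set_base() above is the write-side counterpart: it preserves the flag bit while swapping in a new pointer.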
115 | /** | 115 | /** |
116 | * __round_jiffies - function to round jiffies to a full second | 116 | * __round_jiffies - function to round jiffies to a full second |
117 | * @j: the time in (absolute) jiffies that should be rounded | 117 | * @j: the time in (absolute) jiffies that should be rounded |
118 | * @cpu: the processor number on which the timeout will happen | 118 | * @cpu: the processor number on which the timeout will happen |
119 | * | 119 | * |
120 | * __round_jiffies() rounds an absolute time in the future (in jiffies) | 120 | * __round_jiffies() rounds an absolute time in the future (in jiffies) |
121 | * up or down to (approximately) full seconds. This is useful for timers | 121 | * up or down to (approximately) full seconds. This is useful for timers |
122 | * for which the exact time they fire does not matter too much, as long as | 122 | * for which the exact time they fire does not matter too much, as long as |
123 | * they fire approximately every X seconds. | 123 | * they fire approximately every X seconds. |
124 | * | 124 | * |
125 | * By rounding these timers to whole seconds, all such timers will fire | 125 | * By rounding these timers to whole seconds, all such timers will fire |
126 | * at the same time, rather than at various times spread out. The goal | 126 | * at the same time, rather than at various times spread out. The goal |
127 | * of this is to have the CPU wake up less, which saves power. | 127 | * of this is to have the CPU wake up less, which saves power. |
128 | * | 128 | * |
129 | * The exact rounding is skewed for each processor to avoid all | 129 | * The exact rounding is skewed for each processor to avoid all |
130 | * processors firing at the exact same time, which could lead | 130 | * processors firing at the exact same time, which could lead |
131 | * to lock contention or spurious cache line bouncing. | 131 | * to lock contention or spurious cache line bouncing. |
132 | * | 132 | * |
133 | * The return value is the rounded version of the @j parameter. | 133 | * The return value is the rounded version of the @j parameter. |
134 | */ | 134 | */ |
135 | unsigned long __round_jiffies(unsigned long j, int cpu) | 135 | unsigned long __round_jiffies(unsigned long j, int cpu) |
136 | { | 136 | { |
137 | int rem; | 137 | int rem; |
138 | unsigned long original = j; | 138 | unsigned long original = j; |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * We don't want all cpus firing their timers at once hitting the | 141 | * We don't want all cpus firing their timers at once hitting the |
142 | * same lock or cachelines, so we skew each extra cpu with an extra | 142 | * same lock or cachelines, so we skew each extra cpu with an extra |
143 | * 3 jiffies. This 3 jiffies came originally from the mm/ code which | 143 | * 3 jiffies. This 3 jiffies came originally from the mm/ code which |
144 | * already did this. | 144 | * already did this. |
145 | * The skew is done by adding 3*cpunr, then round, then subtract this | 145 | * The skew is done by adding 3*cpunr, then round, then subtract this |
146 | * extra offset again. | 146 | * extra offset again. |
147 | */ | 147 | */ |
148 | j += cpu * 3; | 148 | j += cpu * 3; |
149 | 149 | ||
150 | rem = j % HZ; | 150 | rem = j % HZ; |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * If the target jiffy is just after a whole second (which can happen | 153 | * If the target jiffy is just after a whole second (which can happen |
154 | * due to delays of the timer irq, long irq-off times, etc.) then | 154 | * due to delays of the timer irq, long irq-off times, etc.) then |
155 | * we should round down to the whole second, not up. Use 1/4 second | 155 | * we should round down to the whole second, not up. Use 1/4 second |
156 | * as the cutoff for this rounding, as an extreme upper bound. | 156 | * as the cutoff for this rounding, as an extreme upper bound. |
157 | */ | 157 | */ |
158 | if (rem < HZ/4) /* round down */ | 158 | if (rem < HZ/4) /* round down */ |
159 | j = j - rem; | 159 | j = j - rem; |
160 | else /* round up */ | 160 | else /* round up */ |
161 | j = j - rem + HZ; | 161 | j = j - rem + HZ; |
162 | 162 | ||
163 | /* now that we have rounded, subtract the extra skew again */ | 163 | /* now that we have rounded, subtract the extra skew again */ |
164 | j -= cpu * 3; | 164 | j -= cpu * 3; |
165 | 165 | ||
166 | if (j <= jiffies) /* rounding ate our timeout entirely; */ | 166 | if (j <= jiffies) /* rounding ate our timeout entirely; */ |
167 | return original; | 167 | return original; |
168 | return j; | 168 | return j; |
169 | } | 169 | } |
170 | EXPORT_SYMBOL_GPL(__round_jiffies); | 170 | EXPORT_SYMBOL_GPL(__round_jiffies); |
171 | 171 | ||
172 | /** | 172 | /** |
173 | * __round_jiffies_relative - function to round jiffies to a full second | 173 | * __round_jiffies_relative - function to round jiffies to a full second |
174 | * @j: the time in (relative) jiffies that should be rounded | 174 | * @j: the time in (relative) jiffies that should be rounded |
175 | * @cpu: the processor number on which the timeout will happen | 175 | * @cpu: the processor number on which the timeout will happen |
176 | * | 176 | * |
177 | * __round_jiffies_relative() rounds a time delta in the future (in jiffies) | 177 | * __round_jiffies_relative() rounds a time delta in the future (in jiffies) |
178 | * up or down to (approximately) full seconds. This is useful for timers | 178 | * up or down to (approximately) full seconds. This is useful for timers |
179 | * for which the exact time they fire does not matter too much, as long as | 179 | * for which the exact time they fire does not matter too much, as long as |
180 | * they fire approximately every X seconds. | 180 | * they fire approximately every X seconds. |
181 | * | 181 | * |
182 | * By rounding these timers to whole seconds, all such timers will fire | 182 | * By rounding these timers to whole seconds, all such timers will fire |
183 | * at the same time, rather than at various times spread out. The goal | 183 | * at the same time, rather than at various times spread out. The goal |
184 | * of this is to have the CPU wake up less, which saves power. | 184 | * of this is to have the CPU wake up less, which saves power. |
185 | * | 185 | * |
186 | * The exact rounding is skewed for each processor to avoid all | 186 | * The exact rounding is skewed for each processor to avoid all |
187 | * processors firing at the exact same time, which could lead | 187 | * processors firing at the exact same time, which could lead |
188 | * to lock contention or spurious cache line bouncing. | 188 | * to lock contention or spurious cache line bouncing. |
189 | * | 189 | * |
190 | * The return value is the rounded version of the @j parameter. | 190 | * The return value is the rounded version of the @j parameter. |
191 | */ | 191 | */ |
192 | unsigned long __round_jiffies_relative(unsigned long j, int cpu) | 192 | unsigned long __round_jiffies_relative(unsigned long j, int cpu) |
193 | { | 193 | { |
194 | /* | 194 | /* |
195 | * In theory the following code can skip a jiffy in case jiffies | 195 | * In theory the following code can skip a jiffy in case jiffies |
196 | * increments right between the addition and the later subtraction. | 196 | * increments right between the addition and the later subtraction. |
197 | * However since the entire point of this function is to use approximate | 197 | * However since the entire point of this function is to use approximate |
198 | * timeouts, it's entirely ok to not handle that. | 198 | * timeouts, it's entirely ok to not handle that. |
199 | */ | 199 | */ |
200 | return __round_jiffies(j + jiffies, cpu) - jiffies; | 200 | return __round_jiffies(j + jiffies, cpu) - jiffies; |
201 | } | 201 | } |
202 | EXPORT_SYMBOL_GPL(__round_jiffies_relative); | 202 | EXPORT_SYMBOL_GPL(__round_jiffies_relative); |
203 | 203 | ||
204 | /** | 204 | /** |
205 | * round_jiffies - function to round jiffies to a full second | 205 | * round_jiffies - function to round jiffies to a full second |
206 | * @j: the time in (absolute) jiffies that should be rounded | 206 | * @j: the time in (absolute) jiffies that should be rounded |
207 | * | 207 | * |
208 | * round_jiffies() rounds an absolute time in the future (in jiffies) | 208 | * round_jiffies() rounds an absolute time in the future (in jiffies) |
209 | * up or down to (approximately) full seconds. This is useful for timers | 209 | * up or down to (approximately) full seconds. This is useful for timers |
210 | * for which the exact time they fire does not matter too much, as long as | 210 | * for which the exact time they fire does not matter too much, as long as |
211 | * they fire approximately every X seconds. | 211 | * they fire approximately every X seconds. |
212 | * | 212 | * |
213 | * By rounding these timers to whole seconds, all such timers will fire | 213 | * By rounding these timers to whole seconds, all such timers will fire |
214 | * at the same time, rather than at various times spread out. The goal | 214 | * at the same time, rather than at various times spread out. The goal |
215 | * of this is to have the CPU wake up less, which saves power. | 215 | * of this is to have the CPU wake up less, which saves power. |
216 | * | 216 | * |
217 | * The return value is the rounded version of the @j parameter. | 217 | * The return value is the rounded version of the @j parameter. |
218 | */ | 218 | */ |
219 | unsigned long round_jiffies(unsigned long j) | 219 | unsigned long round_jiffies(unsigned long j) |
220 | { | 220 | { |
221 | return __round_jiffies(j, raw_smp_processor_id()); | 221 | return __round_jiffies(j, raw_smp_processor_id()); |
222 | } | 222 | } |
223 | EXPORT_SYMBOL_GPL(round_jiffies); | 223 | EXPORT_SYMBOL_GPL(round_jiffies); |
224 | 224 | ||
225 | /** | 225 | /** |
226 | * round_jiffies_relative - function to round jiffies to a full second | 226 | * round_jiffies_relative - function to round jiffies to a full second |
227 | * @j: the time in (relative) jiffies that should be rounded | 227 | * @j: the time in (relative) jiffies that should be rounded |
228 | * | 228 | * |
229 | * round_jiffies_relative() rounds a time delta in the future (in jiffies) | 229 | * round_jiffies_relative() rounds a time delta in the future (in jiffies) |
230 | * up or down to (approximately) full seconds. This is useful for timers | 230 | * up or down to (approximately) full seconds. This is useful for timers |
231 | * for which the exact time they fire does not matter too much, as long as | 231 | * for which the exact time they fire does not matter too much, as long as |
232 | * they fire approximately every X seconds. | 232 | * they fire approximately every X seconds. |
233 | * | 233 | * |
234 | * By rounding these timers to whole seconds, all such timers will fire | 234 | * By rounding these timers to whole seconds, all such timers will fire |
235 | * at the same time, rather than at various times spread out. The goal | 235 | * at the same time, rather than at various times spread out. The goal |
236 | * of this is to have the CPU wake up less, which saves power. | 236 | * of this is to have the CPU wake up less, which saves power. |
237 | * | 237 | * |
238 | * The return value is the rounded version of the @j parameter. | 238 | * The return value is the rounded version of the @j parameter. |
239 | */ | 239 | */ |
240 | unsigned long round_jiffies_relative(unsigned long j) | 240 | unsigned long round_jiffies_relative(unsigned long j) |
241 | { | 241 | { |
242 | return __round_jiffies_relative(j, raw_smp_processor_id()); | 242 | return __round_jiffies_relative(j, raw_smp_processor_id()); |
243 | } | 243 | } |
244 | EXPORT_SYMBOL_GPL(round_jiffies_relative); | 244 | EXPORT_SYMBOL_GPL(round_jiffies_relative); |
245 | 245 | ||
246 | 246 | ||
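To make the skew/round/unskew dance concrete, here is a hedged userspace model of __round_jiffies() with HZ fixed at 250 (illustrative only; the real function additionally falls back to the original value when rounding would move the timeout into the past):

	#include <stdio.h>

	#define HZ 250

	static unsigned long round_to_second(unsigned long j, int cpu)
	{
		unsigned long rem;

		j += cpu * 3;		/* per-cpu skew */
		rem = j % HZ;
		if (rem < HZ / 4)	/* just past a boundary: round down */
			j -= rem;
		else			/* otherwise round up */
			j += HZ - rem;
		return j - cpu * 3;	/* remove the skew again */
	}

	int main(void)
	{
		printf("%lu\n", round_to_second(1010, 0));	/* 1000: rounds down */
		printf("%lu\n", round_to_second(1100, 0));	/* 1250: rounds up   */
		printf("%lu\n", round_to_second(1100, 1));	/* 1247: cpu 1 lands 3 ticks earlier */
		return 0;
	}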
247 | static inline void set_running_timer(struct tvec_base *base, | 247 | static inline void set_running_timer(struct tvec_base *base, |
248 | struct timer_list *timer) | 248 | struct timer_list *timer) |
249 | { | 249 | { |
250 | #ifdef CONFIG_SMP | 250 | #ifdef CONFIG_SMP |
251 | base->running_timer = timer; | 251 | base->running_timer = timer; |
252 | #endif | 252 | #endif |
253 | } | 253 | } |
254 | 254 | ||
255 | static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) | 255 | static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) |
256 | { | 256 | { |
257 | unsigned long expires = timer->expires; | 257 | unsigned long expires = timer->expires; |
258 | unsigned long idx = expires - base->timer_jiffies; | 258 | unsigned long idx = expires - base->timer_jiffies; |
259 | struct list_head *vec; | 259 | struct list_head *vec; |
260 | 260 | ||
261 | if (idx < TVR_SIZE) { | 261 | if (idx < TVR_SIZE) { |
262 | int i = expires & TVR_MASK; | 262 | int i = expires & TVR_MASK; |
263 | vec = base->tv1.vec + i; | 263 | vec = base->tv1.vec + i; |
264 | } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { | 264 | } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { |
265 | int i = (expires >> TVR_BITS) & TVN_MASK; | 265 | int i = (expires >> TVR_BITS) & TVN_MASK; |
266 | vec = base->tv2.vec + i; | 266 | vec = base->tv2.vec + i; |
267 | } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { | 267 | } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { |
268 | int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; | 268 | int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; |
269 | vec = base->tv3.vec + i; | 269 | vec = base->tv3.vec + i; |
270 | } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { | 270 | } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { |
271 | int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; | 271 | int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; |
272 | vec = base->tv4.vec + i; | 272 | vec = base->tv4.vec + i; |
273 | } else if ((signed long) idx < 0) { | 273 | } else if ((signed long) idx < 0) { |
274 | /* | 274 | /* |
275 | * Can happen if you add a timer with expires == jiffies, | 275 | * Can happen if you add a timer with expires == jiffies, |
276 | * or you set a timer to go off in the past | 276 | * or you set a timer to go off in the past |
277 | */ | 277 | */ |
278 | vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); | 278 | vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); |
279 | } else { | 279 | } else { |
280 | int i; | 280 | int i; |
281 | /* If the timeout is larger than 0xffffffff on 64-bit | 281 | /* If the timeout is larger than 0xffffffff on 64-bit |
282 | * architectures then we use the maximum timeout: | 282 | * architectures then we use the maximum timeout: |
283 | */ | 283 | */ |
284 | if (idx > 0xffffffffUL) { | 284 | if (idx > 0xffffffffUL) { |
285 | idx = 0xffffffffUL; | 285 | idx = 0xffffffffUL; |
286 | expires = idx + base->timer_jiffies; | 286 | expires = idx + base->timer_jiffies; |
287 | } | 287 | } |
288 | i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; | 288 | i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; |
289 | vec = base->tv5.vec + i; | 289 | vec = base->tv5.vec + i; |
290 | } | 290 | } |
291 | /* | 291 | /* |
292 | * Timers are FIFO: | 292 | * Timers are FIFO: |
293 | */ | 293 | */ |
294 | list_add_tail(&timer->entry, vec); | 294 | list_add_tail(&timer->entry, vec); |
295 | } | 295 | } |
296 | 296 | ||
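internal_add_timer() is the heart of the cascading timer wheel: the low TVR_BITS of the expiry delta index tv1 directly, and each further TVN_BITS chunk selects a coarser vector. A hedged standalone sketch that only reports which level a given delta lands in (constants mirror the !CONFIG_BASE_SMALL case above):

	#include <stdio.h>

	#define TVN_BITS 6
	#define TVR_BITS 8
	#define TVR_SIZE (1UL << TVR_BITS)

	static int wheel_level(unsigned long idx)
	{
		if (idx < TVR_SIZE)
			return 1;	/* tv1: the next 256 ticks */
		if (idx < 1UL << (TVR_BITS + TVN_BITS))
			return 2;	/* tv2: up to 2^14 ticks   */
		if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS))
			return 3;	/* tv3: up to 2^20 ticks   */
		if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS))
			return 4;	/* tv4: up to 2^26 ticks   */
		return 5;		/* tv5: everything beyond  */
	}

	int main(void)
	{
		unsigned long d[] = { 1, 255, 256, 20000, 2000000, 100000000 };
		int i;

		for (i = 0; i < 6; i++)
			printf("delta %9lu -> tv%d\n", d[i], wheel_level(d[i]));
		return 0;
	}

Coarser levels do not expire timers directly; __run_timers() below cascades them down one level whenever tv1 wraps.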
297 | #ifdef CONFIG_TIMER_STATS | 297 | #ifdef CONFIG_TIMER_STATS |
298 | void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | 298 | void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) |
299 | { | 299 | { |
300 | if (timer->start_site) | 300 | if (timer->start_site) |
301 | return; | 301 | return; |
302 | 302 | ||
303 | timer->start_site = addr; | 303 | timer->start_site = addr; |
304 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | 304 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); |
305 | timer->start_pid = current->pid; | 305 | timer->start_pid = current->pid; |
306 | } | 306 | } |
307 | 307 | ||
308 | static void timer_stats_account_timer(struct timer_list *timer) | 308 | static void timer_stats_account_timer(struct timer_list *timer) |
309 | { | 309 | { |
310 | unsigned int flag = 0; | 310 | unsigned int flag = 0; |
311 | 311 | ||
312 | if (unlikely(tbase_get_deferrable(timer->base))) | 312 | if (unlikely(tbase_get_deferrable(timer->base))) |
313 | flag |= TIMER_STATS_FLAG_DEFERRABLE; | 313 | flag |= TIMER_STATS_FLAG_DEFERRABLE; |
314 | 314 | ||
315 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | 315 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, |
316 | timer->function, timer->start_comm, flag); | 316 | timer->function, timer->start_comm, flag); |
317 | } | 317 | } |
318 | 318 | ||
319 | #else | 319 | #else |
320 | static void timer_stats_account_timer(struct timer_list *timer) {} | 320 | static void timer_stats_account_timer(struct timer_list *timer) {} |
321 | #endif | 321 | #endif |
322 | 322 | ||
323 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS | 323 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS |
324 | 324 | ||
325 | static struct debug_obj_descr timer_debug_descr; | 325 | static struct debug_obj_descr timer_debug_descr; |
326 | 326 | ||
327 | /* | 327 | /* |
328 | * fixup_init is called when: | 328 | * fixup_init is called when: |
329 | * - an active object is initialized | 329 | * - an active object is initialized |
330 | */ | 330 | */ |
331 | static int timer_fixup_init(void *addr, enum debug_obj_state state) | 331 | static int timer_fixup_init(void *addr, enum debug_obj_state state) |
332 | { | 332 | { |
333 | struct timer_list *timer = addr; | 333 | struct timer_list *timer = addr; |
334 | 334 | ||
335 | switch (state) { | 335 | switch (state) { |
336 | case ODEBUG_STATE_ACTIVE: | 336 | case ODEBUG_STATE_ACTIVE: |
337 | del_timer_sync(timer); | 337 | del_timer_sync(timer); |
338 | debug_object_init(timer, &timer_debug_descr); | 338 | debug_object_init(timer, &timer_debug_descr); |
339 | return 1; | 339 | return 1; |
340 | default: | 340 | default: |
341 | return 0; | 341 | return 0; |
342 | } | 342 | } |
343 | } | 343 | } |
344 | 344 | ||
345 | /* | 345 | /* |
346 | * fixup_activate is called when: | 346 | * fixup_activate is called when: |
347 | * - an active object is activated | 347 | * - an active object is activated |
348 | * - an unknown object is activated (might be a statically initialized object) | 348 | * - an unknown object is activated (might be a statically initialized object) |
349 | */ | 349 | */ |
350 | static int timer_fixup_activate(void *addr, enum debug_obj_state state) | 350 | static int timer_fixup_activate(void *addr, enum debug_obj_state state) |
351 | { | 351 | { |
352 | struct timer_list *timer = addr; | 352 | struct timer_list *timer = addr; |
353 | 353 | ||
354 | switch (state) { | 354 | switch (state) { |
355 | 355 | ||
356 | case ODEBUG_STATE_NOTAVAILABLE: | 356 | case ODEBUG_STATE_NOTAVAILABLE: |
357 | /* | 357 | /* |
358 | * This is not really a fixup. The timer was | 358 | * This is not really a fixup. The timer was |
359 | * statically initialized. We just make sure that it | 359 | * statically initialized. We just make sure that it |
360 | * is tracked in the object tracker. | 360 | * is tracked in the object tracker. |
361 | */ | 361 | */ |
362 | if (timer->entry.next == NULL && | 362 | if (timer->entry.next == NULL && |
363 | timer->entry.prev == TIMER_ENTRY_STATIC) { | 363 | timer->entry.prev == TIMER_ENTRY_STATIC) { |
364 | debug_object_init(timer, &timer_debug_descr); | 364 | debug_object_init(timer, &timer_debug_descr); |
365 | debug_object_activate(timer, &timer_debug_descr); | 365 | debug_object_activate(timer, &timer_debug_descr); |
366 | return 0; | 366 | return 0; |
367 | } else { | 367 | } else { |
368 | WARN_ON_ONCE(1); | 368 | WARN_ON_ONCE(1); |
369 | } | 369 | } |
370 | return 0; | 370 | return 0; |
371 | 371 | ||
372 | case ODEBUG_STATE_ACTIVE: | 372 | case ODEBUG_STATE_ACTIVE: |
373 | WARN_ON(1); | 373 | WARN_ON(1); |
374 | 374 | ||
375 | default: | 375 | default: |
376 | return 0; | 376 | return 0; |
377 | } | 377 | } |
378 | } | 378 | } |
379 | 379 | ||
380 | /* | 380 | /* |
381 | * fixup_free is called when: | 381 | * fixup_free is called when: |
382 | * - an active object is freed | 382 | * - an active object is freed |
383 | */ | 383 | */ |
384 | static int timer_fixup_free(void *addr, enum debug_obj_state state) | 384 | static int timer_fixup_free(void *addr, enum debug_obj_state state) |
385 | { | 385 | { |
386 | struct timer_list *timer = addr; | 386 | struct timer_list *timer = addr; |
387 | 387 | ||
388 | switch (state) { | 388 | switch (state) { |
389 | case ODEBUG_STATE_ACTIVE: | 389 | case ODEBUG_STATE_ACTIVE: |
390 | del_timer_sync(timer); | 390 | del_timer_sync(timer); |
391 | debug_object_free(timer, &timer_debug_descr); | 391 | debug_object_free(timer, &timer_debug_descr); |
392 | return 1; | 392 | return 1; |
393 | default: | 393 | default: |
394 | return 0; | 394 | return 0; |
395 | } | 395 | } |
396 | } | 396 | } |
397 | 397 | ||
398 | static struct debug_obj_descr timer_debug_descr = { | 398 | static struct debug_obj_descr timer_debug_descr = { |
399 | .name = "timer_list", | 399 | .name = "timer_list", |
400 | .fixup_init = timer_fixup_init, | 400 | .fixup_init = timer_fixup_init, |
401 | .fixup_activate = timer_fixup_activate, | 401 | .fixup_activate = timer_fixup_activate, |
402 | .fixup_free = timer_fixup_free, | 402 | .fixup_free = timer_fixup_free, |
403 | }; | 403 | }; |
404 | 404 | ||
405 | static inline void debug_timer_init(struct timer_list *timer) | 405 | static inline void debug_timer_init(struct timer_list *timer) |
406 | { | 406 | { |
407 | debug_object_init(timer, &timer_debug_descr); | 407 | debug_object_init(timer, &timer_debug_descr); |
408 | } | 408 | } |
409 | 409 | ||
410 | static inline void debug_timer_activate(struct timer_list *timer) | 410 | static inline void debug_timer_activate(struct timer_list *timer) |
411 | { | 411 | { |
412 | debug_object_activate(timer, &timer_debug_descr); | 412 | debug_object_activate(timer, &timer_debug_descr); |
413 | } | 413 | } |
414 | 414 | ||
415 | static inline void debug_timer_deactivate(struct timer_list *timer) | 415 | static inline void debug_timer_deactivate(struct timer_list *timer) |
416 | { | 416 | { |
417 | debug_object_deactivate(timer, &timer_debug_descr); | 417 | debug_object_deactivate(timer, &timer_debug_descr); |
418 | } | 418 | } |
419 | 419 | ||
420 | static inline void debug_timer_free(struct timer_list *timer) | 420 | static inline void debug_timer_free(struct timer_list *timer) |
421 | { | 421 | { |
422 | debug_object_free(timer, &timer_debug_descr); | 422 | debug_object_free(timer, &timer_debug_descr); |
423 | } | 423 | } |
424 | 424 | ||
425 | static void __init_timer(struct timer_list *timer); | 425 | static void __init_timer(struct timer_list *timer); |
426 | 426 | ||
427 | void init_timer_on_stack(struct timer_list *timer) | 427 | void init_timer_on_stack(struct timer_list *timer) |
428 | { | 428 | { |
429 | debug_object_init_on_stack(timer, &timer_debug_descr); | 429 | debug_object_init_on_stack(timer, &timer_debug_descr); |
430 | __init_timer(timer); | 430 | __init_timer(timer); |
431 | } | 431 | } |
432 | EXPORT_SYMBOL_GPL(init_timer_on_stack); | 432 | EXPORT_SYMBOL_GPL(init_timer_on_stack); |
433 | 433 | ||
434 | void destroy_timer_on_stack(struct timer_list *timer) | 434 | void destroy_timer_on_stack(struct timer_list *timer) |
435 | { | 435 | { |
436 | debug_object_free(timer, &timer_debug_descr); | 436 | debug_object_free(timer, &timer_debug_descr); |
437 | } | 437 | } |
438 | EXPORT_SYMBOL_GPL(destroy_timer_on_stack); | 438 | EXPORT_SYMBOL_GPL(destroy_timer_on_stack); |
439 | 439 | ||
440 | #else | 440 | #else |
441 | static inline void debug_timer_init(struct timer_list *timer) { } | 441 | static inline void debug_timer_init(struct timer_list *timer) { } |
442 | static inline void debug_timer_activate(struct timer_list *timer) { } | 442 | static inline void debug_timer_activate(struct timer_list *timer) { } |
443 | static inline void debug_timer_deactivate(struct timer_list *timer) { } | 443 | static inline void debug_timer_deactivate(struct timer_list *timer) { } |
444 | #endif | 444 | #endif |
445 | 445 | ||
446 | static void __init_timer(struct timer_list *timer) | 446 | static void __init_timer(struct timer_list *timer) |
447 | { | 447 | { |
448 | timer->entry.next = NULL; | 448 | timer->entry.next = NULL; |
449 | timer->base = __raw_get_cpu_var(tvec_bases); | 449 | timer->base = __raw_get_cpu_var(tvec_bases); |
450 | #ifdef CONFIG_TIMER_STATS | 450 | #ifdef CONFIG_TIMER_STATS |
451 | timer->start_site = NULL; | 451 | timer->start_site = NULL; |
452 | timer->start_pid = -1; | 452 | timer->start_pid = -1; |
453 | memset(timer->start_comm, 0, TASK_COMM_LEN); | 453 | memset(timer->start_comm, 0, TASK_COMM_LEN); |
454 | #endif | 454 | #endif |
455 | } | 455 | } |
456 | 456 | ||
457 | /** | 457 | /** |
458 | * init_timer - initialize a timer. | 458 | * init_timer - initialize a timer. |
459 | * @timer: the timer to be initialized | 459 | * @timer: the timer to be initialized |
460 | * | 460 | * |
461 | * init_timer() must be done on a timer prior to calling *any* of the | 461 | * init_timer() must be done on a timer prior to calling *any* of the |
462 | * other timer functions. | 462 | * other timer functions. |
463 | */ | 463 | */ |
464 | void init_timer(struct timer_list *timer) | 464 | void init_timer(struct timer_list *timer) |
465 | { | 465 | { |
466 | debug_timer_init(timer); | 466 | debug_timer_init(timer); |
467 | __init_timer(timer); | 467 | __init_timer(timer); |
468 | } | 468 | } |
469 | EXPORT_SYMBOL(init_timer); | 469 | EXPORT_SYMBOL(init_timer); |
470 | 470 | ||
471 | void init_timer_deferrable(struct timer_list *timer) | 471 | void init_timer_deferrable(struct timer_list *timer) |
472 | { | 472 | { |
473 | init_timer(timer); | 473 | init_timer(timer); |
474 | timer_set_deferrable(timer); | 474 | timer_set_deferrable(timer); |
475 | } | 475 | } |
476 | EXPORT_SYMBOL(init_timer_deferrable); | 476 | EXPORT_SYMBOL(init_timer_deferrable); |
477 | 477 | ||
478 | static inline void detach_timer(struct timer_list *timer, | 478 | static inline void detach_timer(struct timer_list *timer, |
479 | int clear_pending) | 479 | int clear_pending) |
480 | { | 480 | { |
481 | struct list_head *entry = &timer->entry; | 481 | struct list_head *entry = &timer->entry; |
482 | 482 | ||
483 | debug_timer_deactivate(timer); | 483 | debug_timer_deactivate(timer); |
484 | 484 | ||
485 | __list_del(entry->prev, entry->next); | 485 | __list_del(entry->prev, entry->next); |
486 | if (clear_pending) | 486 | if (clear_pending) |
487 | entry->next = NULL; | 487 | entry->next = NULL; |
488 | entry->prev = LIST_POISON2; | 488 | entry->prev = LIST_POISON2; |
489 | } | 489 | } |
490 | 490 | ||
491 | /* | 491 | /* |
492 | * We are using hashed locking: holding per_cpu(tvec_bases).lock | 492 | * We are using hashed locking: holding per_cpu(tvec_bases).lock |
493 | * means that all timers which are tied to this base via timer->base are | 493 | * means that all timers which are tied to this base via timer->base are |
494 | * locked, and the base itself is locked too. | 494 | * locked, and the base itself is locked too. |
495 | * | 495 | * |
496 | * So __run_timers/migrate_timers can safely modify all timers which could | 496 | * So __run_timers/migrate_timers can safely modify all timers which could |
497 | * be found on ->tvX lists. | 497 | * be found on ->tvX lists. |
498 | * | 498 | * |
499 | * When the timer's base is locked, and the timer removed from list, it is | 499 | * When the timer's base is locked, and the timer removed from list, it is |
500 | * possible to set timer->base = NULL and drop the lock: the timer remains | 500 | * possible to set timer->base = NULL and drop the lock: the timer remains |
501 | * locked. | 501 | * locked. |
502 | */ | 502 | */ |
503 | static struct tvec_base *lock_timer_base(struct timer_list *timer, | 503 | static struct tvec_base *lock_timer_base(struct timer_list *timer, |
504 | unsigned long *flags) | 504 | unsigned long *flags) |
505 | __acquires(timer->base->lock) | 505 | __acquires(timer->base->lock) |
506 | { | 506 | { |
507 | struct tvec_base *base; | 507 | struct tvec_base *base; |
508 | 508 | ||
509 | for (;;) { | 509 | for (;;) { |
510 | struct tvec_base *prelock_base = timer->base; | 510 | struct tvec_base *prelock_base = timer->base; |
511 | base = tbase_get_base(prelock_base); | 511 | base = tbase_get_base(prelock_base); |
512 | if (likely(base != NULL)) { | 512 | if (likely(base != NULL)) { |
513 | spin_lock_irqsave(&base->lock, *flags); | 513 | spin_lock_irqsave(&base->lock, *flags); |
514 | if (likely(prelock_base == timer->base)) | 514 | if (likely(prelock_base == timer->base)) |
515 | return base; | 515 | return base; |
516 | /* The timer has migrated to another CPU */ | 516 | /* The timer has migrated to another CPU */ |
517 | spin_unlock_irqrestore(&base->lock, *flags); | 517 | spin_unlock_irqrestore(&base->lock, *flags); |
518 | } | 518 | } |
519 | cpu_relax(); | 519 | cpu_relax(); |
520 | } | 520 | } |
521 | } | 521 | } |
522 | 522 | ||
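lock_timer_base() is a classic optimistic-locking loop: sample timer->base without the lock, take that base's lock, then trust the sample only if it still matches (a concurrent migration may have re-pointed it in between, and NULL means a migration is in flight). A hedged pthread model of the same recheck idiom (types and names are illustrative):

	#include <pthread.h>

	struct base { pthread_mutex_t lock; };
	struct item { struct base * volatile base; };	/* may be re-pointed */

	static struct base *lock_item_base(struct item *it)
	{
		for (;;) {
			struct base *b = it->base;	/* unlocked sample */
			if (b) {
				pthread_mutex_lock(&b->lock);
				if (b == it->base)	/* still ours: the lock pins it */
					return b;
				pthread_mutex_unlock(&b->lock);	/* migrated, retry */
			}
			/* NULL: migration in flight, spin */
		}
	}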
523 | int __mod_timer(struct timer_list *timer, unsigned long expires) | 523 | int __mod_timer(struct timer_list *timer, unsigned long expires) |
524 | { | 524 | { |
525 | struct tvec_base *base, *new_base; | 525 | struct tvec_base *base, *new_base; |
526 | unsigned long flags; | 526 | unsigned long flags; |
527 | int ret = 0; | 527 | int ret = 0; |
528 | 528 | ||
529 | timer_stats_timer_set_start_info(timer); | 529 | timer_stats_timer_set_start_info(timer); |
530 | BUG_ON(!timer->function); | 530 | BUG_ON(!timer->function); |
531 | 531 | ||
532 | base = lock_timer_base(timer, &flags); | 532 | base = lock_timer_base(timer, &flags); |
533 | 533 | ||
534 | if (timer_pending(timer)) { | 534 | if (timer_pending(timer)) { |
535 | detach_timer(timer, 0); | 535 | detach_timer(timer, 0); |
536 | ret = 1; | 536 | ret = 1; |
537 | } | 537 | } |
538 | 538 | ||
539 | debug_timer_activate(timer); | 539 | debug_timer_activate(timer); |
540 | 540 | ||
541 | new_base = __get_cpu_var(tvec_bases); | 541 | new_base = __get_cpu_var(tvec_bases); |
542 | 542 | ||
543 | if (base != new_base) { | 543 | if (base != new_base) { |
544 | /* | 544 | /* |
545 | * We are trying to schedule the timer on the local CPU. | 545 | * We are trying to schedule the timer on the local CPU. |
546 | * However we can't change the timer's base while it is running, | 546 | * However we can't change the timer's base while it is running, |
547 | * otherwise del_timer_sync() can't detect that the timer's | 547 | * otherwise del_timer_sync() can't detect that the timer's |
548 | * handler has not yet finished. This also guarantees that | 548 | * handler has not yet finished. This also guarantees that |
549 | * the timer is serialized wrt itself. | 549 | * the timer is serialized wrt itself. |
550 | */ | 550 | */ |
551 | if (likely(base->running_timer != timer)) { | 551 | if (likely(base->running_timer != timer)) { |
552 | /* See the comment in lock_timer_base() */ | 552 | /* See the comment in lock_timer_base() */ |
553 | timer_set_base(timer, NULL); | 553 | timer_set_base(timer, NULL); |
554 | spin_unlock(&base->lock); | 554 | spin_unlock(&base->lock); |
555 | base = new_base; | 555 | base = new_base; |
556 | spin_lock(&base->lock); | 556 | spin_lock(&base->lock); |
557 | timer_set_base(timer, base); | 557 | timer_set_base(timer, base); |
558 | } | 558 | } |
559 | } | 559 | } |
560 | 560 | ||
561 | timer->expires = expires; | 561 | timer->expires = expires; |
562 | internal_add_timer(base, timer); | 562 | internal_add_timer(base, timer); |
563 | spin_unlock_irqrestore(&base->lock, flags); | 563 | spin_unlock_irqrestore(&base->lock, flags); |
564 | 564 | ||
565 | return ret; | 565 | return ret; |
566 | } | 566 | } |
567 | 567 | ||
568 | EXPORT_SYMBOL(__mod_timer); | 568 | EXPORT_SYMBOL(__mod_timer); |
569 | 569 | ||
570 | /** | 570 | /** |
571 | * add_timer_on - start a timer on a particular CPU | 571 | * add_timer_on - start a timer on a particular CPU |
572 | * @timer: the timer to be added | 572 | * @timer: the timer to be added |
573 | * @cpu: the CPU to start it on | 573 | * @cpu: the CPU to start it on |
574 | * | 574 | * |
575 | * This is not very scalable on SMP. Double adds are not possible. | 575 | * This is not very scalable on SMP. Double adds are not possible. |
576 | */ | 576 | */ |
577 | void add_timer_on(struct timer_list *timer, int cpu) | 577 | void add_timer_on(struct timer_list *timer, int cpu) |
578 | { | 578 | { |
579 | struct tvec_base *base = per_cpu(tvec_bases, cpu); | 579 | struct tvec_base *base = per_cpu(tvec_bases, cpu); |
580 | unsigned long flags; | 580 | unsigned long flags; |
581 | 581 | ||
582 | timer_stats_timer_set_start_info(timer); | 582 | timer_stats_timer_set_start_info(timer); |
583 | BUG_ON(timer_pending(timer) || !timer->function); | 583 | BUG_ON(timer_pending(timer) || !timer->function); |
584 | spin_lock_irqsave(&base->lock, flags); | 584 | spin_lock_irqsave(&base->lock, flags); |
585 | timer_set_base(timer, base); | 585 | timer_set_base(timer, base); |
586 | debug_timer_activate(timer); | 586 | debug_timer_activate(timer); |
587 | internal_add_timer(base, timer); | 587 | internal_add_timer(base, timer); |
588 | /* | 588 | /* |
589 | * Check whether the other CPU is idle and needs to be | 589 | * Check whether the other CPU is idle and needs to be |
590 | * triggered to reevaluate the timer wheel when nohz is | 590 | * triggered to reevaluate the timer wheel when nohz is |
591 | * active. We are protected against the other CPU fiddling | 591 | * active. We are protected against the other CPU fiddling |
592 | * with the timer by holding the timer base lock. This also | 592 | * with the timer by holding the timer base lock. This also |
593 | * makes sure that a CPU on the way to idle can not evaluate | 593 | * makes sure that a CPU on the way to idle can not evaluate |
594 | * the timer wheel. | 594 | * the timer wheel. |
595 | */ | 595 | */ |
596 | wake_up_idle_cpu(cpu); | 596 | wake_up_idle_cpu(cpu); |
597 | spin_unlock_irqrestore(&base->lock, flags); | 597 | spin_unlock_irqrestore(&base->lock, flags); |
598 | } | 598 | } |
599 | 599 | ||
600 | /** | 600 | /** |
601 | * mod_timer - modify a timer's timeout | 601 | * mod_timer - modify a timer's timeout |
602 | * @timer: the timer to be modified | 602 | * @timer: the timer to be modified |
603 | * @expires: new timeout in jiffies | 603 | * @expires: new timeout in jiffies |
604 | * | 604 | * |
605 | * mod_timer() is a more efficient way to update the expire field of an | 605 | * mod_timer() is a more efficient way to update the expire field of an |
606 | * active timer (if the timer is inactive it will be activated) | 606 | * active timer (if the timer is inactive it will be activated) |
607 | * | 607 | * |
608 | * mod_timer(timer, expires) is equivalent to: | 608 | * mod_timer(timer, expires) is equivalent to: |
609 | * | 609 | * |
610 | * del_timer(timer); timer->expires = expires; add_timer(timer); | 610 | * del_timer(timer); timer->expires = expires; add_timer(timer); |
611 | * | 611 | * |
612 | * Note that if there are multiple unserialized concurrent users of the | 612 | * Note that if there are multiple unserialized concurrent users of the |
613 | * same timer, then mod_timer() is the only safe way to modify the timeout, | 613 | * same timer, then mod_timer() is the only safe way to modify the timeout, |
614 | * since add_timer() cannot modify an already running timer. | 614 | * since add_timer() cannot modify an already running timer. |
615 | * | 615 | * |
616 | * The function returns whether it has modified a pending timer or not. | 616 | * The function returns whether it has modified a pending timer or not. |
617 | * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an | 617 | * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an |
618 | * active timer returns 1.) | 618 | * active timer returns 1.) |
619 | */ | 619 | */ |
620 | int mod_timer(struct timer_list *timer, unsigned long expires) | 620 | int mod_timer(struct timer_list *timer, unsigned long expires) |
621 | { | 621 | { |
622 | BUG_ON(!timer->function); | 622 | BUG_ON(!timer->function); |
623 | 623 | ||
624 | timer_stats_timer_set_start_info(timer); | 624 | timer_stats_timer_set_start_info(timer); |
625 | /* | 625 | /* |
626 | * This is a common optimization triggered by the | 626 | * This is a common optimization triggered by the |
627 | * networking code - if the timer is re-modified | 627 | * networking code - if the timer is re-modified |
628 | * to be the same thing then just return: | 628 | * to be the same thing then just return: |
629 | */ | 629 | */ |
630 | if (timer->expires == expires && timer_pending(timer)) | 630 | if (timer->expires == expires && timer_pending(timer)) |
631 | return 1; | 631 | return 1; |
632 | 632 | ||
633 | return __mod_timer(timer, expires); | 633 | return __mod_timer(timer, expires); |
634 | } | 634 | } |
635 | 635 | ||
636 | EXPORT_SYMBOL(mod_timer); | 636 | EXPORT_SYMBOL(mod_timer); |
637 | 637 | ||
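Putting the pieces together, a hedged sketch of how a driver of this kernel generation would drive the API (module boilerplate elided; the function/data pair matches the struct timer_list of this era):

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static struct timer_list my_timer;

	static void my_timeout(unsigned long data)
	{
		/* runs in softirq context; re-arm one second out */
		mod_timer(&my_timer, round_jiffies(jiffies + HZ));
	}

	static void my_start(void)
	{
		init_timer(&my_timer);
		my_timer.function = my_timeout;
		my_timer.data = 0;
		mod_timer(&my_timer, jiffies + HZ);	/* activates if not pending */
	}

Note how mod_timer() covers both first activation and re-arming, which is why the kernel-doc above recommends it when multiple unserialized users touch the same timer.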
638 | /** | 638 | /** |
639 | * del_timer - deactivate a timer. | 639 | * del_timer - deactivate a timer. |
640 | * @timer: the timer to be deactivated | 640 | * @timer: the timer to be deactivated |
641 | * | 641 | * |
642 | * del_timer() deactivates a timer - this works on both active and inactive | 642 | * del_timer() deactivates a timer - this works on both active and inactive |
643 | * timers. | 643 | * timers. |
644 | * | 644 | * |
645 | * The function returns whether it has deactivated a pending timer or not. | 645 | * The function returns whether it has deactivated a pending timer or not. |
646 | * (ie. del_timer() of an inactive timer returns 0, del_timer() of an | 646 | * (ie. del_timer() of an inactive timer returns 0, del_timer() of an |
647 | * active timer returns 1.) | 647 | * active timer returns 1.) |
648 | */ | 648 | */ |
649 | int del_timer(struct timer_list *timer) | 649 | int del_timer(struct timer_list *timer) |
650 | { | 650 | { |
651 | struct tvec_base *base; | 651 | struct tvec_base *base; |
652 | unsigned long flags; | 652 | unsigned long flags; |
653 | int ret = 0; | 653 | int ret = 0; |
654 | 654 | ||
655 | timer_stats_timer_clear_start_info(timer); | 655 | timer_stats_timer_clear_start_info(timer); |
656 | if (timer_pending(timer)) { | 656 | if (timer_pending(timer)) { |
657 | base = lock_timer_base(timer, &flags); | 657 | base = lock_timer_base(timer, &flags); |
658 | if (timer_pending(timer)) { | 658 | if (timer_pending(timer)) { |
659 | detach_timer(timer, 1); | 659 | detach_timer(timer, 1); |
660 | ret = 1; | 660 | ret = 1; |
661 | } | 661 | } |
662 | spin_unlock_irqrestore(&base->lock, flags); | 662 | spin_unlock_irqrestore(&base->lock, flags); |
663 | } | 663 | } |
664 | 664 | ||
665 | return ret; | 665 | return ret; |
666 | } | 666 | } |
667 | 667 | ||
668 | EXPORT_SYMBOL(del_timer); | 668 | EXPORT_SYMBOL(del_timer); |
669 | 669 | ||
670 | #ifdef CONFIG_SMP | 670 | #ifdef CONFIG_SMP |
671 | /** | 671 | /** |
672 | * try_to_del_timer_sync - Try to deactivate a timer | 672 | * try_to_del_timer_sync - Try to deactivate a timer |
673 | * @timer: timer to deactivate | 673 | * @timer: timer to deactivate |
674 | * | 674 | * |
675 | * This function tries to deactivate a timer. Upon successful (ret >= 0) | 675 | * This function tries to deactivate a timer. Upon successful (ret >= 0) |
676 | * exit the timer is not queued and the handler is not running on any CPU. | 676 | * exit the timer is not queued and the handler is not running on any CPU. |
677 | * | 677 | * |
678 | * It must not be called from interrupt contexts. | 678 | * It must not be called from interrupt contexts. |
679 | */ | 679 | */ |
680 | int try_to_del_timer_sync(struct timer_list *timer) | 680 | int try_to_del_timer_sync(struct timer_list *timer) |
681 | { | 681 | { |
682 | struct tvec_base *base; | 682 | struct tvec_base *base; |
683 | unsigned long flags; | 683 | unsigned long flags; |
684 | int ret = -1; | 684 | int ret = -1; |
685 | 685 | ||
686 | base = lock_timer_base(timer, &flags); | 686 | base = lock_timer_base(timer, &flags); |
687 | 687 | ||
688 | if (base->running_timer == timer) | 688 | if (base->running_timer == timer) |
689 | goto out; | 689 | goto out; |
690 | 690 | ||
691 | ret = 0; | 691 | ret = 0; |
692 | if (timer_pending(timer)) { | 692 | if (timer_pending(timer)) { |
693 | detach_timer(timer, 1); | 693 | detach_timer(timer, 1); |
694 | ret = 1; | 694 | ret = 1; |
695 | } | 695 | } |
696 | out: | 696 | out: |
697 | spin_unlock_irqrestore(&base->lock, flags); | 697 | spin_unlock_irqrestore(&base->lock, flags); |
698 | 698 | ||
699 | return ret; | 699 | return ret; |
700 | } | 700 | } |
701 | 701 | ||
702 | EXPORT_SYMBOL(try_to_del_timer_sync); | 702 | EXPORT_SYMBOL(try_to_del_timer_sync); |
703 | 703 | ||
704 | /** | 704 | /** |
705 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | 705 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
706 | * @timer: the timer to be deactivated | 706 | * @timer: the timer to be deactivated |
707 | * | 707 | * |
708 | * This function only differs from del_timer() on SMP: besides deactivating | 708 | * This function only differs from del_timer() on SMP: besides deactivating |
709 | * the timer it also makes sure the handler has finished executing on other | 709 | * the timer it also makes sure the handler has finished executing on other |
710 | * CPUs. | 710 | * CPUs. |
711 | * | 711 | * |
712 | * Synchronization rules: Callers must prevent restarting of the timer, | 712 | * Synchronization rules: Callers must prevent restarting of the timer, |
713 | * otherwise this function is meaningless. It must not be called from | 713 | * otherwise this function is meaningless. It must not be called from |
714 | * interrupt contexts. The caller must not hold locks which would prevent | 714 | * interrupt contexts. The caller must not hold locks which would prevent |
715 | * completion of the timer's handler. The timer's handler must not call | 715 | * completion of the timer's handler. The timer's handler must not call |
716 | * add_timer_on(). Upon exit the timer is not queued and the handler is | 716 | * add_timer_on(). Upon exit the timer is not queued and the handler is |
717 | * not running on any CPU. | 717 | * not running on any CPU. |
718 | * | 718 | * |
719 | * The function returns whether it has deactivated a pending timer or not. | 719 | * The function returns whether it has deactivated a pending timer or not. |
720 | */ | 720 | */ |
721 | int del_timer_sync(struct timer_list *timer) | 721 | int del_timer_sync(struct timer_list *timer) |
722 | { | 722 | { |
723 | for (;;) { | 723 | for (;;) { |
724 | int ret = try_to_del_timer_sync(timer); | 724 | int ret = try_to_del_timer_sync(timer); |
725 | if (ret >= 0) | 725 | if (ret >= 0) |
726 | return ret; | 726 | return ret; |
727 | cpu_relax(); | 727 | cpu_relax(); |
728 | } | 728 | } |
729 | } | 729 | } |
730 | 730 | ||
731 | EXPORT_SYMBOL(del_timer_sync); | 731 | EXPORT_SYMBOL(del_timer_sync); |
732 | #endif | 732 | #endif |
733 | 733 | ||
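The "callers must prevent restarting" rule matters most for self-rearming timers like the sketch after mod_timer() above: del_timer_sync() waits only for the currently running handler, so a handler that unconditionally re-arms can slip back in. A common (hedged, illustrative) shutdown pattern, reusing my_timer from that sketch:

	static int shutting_down;		/* set before teardown */

	static void my_timeout(unsigned long data)
	{
		if (!shutting_down)		/* refuse to re-arm during exit */
			mod_timer(&my_timer, round_jiffies(jiffies + HZ));
	}

	static void my_stop(void)
	{
		shutting_down = 1;		/* 1. forbid re-arming     */
		del_timer_sync(&my_timer);	/* 2. wait out any handler */
	}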
734 | static int cascade(struct tvec_base *base, struct tvec *tv, int index) | 734 | static int cascade(struct tvec_base *base, struct tvec *tv, int index) |
735 | { | 735 | { |
736 | /* cascade all the timers from tv up one level */ | 736 | /* cascade all the timers from tv up one level */ |
737 | struct timer_list *timer, *tmp; | 737 | struct timer_list *timer, *tmp; |
738 | struct list_head tv_list; | 738 | struct list_head tv_list; |
739 | 739 | ||
740 | list_replace_init(tv->vec + index, &tv_list); | 740 | list_replace_init(tv->vec + index, &tv_list); |
741 | 741 | ||
742 | /* | 742 | /* |
743 | * We are removing _all_ timers from the list, so we | 743 | * We are removing _all_ timers from the list, so we |
744 | * don't have to detach them individually. | 744 | * don't have to detach them individually. |
745 | */ | 745 | */ |
746 | list_for_each_entry_safe(timer, tmp, &tv_list, entry) { | 746 | list_for_each_entry_safe(timer, tmp, &tv_list, entry) { |
747 | BUG_ON(tbase_get_base(timer->base) != base); | 747 | BUG_ON(tbase_get_base(timer->base) != base); |
748 | internal_add_timer(base, timer); | 748 | internal_add_timer(base, timer); |
749 | } | 749 | } |
750 | 750 | ||
751 | return index; | 751 | return index; |
752 | } | 752 | } |
753 | 753 | ||
754 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) | 754 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) |
755 | 755 | ||
756 | /** | 756 | /** |
757 | * __run_timers - run all expired timers (if any) on this CPU. | 757 | * __run_timers - run all expired timers (if any) on this CPU. |
758 | * @base: the timer vector to be processed. | 758 | * @base: the timer vector to be processed. |
759 | * | 759 | * |
760 | * This function cascades all vectors and executes all expired | 760 | * This function cascades all vectors and executes all expired |
761 | * timers. | 761 | * timers. |
762 | */ | 762 | */ |
763 | static inline void __run_timers(struct tvec_base *base) | 763 | static inline void __run_timers(struct tvec_base *base) |
764 | { | 764 | { |
765 | struct timer_list *timer; | 765 | struct timer_list *timer; |
766 | 766 | ||
767 | spin_lock_irq(&base->lock); | 767 | spin_lock_irq(&base->lock); |
768 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 768 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
769 | struct list_head work_list; | 769 | struct list_head work_list; |
770 | struct list_head *head = &work_list; | 770 | struct list_head *head = &work_list; |
771 | int index = base->timer_jiffies & TVR_MASK; | 771 | int index = base->timer_jiffies & TVR_MASK; |
772 | 772 | ||
773 | /* | 773 | /* |
774 | * Cascade timers: | 774 | * Cascade timers: |
775 | */ | 775 | */ |
776 | if (!index && | 776 | if (!index && |
777 | (!cascade(base, &base->tv2, INDEX(0))) && | 777 | (!cascade(base, &base->tv2, INDEX(0))) && |
778 | (!cascade(base, &base->tv3, INDEX(1))) && | 778 | (!cascade(base, &base->tv3, INDEX(1))) && |
779 | !cascade(base, &base->tv4, INDEX(2))) | 779 | !cascade(base, &base->tv4, INDEX(2))) |
780 | cascade(base, &base->tv5, INDEX(3)); | 780 | cascade(base, &base->tv5, INDEX(3)); |
781 | ++base->timer_jiffies; | 781 | ++base->timer_jiffies; |
782 | list_replace_init(base->tv1.vec + index, &work_list); | 782 | list_replace_init(base->tv1.vec + index, &work_list); |
783 | while (!list_empty(head)) { | 783 | while (!list_empty(head)) { |
784 | void (*fn)(unsigned long); | 784 | void (*fn)(unsigned long); |
785 | unsigned long data; | 785 | unsigned long data; |
786 | 786 | ||
787 | timer = list_first_entry(head, struct timer_list,entry); | 787 | timer = list_first_entry(head, struct timer_list,entry); |
788 | fn = timer->function; | 788 | fn = timer->function; |
789 | data = timer->data; | 789 | data = timer->data; |
790 | 790 | ||
791 | timer_stats_account_timer(timer); | 791 | timer_stats_account_timer(timer); |
792 | 792 | ||
793 | set_running_timer(base, timer); | 793 | set_running_timer(base, timer); |
794 | detach_timer(timer, 1); | 794 | detach_timer(timer, 1); |
795 | spin_unlock_irq(&base->lock); | 795 | spin_unlock_irq(&base->lock); |
796 | { | 796 | { |
797 | int preempt_count = preempt_count(); | 797 | int preempt_count = preempt_count(); |
798 | fn(data); | 798 | fn(data); |
799 | if (preempt_count != preempt_count()) { | 799 | if (preempt_count != preempt_count()) { |
800 | printk(KERN_ERR "huh, entered %p " | 800 | printk(KERN_ERR "huh, entered %p " |
801 | "with preempt_count %08x, exited" | 801 | "with preempt_count %08x, exited" |
802 | " with %08x?\n", | 802 | " with %08x?\n", |
803 | fn, preempt_count, | 803 | fn, preempt_count, |
804 | preempt_count()); | 804 | preempt_count()); |
805 | BUG(); | 805 | BUG(); |
806 | } | 806 | } |
807 | } | 807 | } |
808 | spin_lock_irq(&base->lock); | 808 | spin_lock_irq(&base->lock); |
809 | } | 809 | } |
810 | } | 810 | } |
811 | set_running_timer(base, NULL); | 811 | set_running_timer(base, NULL); |
812 | spin_unlock_irq(&base->lock); | 812 | spin_unlock_irq(&base->lock); |
813 | } | 813 | } |
814 | 814 | ||
815 | #ifdef CONFIG_NO_HZ | 815 | #ifdef CONFIG_NO_HZ |
816 | /* | 816 | /* |
817 | * Find out when the next timer event is due to happen. This | 817 | * Find out when the next timer event is due to happen. This |
818 | * is used on S/390 to stop all activity when a cpu is idle. | 818 | * is used on S/390 to stop all activity when a cpu is idle. |
819 | * This function needs to be called with interrupts disabled. | 819 | * This function needs to be called with interrupts disabled. |
820 | */ | 820 | */ |
821 | static unsigned long __next_timer_interrupt(struct tvec_base *base) | 821 | static unsigned long __next_timer_interrupt(struct tvec_base *base) |
822 | { | 822 | { |
823 | unsigned long timer_jiffies = base->timer_jiffies; | 823 | unsigned long timer_jiffies = base->timer_jiffies; |
824 | unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; | 824 | unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; |
825 | int index, slot, array, found = 0; | 825 | int index, slot, array, found = 0; |
826 | struct timer_list *nte; | 826 | struct timer_list *nte; |
827 | struct tvec *varray[4]; | 827 | struct tvec *varray[4]; |
828 | 828 | ||
829 | /* Look for timer events in tv1. */ | 829 | /* Look for timer events in tv1. */ |
830 | index = slot = timer_jiffies & TVR_MASK; | 830 | index = slot = timer_jiffies & TVR_MASK; |
831 | do { | 831 | do { |
832 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { | 832 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { |
833 | if (tbase_get_deferrable(nte->base)) | 833 | if (tbase_get_deferrable(nte->base)) |
834 | continue; | 834 | continue; |
835 | 835 | ||
836 | found = 1; | 836 | found = 1; |
837 | expires = nte->expires; | 837 | expires = nte->expires; |
838 | /* Look at the cascade bucket(s)? */ | 838 | /* Look at the cascade bucket(s)? */ |
839 | if (!index || slot < index) | 839 | if (!index || slot < index) |
840 | goto cascade; | 840 | goto cascade; |
841 | return expires; | 841 | return expires; |
842 | } | 842 | } |
843 | slot = (slot + 1) & TVR_MASK; | 843 | slot = (slot + 1) & TVR_MASK; |
844 | } while (slot != index); | 844 | } while (slot != index); |
845 | 845 | ||
846 | cascade: | 846 | cascade: |
847 | /* Calculate the next cascade event */ | 847 | /* Calculate the next cascade event */ |
848 | if (index) | 848 | if (index) |
849 | timer_jiffies += TVR_SIZE - index; | 849 | timer_jiffies += TVR_SIZE - index; |
850 | timer_jiffies >>= TVR_BITS; | 850 | timer_jiffies >>= TVR_BITS; |
851 | 851 | ||
852 | /* Check tv2-tv5. */ | 852 | /* Check tv2-tv5. */ |
853 | varray[0] = &base->tv2; | 853 | varray[0] = &base->tv2; |
854 | varray[1] = &base->tv3; | 854 | varray[1] = &base->tv3; |
855 | varray[2] = &base->tv4; | 855 | varray[2] = &base->tv4; |
856 | varray[3] = &base->tv5; | 856 | varray[3] = &base->tv5; |
857 | 857 | ||
858 | for (array = 0; array < 4; array++) { | 858 | for (array = 0; array < 4; array++) { |
859 | struct tvec *varp = varray[array]; | 859 | struct tvec *varp = varray[array]; |
860 | 860 | ||
861 | index = slot = timer_jiffies & TVN_MASK; | 861 | index = slot = timer_jiffies & TVN_MASK; |
862 | do { | 862 | do { |
863 | list_for_each_entry(nte, varp->vec + slot, entry) { | 863 | list_for_each_entry(nte, varp->vec + slot, entry) { |
864 | found = 1; | 864 | found = 1; |
865 | if (time_before(nte->expires, expires)) | 865 | if (time_before(nte->expires, expires)) |
866 | expires = nte->expires; | 866 | expires = nte->expires; |
867 | } | 867 | } |
868 | /* | 868 | /* |
869 | * Are we still searching for the first timer, or are | 869 | * Are we still searching for the first timer, or are |
870 | * we looking up the cascade buckets? | 870 | * we looking up the cascade buckets? |
871 | */ | 871 | */ |
872 | if (found) { | 872 | if (found) { |
873 | /* Look at the cascade bucket(s)? */ | 873 | /* Look at the cascade bucket(s)? */ |
874 | if (!index || slot < index) | 874 | if (!index || slot < index) |
875 | break; | 875 | break; |
876 | return expires; | 876 | return expires; |
877 | } | 877 | } |
878 | slot = (slot + 1) & TVN_MASK; | 878 | slot = (slot + 1) & TVN_MASK; |
879 | } while (slot != index); | 879 | } while (slot != index); |
880 | 880 | ||
881 | if (index) | 881 | if (index) |
882 | timer_jiffies += TVN_SIZE - index; | 882 | timer_jiffies += TVN_SIZE - index; |
883 | timer_jiffies >>= TVN_BITS; | 883 | timer_jiffies >>= TVN_BITS; |
884 | } | 884 | } |
885 | return expires; | 885 | return expires; |
886 | } | 886 | } |
887 | 887 | ||
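For reference, the slot arithmetic used by the tv1 scan and the cascade levels above can be exercised in isolation. The following userspace sketch assumes the usual defaults of TVR_BITS = 8 and TVN_BITS = 6 (CONFIG_BASE_SMALL shrinks them) and is illustrative only:

	#include <stdio.h>

	/* Assumed defaults; CONFIG_BASE_SMALL uses smaller wheels. */
	#define TVR_BITS 8
	#define TVN_BITS 6
	#define TVR_SIZE (1 << TVR_BITS)
	#define TVR_MASK (TVR_SIZE - 1)
	#define TVN_MASK ((1 << TVN_BITS) - 1)

	/* Which wheel level/slot would hold a timer expiring at 'expires'? */
	static void locate(unsigned long timer_jiffies, unsigned long expires)
	{
		unsigned long idx = expires - timer_jiffies;

		if (idx < TVR_SIZE)
			printf("tv1 slot %lu\n", expires & TVR_MASK);
		else if (idx < 1UL << (TVR_BITS + TVN_BITS))
			printf("tv2 slot %lu\n",
			       (expires >> TVR_BITS) & TVN_MASK);
		else
			printf("tv3 or deeper\n");
	}

	int main(void)
	{
		locate(1000, 1005);	/* within 256 ticks -> tv1 */
		locate(1000, 5000);	/* within 16384 ticks -> tv2 */
		return 0;
	}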
888 | /* | 888 | /* |
889 | * Check if the next hrtimer event is before the next timer wheel | 889 | * Check if the next hrtimer event is before the next timer wheel |
890 | * event: | 890 | * event: |
891 | */ | 891 | */ |
892 | static unsigned long cmp_next_hrtimer_event(unsigned long now, | 892 | static unsigned long cmp_next_hrtimer_event(unsigned long now, |
893 | unsigned long expires) | 893 | unsigned long expires) |
894 | { | 894 | { |
895 | ktime_t hr_delta = hrtimer_get_next_event(); | 895 | ktime_t hr_delta = hrtimer_get_next_event(); |
896 | struct timespec tsdelta; | 896 | struct timespec tsdelta; |
897 | unsigned long delta; | 897 | unsigned long delta; |
898 | 898 | ||
899 | if (hr_delta.tv64 == KTIME_MAX) | 899 | if (hr_delta.tv64 == KTIME_MAX) |
900 | return expires; | 900 | return expires; |
901 | 901 | ||
902 | /* | 902 | /* |
903 | * Expired timer available, let it expire in the next tick | 903 | * Expired timer available, let it expire in the next tick |
904 | */ | 904 | */ |
905 | if (hr_delta.tv64 <= 0) | 905 | if (hr_delta.tv64 <= 0) |
906 | return now + 1; | 906 | return now + 1; |
907 | 907 | ||
908 | tsdelta = ktime_to_timespec(hr_delta); | 908 | tsdelta = ktime_to_timespec(hr_delta); |
909 | delta = timespec_to_jiffies(&tsdelta); | 909 | delta = timespec_to_jiffies(&tsdelta); |
910 | 910 | ||
911 | /* | 911 | /* |
912 | * Limit the delta to the max value, which is checked in | 912 | * Limit the delta to the max value, which is checked in |
913 | * tick_nohz_stop_sched_tick(): | 913 | * tick_nohz_stop_sched_tick(): |
914 | */ | 914 | */ |
915 | if (delta > NEXT_TIMER_MAX_DELTA) | 915 | if (delta > NEXT_TIMER_MAX_DELTA) |
916 | delta = NEXT_TIMER_MAX_DELTA; | 916 | delta = NEXT_TIMER_MAX_DELTA; |
917 | 917 | ||
918 | /* | 918 | /* |
919 | * Take rounding errors into account and make sure that it | 919 | * Take rounding errors into account and make sure that it |
920 | * expires in the next tick. Otherwise we go into an endless | 920 | * expires in the next tick. Otherwise we go into an endless |
921 | * ping-pong due to tick_nohz_stop_sched_tick() retriggering | 921 | * ping-pong due to tick_nohz_stop_sched_tick() retriggering |
922 | * the timer softirq. | 922 | * the timer softirq. |
923 | */ | 923 | */ |
924 | if (delta < 1) | 924 | if (delta < 1) |
925 | delta = 1; | 925 | delta = 1; |
926 | now += delta; | 926 | now += delta; |
927 | if (time_before(now, expires)) | 927 | if (time_before(now, expires)) |
928 | return now; | 928 | return now; |
929 | return expires; | 929 | return expires; |
930 | } | 930 | } |
931 | 931 | ||
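Stripped of the ktime/timespec plumbing, cmp_next_hrtimer_event() enforces three bounds: an already-expired hrtimer maps to the next tick, the delta is capped at NEXT_TIMER_MAX_DELTA, and rounding can never produce a zero delta. A minimal restatement, ignoring jiffies wraparound (which the kernel handles via time_before()):

	/* Sketch of the clamping above, wraparound ignored for brevity.
	 * 'hr_ns' stands in for the ktime_t delta from
	 * hrtimer_get_next_event(); 'nsec_per_jiffy' replaces the
	 * timespec_to_jiffies() conversion. */
	static unsigned long clamp_next_event(unsigned long now,
					      unsigned long expires,
					      long long hr_ns,
					      unsigned long nsec_per_jiffy,
					      unsigned long max_delta)
	{
		unsigned long delta;

		if (hr_ns <= 0)			/* already expired */
			return now + 1;

		delta = hr_ns / nsec_per_jiffy;
		if (delta > max_delta)		/* cap for the nohz code */
			delta = max_delta;
		if (delta < 1)			/* rounding guard */
			delta = 1;

		return (now + delta < expires) ? now + delta : expires;
	}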
932 | /** | 932 | /** |
933 | * get_next_timer_interrupt - return the jiffy of the next pending timer | 933 | * get_next_timer_interrupt - return the jiffy of the next pending timer |
934 | * @now: current time (in jiffies) | 934 | * @now: current time (in jiffies) |
935 | */ | 935 | */ |
936 | unsigned long get_next_timer_interrupt(unsigned long now) | 936 | unsigned long get_next_timer_interrupt(unsigned long now) |
937 | { | 937 | { |
938 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 938 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
939 | unsigned long expires; | 939 | unsigned long expires; |
940 | 940 | ||
941 | spin_lock(&base->lock); | 941 | spin_lock(&base->lock); |
942 | expires = __next_timer_interrupt(base); | 942 | expires = __next_timer_interrupt(base); |
943 | spin_unlock(&base->lock); | 943 | spin_unlock(&base->lock); |
944 | 944 | ||
945 | if (time_before_eq(expires, now)) | 945 | if (time_before_eq(expires, now)) |
946 | return now; | 946 | return now; |
947 | 947 | ||
948 | return cmp_next_hrtimer_event(now, expires); | 948 | return cmp_next_hrtimer_event(now, expires); |
949 | } | 949 | } |
950 | #endif | 950 | #endif |
951 | 951 | ||
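get_next_timer_interrupt() itself is only a locked wrapper; its consumer is the nohz idle path. A hypothetical caller sketch (names invented), loosely modelled on tick_nohz_stop_sched_tick() rather than copied from it, with wraparound ignored for brevity:

	static void maybe_stop_tick(void)
	{
		unsigned long now = jiffies;
		unsigned long next = get_next_timer_interrupt(now);

		if (next > now + 1) {
			/* Nothing due for (next - now) ticks: program the
			 * clockevent for 'next' and skip the idle ticks. */
		} else {
			/* A timer is (nearly) due: keep ticking. */
		}
	}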
952 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 952 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
953 | void account_process_tick(struct task_struct *p, int user_tick) | 953 | void account_process_tick(struct task_struct *p, int user_tick) |
954 | { | 954 | { |
955 | cputime_t one_jiffy = jiffies_to_cputime(1); | 955 | cputime_t one_jiffy = jiffies_to_cputime(1); |
956 | 956 | ||
957 | if (user_tick) { | 957 | if (user_tick) { |
958 | account_user_time(p, one_jiffy); | 958 | account_user_time(p, one_jiffy); |
959 | account_user_time_scaled(p, cputime_to_scaled(one_jiffy)); | 959 | account_user_time_scaled(p, cputime_to_scaled(one_jiffy)); |
960 | } else { | 960 | } else { |
961 | account_system_time(p, HARDIRQ_OFFSET, one_jiffy); | 961 | account_system_time(p, HARDIRQ_OFFSET, one_jiffy); |
962 | account_system_time_scaled(p, cputime_to_scaled(one_jiffy)); | 962 | account_system_time_scaled(p, cputime_to_scaled(one_jiffy)); |
963 | } | 963 | } |
964 | } | 964 | } |
965 | #endif | 965 | #endif |
966 | 966 | ||
967 | /* | 967 | /* |
968 | * Called from the timer interrupt handler to charge one tick to the current | 968 | * Called from the timer interrupt handler to charge one tick to the current |
969 | * process. user_tick is 1 if the tick is user time, 0 for system. | 969 | * process. user_tick is 1 if the tick is user time, 0 for system. |
970 | */ | 970 | */ |
971 | void update_process_times(int user_tick) | 971 | void update_process_times(int user_tick) |
972 | { | 972 | { |
973 | struct task_struct *p = current; | 973 | struct task_struct *p = current; |
974 | int cpu = smp_processor_id(); | 974 | int cpu = smp_processor_id(); |
975 | 975 | ||
976 | /* Note: this timer irq context must be accounted for as well. */ | 976 | /* Note: this timer irq context must be accounted for as well. */ |
977 | account_process_tick(p, user_tick); | 977 | account_process_tick(p, user_tick); |
978 | run_local_timers(); | 978 | run_local_timers(); |
979 | if (rcu_pending(cpu)) | 979 | if (rcu_pending(cpu)) |
980 | rcu_check_callbacks(cpu, user_tick); | 980 | rcu_check_callbacks(cpu, user_tick); |
981 | printk_tick(); | ||
981 | scheduler_tick(); | 982 | scheduler_tick(); |
982 | run_posix_cpu_timers(p); | 983 | run_posix_cpu_timers(p); |
983 | } | 984 | } |
984 | 985 | ||
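The printk_tick() call added to update_process_times() is the point of this commit: printk() no longer wakes klogd directly (which could deadlock against rq->lock or xtime_lock inside wake_up()), but merely marks a per-CPU flag that the tick polls. A sketch of the printk.c counterpart, consistent with the commit message; the authoritative version is in this commit's kernel/printk.c hunk, not shown here:

	/* Per-CPU "klogd wakeup owed" flag, set from printk() itself. */
	static DEFINE_PER_CPU(int, printk_pending);

	void wake_up_klogd(void)
	{
		/* Called from printk(): only mark the wakeup as pending,
		 * since rq->lock or xtime_lock may be held here. */
		if (waitqueue_active(&log_wait))
			__get_cpu_var(printk_pending) = 1;
	}

	void printk_tick(void)
	{
		/* Polled from update_process_times(): a context where
		 * waking klogd cannot deadlock on scheduler locks. */
		if (__get_cpu_var(printk_pending)) {
			__get_cpu_var(printk_pending) = 0;
			wake_up_interruptible(&log_wait);
		}
	}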
985 | /* | 986 | /* |
986 | * Nr of active tasks - counted in fixed-point numbers | 987 | * Nr of active tasks - counted in fixed-point numbers |
987 | */ | 988 | */ |
988 | static unsigned long count_active_tasks(void) | 989 | static unsigned long count_active_tasks(void) |
989 | { | 990 | { |
990 | return nr_active() * FIXED_1; | 991 | return nr_active() * FIXED_1; |
991 | } | 992 | } |
992 | 993 | ||
993 | /* | 994 | /* |
994 | * Hmm.. Changed this, as the GNU make sources (load.c) seem to | 995 | * Hmm.. Changed this, as the GNU make sources (load.c) seem to |
995 | * imply that avenrun[] is the standard name for this kind of thing. | 996 | * imply that avenrun[] is the standard name for this kind of thing. |
996 | * Nothing else seems to be standardized: the fractional size etc | 997 | * Nothing else seems to be standardized: the fractional size etc |
997 | * all seem to differ on different machines. | 998 | * all seem to differ on different machines. |
998 | * | 999 | * |
999 | * Requires xtime_lock to access. | 1000 | * Requires xtime_lock to access. |
1000 | */ | 1001 | */ |
1001 | unsigned long avenrun[3]; | 1002 | unsigned long avenrun[3]; |
1002 | 1003 | ||
1003 | EXPORT_SYMBOL(avenrun); | 1004 | EXPORT_SYMBOL(avenrun); |
1004 | 1005 | ||
1005 | /* | 1006 | /* |
1006 | * calc_load - given tick count, update the avenrun load estimates. | 1007 | * calc_load - given tick count, update the avenrun load estimates. |
1007 | * This is called while holding a write_lock on xtime_lock. | 1008 | * This is called while holding a write_lock on xtime_lock. |
1008 | */ | 1009 | */ |
1009 | static inline void calc_load(unsigned long ticks) | 1010 | static inline void calc_load(unsigned long ticks) |
1010 | { | 1011 | { |
1011 | unsigned long active_tasks; /* fixed-point */ | 1012 | unsigned long active_tasks; /* fixed-point */ |
1012 | static int count = LOAD_FREQ; | 1013 | static int count = LOAD_FREQ; |
1013 | 1014 | ||
1014 | count -= ticks; | 1015 | count -= ticks; |
1015 | if (unlikely(count < 0)) { | 1016 | if (unlikely(count < 0)) { |
1016 | active_tasks = count_active_tasks(); | 1017 | active_tasks = count_active_tasks(); |
1017 | do { | 1018 | do { |
1018 | CALC_LOAD(avenrun[0], EXP_1, active_tasks); | 1019 | CALC_LOAD(avenrun[0], EXP_1, active_tasks); |
1019 | CALC_LOAD(avenrun[1], EXP_5, active_tasks); | 1020 | CALC_LOAD(avenrun[1], EXP_5, active_tasks); |
1020 | CALC_LOAD(avenrun[2], EXP_15, active_tasks); | 1021 | CALC_LOAD(avenrun[2], EXP_15, active_tasks); |
1021 | count += LOAD_FREQ; | 1022 | count += LOAD_FREQ; |
1022 | } while (count < 0); | 1023 | } while (count < 0); |
1023 | } | 1024 | } |
1024 | } | 1025 | } |
1025 | 1026 | ||
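calc_load() maintains the classic 1/5/15-minute averages in 11-bit fixed point: each CALC_LOAD step computes load = (load * exp + active * (FIXED_1 - exp)) >> FSHIFT, an exponentially decaying average sampled every LOAD_FREQ ticks (five seconds). A runnable userspace replay, with the constants assumed from include/linux/sched.h:

	#include <stdio.h>

	/* Constants assumed from include/linux/sched.h (11-bit fixed point). */
	#define FSHIFT	11
	#define FIXED_1	(1UL << FSHIFT)
	#define EXP_1	1884		/* decay factor for the 1-min average */
	#define EXP_5	2014		/* 5-min */
	#define EXP_15	2037		/* 15-min */

	#define CALC_LOAD(load, exp, n) \
		load *= exp; \
		load += (n) * (FIXED_1 - exp); \
		load >>= FSHIFT;

	int main(void)
	{
		unsigned long avenrun[3] = { 0, 0, 0 };
		unsigned long active = 4 * FIXED_1;	/* 4 runnable tasks */
		int i;

		/* Replay 60 five-second LOAD_FREQ intervals (5 minutes). */
		for (i = 0; i < 60; i++) {
			CALC_LOAD(avenrun[0], EXP_1, active);
			CALC_LOAD(avenrun[1], EXP_5, active);
			CALC_LOAD(avenrun[2], EXP_15, active);
		}

		/* Format like /proc/loadavg; the 1-min average converges
		 * toward 4.00 fastest. */
		for (i = 0; i < 3; i++)
			printf("%lu.%02lu ", avenrun[i] >> FSHIFT,
			       ((avenrun[i] & (FIXED_1 - 1)) * 100) >> FSHIFT);
		printf("\n");
		return 0;
	}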
1026 | /* | 1027 | /* |
1027 | * This function runs timers and the timer-tq in bottom half context. | 1028 | * This function runs timers and the timer-tq in bottom half context. |
1028 | */ | 1029 | */ |
1029 | static void run_timer_softirq(struct softirq_action *h) | 1030 | static void run_timer_softirq(struct softirq_action *h) |
1030 | { | 1031 | { |
1031 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1032 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
1032 | 1033 | ||
1033 | hrtimer_run_pending(); | 1034 | hrtimer_run_pending(); |
1034 | 1035 | ||
1035 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1036 | if (time_after_eq(jiffies, base->timer_jiffies)) |
1036 | __run_timers(base); | 1037 | __run_timers(base); |
1037 | } | 1038 | } |
1038 | 1039 | ||
1039 | /* | 1040 | /* |
1040 | * Called by the local, per-CPU timer interrupt on SMP. | 1041 | * Called by the local, per-CPU timer interrupt on SMP. |
1041 | */ | 1042 | */ |
1042 | void run_local_timers(void) | 1043 | void run_local_timers(void) |
1043 | { | 1044 | { |
1044 | hrtimer_run_queues(); | 1045 | hrtimer_run_queues(); |
1045 | raise_softirq(TIMER_SOFTIRQ); | 1046 | raise_softirq(TIMER_SOFTIRQ); |
1046 | softlockup_tick(); | 1047 | softlockup_tick(); |
1047 | } | 1048 | } |
1048 | 1049 | ||
1049 | /* | 1050 | /* |
1050 | * Called by the timer interrupt. xtime_lock must already be taken | 1051 | * Called by the timer interrupt. xtime_lock must already be taken |
1051 | * by the timer IRQ! | 1052 | * by the timer IRQ! |
1052 | */ | 1053 | */ |
1053 | static inline void update_times(unsigned long ticks) | 1054 | static inline void update_times(unsigned long ticks) |
1054 | { | 1055 | { |
1055 | update_wall_time(); | 1056 | update_wall_time(); |
1056 | calc_load(ticks); | 1057 | calc_load(ticks); |
1057 | } | 1058 | } |
1058 | 1059 | ||
1059 | /* | 1060 | /* |
1060 | * The 64-bit jiffies value is not atomic - you MUST NOT read it | 1061 | * The 64-bit jiffies value is not atomic - you MUST NOT read it |
1061 | * without sampling the sequence number in xtime_lock. | 1062 | * without sampling the sequence number in xtime_lock. |
1062 | * jiffies is defined in the linker script... | 1063 | * jiffies is defined in the linker script... |
1063 | */ | 1064 | */ |
1064 | 1065 | ||
1065 | void do_timer(unsigned long ticks) | 1066 | void do_timer(unsigned long ticks) |
1066 | { | 1067 | { |
1067 | jiffies_64 += ticks; | 1068 | jiffies_64 += ticks; |
1068 | update_times(ticks); | 1069 | update_times(ticks); |
1069 | } | 1070 | } |
1070 | 1071 | ||
1071 | #ifdef __ARCH_WANT_SYS_ALARM | 1072 | #ifdef __ARCH_WANT_SYS_ALARM |
1072 | 1073 | ||
1073 | /* | 1074 | /* |
1074 | * For backwards compatibility? This can be done in libc so Alpha | 1075 | * For backwards compatibility? This can be done in libc so Alpha |
1075 | * and all newer ports shouldn't need it. | 1076 | * and all newer ports shouldn't need it. |
1076 | */ | 1077 | */ |
1077 | asmlinkage unsigned long sys_alarm(unsigned int seconds) | 1078 | asmlinkage unsigned long sys_alarm(unsigned int seconds) |
1078 | { | 1079 | { |
1079 | return alarm_setitimer(seconds); | 1080 | return alarm_setitimer(seconds); |
1080 | } | 1081 | } |
1081 | 1082 | ||
1082 | #endif | 1083 | #endif |
1083 | 1084 | ||
1084 | #ifndef __alpha__ | 1085 | #ifndef __alpha__ |
1085 | 1086 | ||
1086 | /* | 1087 | /* |
1087 | * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this | 1088 | * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this |
1088 | * should be moved into arch/i386? | 1089 | * should be moved into arch/i386? |
1089 | */ | 1090 | */ |
1090 | 1091 | ||
1091 | /** | 1092 | /** |
1092 | * sys_getpid - return the thread group id of the current process | 1093 | * sys_getpid - return the thread group id of the current process |
1093 | * | 1094 | * |
1094 | * Note, despite the name, this returns the tgid not the pid. The tgid and | 1095 | * Note, despite the name, this returns the tgid not the pid. The tgid and |
1095 | * the pid are identical unless CLONE_THREAD was specified on clone() in | 1096 | * the pid are identical unless CLONE_THREAD was specified on clone() in |
1096 | * which case the tgid is the same in all threads of the same group. | 1097 | * which case the tgid is the same in all threads of the same group. |
1097 | * | 1098 | * |
1098 | * This is SMP safe as current->tgid does not change. | 1099 | * This is SMP safe as current->tgid does not change. |
1099 | */ | 1100 | */ |
1100 | asmlinkage long sys_getpid(void) | 1101 | asmlinkage long sys_getpid(void) |
1101 | { | 1102 | { |
1102 | return task_tgid_vnr(current); | 1103 | return task_tgid_vnr(current); |
1103 | } | 1104 | } |
1104 | 1105 | ||
1105 | /* | 1106 | /* |
1106 | * Accessing ->real_parent is not SMP-safe, it could | 1107 | * Accessing ->real_parent is not SMP-safe, it could |
1107 | * change from under us. However, we can use a stale | 1108 | * change from under us. However, we can use a stale |
1108 | * value of ->real_parent under rcu_read_lock(), see | 1109 | * value of ->real_parent under rcu_read_lock(), see |
1109 | * release_task()->call_rcu(delayed_put_task_struct). | 1110 | * release_task()->call_rcu(delayed_put_task_struct). |
1110 | */ | 1111 | */ |
1111 | asmlinkage long sys_getppid(void) | 1112 | asmlinkage long sys_getppid(void) |
1112 | { | 1113 | { |
1113 | int pid; | 1114 | int pid; |
1114 | 1115 | ||
1115 | rcu_read_lock(); | 1116 | rcu_read_lock(); |
1116 | pid = task_tgid_vnr(current->real_parent); | 1117 | pid = task_tgid_vnr(current->real_parent); |
1117 | rcu_read_unlock(); | 1118 | rcu_read_unlock(); |
1118 | 1119 | ||
1119 | return pid; | 1120 | return pid; |
1120 | } | 1121 | } |
1121 | 1122 | ||
1122 | asmlinkage long sys_getuid(void) | 1123 | asmlinkage long sys_getuid(void) |
1123 | { | 1124 | { |
1124 | /* Only we change this so SMP safe */ | 1125 | /* Only we change this so SMP safe */ |
1125 | return current->uid; | 1126 | return current->uid; |
1126 | } | 1127 | } |
1127 | 1128 | ||
1128 | asmlinkage long sys_geteuid(void) | 1129 | asmlinkage long sys_geteuid(void) |
1129 | { | 1130 | { |
1130 | /* Only we change this so SMP safe */ | 1131 | /* Only we change this so SMP safe */ |
1131 | return current->euid; | 1132 | return current->euid; |
1132 | } | 1133 | } |
1133 | 1134 | ||
1134 | asmlinkage long sys_getgid(void) | 1135 | asmlinkage long sys_getgid(void) |
1135 | { | 1136 | { |
1136 | /* Only we change this so SMP safe */ | 1137 | /* Only we change this so SMP safe */ |
1137 | return current->gid; | 1138 | return current->gid; |
1138 | } | 1139 | } |
1139 | 1140 | ||
1140 | asmlinkage long sys_getegid(void) | 1141 | asmlinkage long sys_getegid(void) |
1141 | { | 1142 | { |
1142 | /* Only we change this so SMP safe */ | 1143 | /* Only we change this so SMP safe */ |
1143 | return current->egid; | 1144 | return current->egid; |
1144 | } | 1145 | } |
1145 | 1146 | ||
1146 | #endif | 1147 | #endif |
1147 | 1148 | ||
1148 | static void process_timeout(unsigned long __data) | 1149 | static void process_timeout(unsigned long __data) |
1149 | { | 1150 | { |
1150 | wake_up_process((struct task_struct *)__data); | 1151 | wake_up_process((struct task_struct *)__data); |
1151 | } | 1152 | } |
1152 | 1153 | ||
1153 | /** | 1154 | /** |
1154 | * schedule_timeout - sleep until timeout | 1155 | * schedule_timeout - sleep until timeout |
1155 | * @timeout: timeout value in jiffies | 1156 | * @timeout: timeout value in jiffies |
1156 | * | 1157 | * |
1157 | * Make the current task sleep until @timeout jiffies have | 1158 | * Make the current task sleep until @timeout jiffies have |
1158 | * elapsed. The routine will return immediately unless | 1159 | * elapsed. The routine will return immediately unless |
1159 | * the current task state has been set (see set_current_state()). | 1160 | * the current task state has been set (see set_current_state()). |
1160 | * | 1161 | * |
1161 | * You can set the task state as follows - | 1162 | * You can set the task state as follows - |
1162 | * | 1163 | * |
1163 | * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to | 1164 | * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to |
1164 | * pass before the routine returns. The routine will return 0. | 1165 | * pass before the routine returns. The routine will return 0. |
1165 | * | 1166 | * |
1166 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | 1167 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is |
1167 | * delivered to the current task. In this case the remaining time | 1168 | * delivered to the current task. In this case the remaining time |
1168 | * in jiffies will be returned, or 0 if the timer expired in time. | 1169 | * in jiffies will be returned, or 0 if the timer expired in time. |
1169 | * | 1170 | * |
1170 | * The current task state is guaranteed to be TASK_RUNNING when this | 1171 | * The current task state is guaranteed to be TASK_RUNNING when this |
1171 | * routine returns. | 1172 | * routine returns. |
1172 | * | 1173 | * |
1173 | * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule | 1174 | * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule |
1174 | * the CPU away without a bound on the timeout. In this case the return | 1175 | * the CPU away without a bound on the timeout. In this case the return |
1175 | * value will be %MAX_SCHEDULE_TIMEOUT. | 1176 | * value will be %MAX_SCHEDULE_TIMEOUT. |
1176 | * | 1177 | * |
1177 | * In all cases the return value is guaranteed to be non-negative. | 1178 | * In all cases the return value is guaranteed to be non-negative. |
1178 | */ | 1179 | */ |
1179 | signed long __sched schedule_timeout(signed long timeout) | 1180 | signed long __sched schedule_timeout(signed long timeout) |
1180 | { | 1181 | { |
1181 | struct timer_list timer; | 1182 | struct timer_list timer; |
1182 | unsigned long expire; | 1183 | unsigned long expire; |
1183 | 1184 | ||
1184 | switch (timeout) | 1185 | switch (timeout) |
1185 | { | 1186 | { |
1186 | case MAX_SCHEDULE_TIMEOUT: | 1187 | case MAX_SCHEDULE_TIMEOUT: |
1187 | /* | 1188 | /* |
1188 | * These two special cases are useful to be comfortable | 1189 | * These two special cases are useful to be comfortable |
1189 | * in the caller. Nothing more. We could take | 1190 | * in the caller. Nothing more. We could take |
1190 | * MAX_SCHEDULE_TIMEOUT from one of the negative values, | 1191 | * MAX_SCHEDULE_TIMEOUT from one of the negative values, |
1191 | * but I'd like to return a valid offset (>=0) to allow | 1192 | * but I'd like to return a valid offset (>=0) to allow |
1192 | * the caller to do everything it wants with the retval. | 1193 | * the caller to do everything it wants with the retval. |
1193 | */ | 1194 | */ |
1194 | schedule(); | 1195 | schedule(); |
1195 | goto out; | 1196 | goto out; |
1196 | default: | 1197 | default: |
1197 | /* | 1198 | /* |
1198 | * Another bit of PARANOID. Note that the retval will be | 1199 | * Another bit of PARANOID. Note that the retval will be |
1199 | * 0, since no piece of the kernel is supposed to check | 1200 | * 0, since no piece of the kernel is supposed to check |
1200 | * for a negative retval of schedule_timeout() (since it | 1201 | * for a negative retval of schedule_timeout() (since it |
1201 | * should never happen anyway). You just have the printk() | 1202 | * should never happen anyway). You just have the printk() |
1202 | * that will tell you if something has gone wrong and where. | 1203 | * that will tell you if something has gone wrong and where. |
1203 | */ | 1204 | */ |
1204 | if (timeout < 0) { | 1205 | if (timeout < 0) { |
1205 | printk(KERN_ERR "schedule_timeout: wrong timeout " | 1206 | printk(KERN_ERR "schedule_timeout: wrong timeout " |
1206 | "value %lx\n", timeout); | 1207 | "value %lx\n", timeout); |
1207 | dump_stack(); | 1208 | dump_stack(); |
1208 | current->state = TASK_RUNNING; | 1209 | current->state = TASK_RUNNING; |
1209 | goto out; | 1210 | goto out; |
1210 | } | 1211 | } |
1211 | } | 1212 | } |
1212 | 1213 | ||
1213 | expire = timeout + jiffies; | 1214 | expire = timeout + jiffies; |
1214 | 1215 | ||
1215 | setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); | 1216 | setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); |
1216 | __mod_timer(&timer, expire); | 1217 | __mod_timer(&timer, expire); |
1217 | schedule(); | 1218 | schedule(); |
1218 | del_singleshot_timer_sync(&timer); | 1219 | del_singleshot_timer_sync(&timer); |
1219 | 1220 | ||
1220 | /* Remove the timer from the object tracker */ | 1221 | /* Remove the timer from the object tracker */ |
1221 | destroy_timer_on_stack(&timer); | 1222 | destroy_timer_on_stack(&timer); |
1222 | 1223 | ||
1223 | timeout = expire - jiffies; | 1224 | timeout = expire - jiffies; |
1224 | 1225 | ||
1225 | out: | 1226 | out: |
1226 | return timeout < 0 ? 0 : timeout; | 1227 | return timeout < 0 ? 0 : timeout; |
1227 | } | 1228 | } |
1228 | EXPORT_SYMBOL(schedule_timeout); | 1229 | EXPORT_SYMBOL(schedule_timeout); |
1229 | 1230 | ||
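As the kerneldoc above stresses, schedule_timeout() returns immediately unless the task state was set beforehand. An illustrative wait pattern (not taken from this file):

	/* Sleep for up to one second, waking early on a signal;
	 * returns the jiffies remaining, or 0 on full expiry. */
	static signed long wait_a_bit(void)
	{
		set_current_state(TASK_INTERRUPTIBLE);	/* must come first */
		return schedule_timeout(HZ);		/* HZ jiffies = 1 s */
	}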
1230 | /* | 1231 | /* |
1231 | * We can use __set_current_state() here because schedule_timeout() calls | 1232 | * We can use __set_current_state() here because schedule_timeout() calls |
1232 | * schedule() unconditionally. | 1233 | * schedule() unconditionally. |
1233 | */ | 1234 | */ |
1234 | signed long __sched schedule_timeout_interruptible(signed long timeout) | 1235 | signed long __sched schedule_timeout_interruptible(signed long timeout) |
1235 | { | 1236 | { |
1236 | __set_current_state(TASK_INTERRUPTIBLE); | 1237 | __set_current_state(TASK_INTERRUPTIBLE); |
1237 | return schedule_timeout(timeout); | 1238 | return schedule_timeout(timeout); |
1238 | } | 1239 | } |
1239 | EXPORT_SYMBOL(schedule_timeout_interruptible); | 1240 | EXPORT_SYMBOL(schedule_timeout_interruptible); |
1240 | 1241 | ||
1241 | signed long __sched schedule_timeout_killable(signed long timeout) | 1242 | signed long __sched schedule_timeout_killable(signed long timeout) |
1242 | { | 1243 | { |
1243 | __set_current_state(TASK_KILLABLE); | 1244 | __set_current_state(TASK_KILLABLE); |
1244 | return schedule_timeout(timeout); | 1245 | return schedule_timeout(timeout); |
1245 | } | 1246 | } |
1246 | EXPORT_SYMBOL(schedule_timeout_killable); | 1247 | EXPORT_SYMBOL(schedule_timeout_killable); |
1247 | 1248 | ||
1248 | signed long __sched schedule_timeout_uninterruptible(signed long timeout) | 1249 | signed long __sched schedule_timeout_uninterruptible(signed long timeout) |
1249 | { | 1250 | { |
1250 | __set_current_state(TASK_UNINTERRUPTIBLE); | 1251 | __set_current_state(TASK_UNINTERRUPTIBLE); |
1251 | return schedule_timeout(timeout); | 1252 | return schedule_timeout(timeout); |
1252 | } | 1253 | } |
1253 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); | 1254 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); |
1254 | 1255 | ||
1255 | /* Thread ID - the internal kernel "pid" */ | 1256 | /* Thread ID - the internal kernel "pid" */ |
1256 | asmlinkage long sys_gettid(void) | 1257 | asmlinkage long sys_gettid(void) |
1257 | { | 1258 | { |
1258 | return task_pid_vnr(current); | 1259 | return task_pid_vnr(current); |
1259 | } | 1260 | } |
1260 | 1261 | ||
1261 | /** | 1262 | /** |
1262 | * do_sysinfo - fill in sysinfo struct | 1263 | * do_sysinfo - fill in sysinfo struct |
1263 | * @info: pointer to buffer to fill | 1264 | * @info: pointer to buffer to fill |
1264 | */ | 1265 | */ |
1265 | int do_sysinfo(struct sysinfo *info) | 1266 | int do_sysinfo(struct sysinfo *info) |
1266 | { | 1267 | { |
1267 | unsigned long mem_total, sav_total; | 1268 | unsigned long mem_total, sav_total; |
1268 | unsigned int mem_unit, bitcount; | 1269 | unsigned int mem_unit, bitcount; |
1269 | unsigned long seq; | 1270 | unsigned long seq; |
1270 | 1271 | ||
1271 | memset(info, 0, sizeof(struct sysinfo)); | 1272 | memset(info, 0, sizeof(struct sysinfo)); |
1272 | 1273 | ||
1273 | do { | 1274 | do { |
1274 | struct timespec tp; | 1275 | struct timespec tp; |
1275 | seq = read_seqbegin(&xtime_lock); | 1276 | seq = read_seqbegin(&xtime_lock); |
1276 | 1277 | ||
1277 | /* | 1278 | /* |
1278 | * This is annoying. The below does the same thing that | 1279 | * This is annoying. The below does the same thing that |
1279 | * posix_get_clock_monotonic() does, but that function wants | 1280 | * posix_get_clock_monotonic() does, but that function wants |
1280 | * to take the lock itself, while we want our read section | 1281 | * to take the lock itself, while we want our read section |
1281 | * to cover the load averages too. | 1282 | * to cover the load averages too. |
1282 | */ | 1283 | */ |
1283 | 1284 | ||
1284 | getnstimeofday(&tp); | 1285 | getnstimeofday(&tp); |
1285 | tp.tv_sec += wall_to_monotonic.tv_sec; | 1286 | tp.tv_sec += wall_to_monotonic.tv_sec; |
1286 | tp.tv_nsec += wall_to_monotonic.tv_nsec; | 1287 | tp.tv_nsec += wall_to_monotonic.tv_nsec; |
1287 | monotonic_to_bootbased(&tp); | 1288 | monotonic_to_bootbased(&tp); |
1288 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { | 1289 | if (tp.tv_nsec - NSEC_PER_SEC >= 0) { |
1289 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; | 1290 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; |
1290 | tp.tv_sec++; | 1291 | tp.tv_sec++; |
1291 | } | 1292 | } |
1292 | info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); | 1293 | info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); |
1293 | 1294 | ||
1294 | info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); | 1295 | info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); |
1295 | info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); | 1296 | info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); |
1296 | info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); | 1297 | info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); |
1297 | 1298 | ||
1298 | info->procs = nr_threads; | 1299 | info->procs = nr_threads; |
1299 | } while (read_seqretry(&xtime_lock, seq)); | 1300 | } while (read_seqretry(&xtime_lock, seq)); |
1300 | 1301 | ||
1301 | si_meminfo(info); | 1302 | si_meminfo(info); |
1302 | si_swapinfo(info); | 1303 | si_swapinfo(info); |
1303 | 1304 | ||
1304 | /* | 1305 | /* |
1305 | * If the sum of all the available memory (i.e. ram + swap) | 1306 | * If the sum of all the available memory (i.e. ram + swap) |
1306 | * is less than can be stored in a 32 bit unsigned long then | 1307 | * is less than can be stored in a 32 bit unsigned long then |
1307 | * we can be binary compatible with 2.2.x kernels. If not, | 1308 | * we can be binary compatible with 2.2.x kernels. If not, |
1308 | * well, in that case 2.2.x was broken anyway... | 1309 | * well, in that case 2.2.x was broken anyway... |
1309 | * | 1310 | * |
1310 | * -Erik Andersen <andersee@debian.org> | 1311 | * -Erik Andersen <andersee@debian.org> |
1311 | */ | 1312 | */ |
1312 | 1313 | ||
1313 | mem_total = info->totalram + info->totalswap; | 1314 | mem_total = info->totalram + info->totalswap; |
1314 | if (mem_total < info->totalram || mem_total < info->totalswap) | 1315 | if (mem_total < info->totalram || mem_total < info->totalswap) |
1315 | goto out; | 1316 | goto out; |
1316 | bitcount = 0; | 1317 | bitcount = 0; |
1317 | mem_unit = info->mem_unit; | 1318 | mem_unit = info->mem_unit; |
1318 | while (mem_unit > 1) { | 1319 | while (mem_unit > 1) { |
1319 | bitcount++; | 1320 | bitcount++; |
1320 | mem_unit >>= 1; | 1321 | mem_unit >>= 1; |
1321 | sav_total = mem_total; | 1322 | sav_total = mem_total; |
1322 | mem_total <<= 1; | 1323 | mem_total <<= 1; |
1323 | if (mem_total < sav_total) | 1324 | if (mem_total < sav_total) |
1324 | goto out; | 1325 | goto out; |
1325 | } | 1326 | } |
1326 | 1327 | ||
1327 | /* | 1328 | /* |
1328 | * If mem_total did not overflow, multiply all memory values by | 1329 | * If mem_total did not overflow, multiply all memory values by |
1329 | * info->mem_unit and set it to 1. This leaves things compatible | 1330 | * info->mem_unit and set it to 1. This leaves things compatible |
1330 | * with 2.2.x, and also retains compatibility with earlier 2.4.x | 1331 | * with 2.2.x, and also retains compatibility with earlier 2.4.x |
1331 | * kernels... | 1332 | * kernels... |
1332 | */ | 1333 | */ |
1333 | 1334 | ||
1334 | info->mem_unit = 1; | 1335 | info->mem_unit = 1; |
1335 | info->totalram <<= bitcount; | 1336 | info->totalram <<= bitcount; |
1336 | info->freeram <<= bitcount; | 1337 | info->freeram <<= bitcount; |
1337 | info->sharedram <<= bitcount; | 1338 | info->sharedram <<= bitcount; |
1338 | info->bufferram <<= bitcount; | 1339 | info->bufferram <<= bitcount; |
1339 | info->totalswap <<= bitcount; | 1340 | info->totalswap <<= bitcount; |
1340 | info->freeswap <<= bitcount; | 1341 | info->freeswap <<= bitcount; |
1341 | info->totalhigh <<= bitcount; | 1342 | info->totalhigh <<= bitcount; |
1342 | info->freehigh <<= bitcount; | 1343 | info->freehigh <<= bitcount; |
1343 | 1344 | ||
1344 | out: | 1345 | out: |
1345 | return 0; | 1346 | return 0; |
1346 | } | 1347 | } |
1347 | 1348 | ||
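The mem_unit dance above is overflow-safe scaling: instead of multiplying each field by mem_unit (which may not fit a 32-bit unsigned long), it shifts by log2(mem_unit) and bails out as soon as the running total would wrap. In isolation (hypothetical helper name, runnable in userspace):

	#include <stdio.h>

	/* Mirrors the bitcount loop in do_sysinfo(): scale 'total' by a
	 * power-of-two mem_unit via shifts, bailing out if the value
	 * would wrap an unsigned long. Not a kernel API. */
	static int scale_mem(unsigned long *total, unsigned int mem_unit,
			     unsigned int *bitcount)
	{
		unsigned long sav;

		*bitcount = 0;
		while (mem_unit > 1) {
			(*bitcount)++;
			mem_unit >>= 1;
			sav = *total;
			*total <<= 1;
			if (*total < sav)
				return -1;	/* overflow: keep old units */
		}
		return 0;
	}

	int main(void)
	{
		unsigned long total = 1UL << 20;	/* e.g. 1M units */
		unsigned int bits;

		if (scale_mem(&total, 4096, &bits) == 0)
			printf("total=%lu (shifted by %u)\n", total, bits);
		else
			printf("would overflow; keeping mem_unit\n");
		return 0;
	}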
1348 | asmlinkage long sys_sysinfo(struct sysinfo __user *info) | 1349 | asmlinkage long sys_sysinfo(struct sysinfo __user *info) |
1349 | { | 1350 | { |
1350 | struct sysinfo val; | 1351 | struct sysinfo val; |
1351 | 1352 | ||
1352 | do_sysinfo(&val); | 1353 | do_sysinfo(&val); |
1353 | 1354 | ||
1354 | if (copy_to_user(info, &val, sizeof(struct sysinfo))) | 1355 | if (copy_to_user(info, &val, sizeof(struct sysinfo))) |
1355 | return -EFAULT; | 1356 | return -EFAULT; |
1356 | 1357 | ||
1357 | return 0; | 1358 | return 0; |
1358 | } | 1359 | } |
1359 | 1360 | ||
1360 | static int __cpuinit init_timers_cpu(int cpu) | 1361 | static int __cpuinit init_timers_cpu(int cpu) |
1361 | { | 1362 | { |
1362 | int j; | 1363 | int j; |
1363 | struct tvec_base *base; | 1364 | struct tvec_base *base; |
1364 | static char __cpuinitdata tvec_base_done[NR_CPUS]; | 1365 | static char __cpuinitdata tvec_base_done[NR_CPUS]; |
1365 | 1366 | ||
1366 | if (!tvec_base_done[cpu]) { | 1367 | if (!tvec_base_done[cpu]) { |
1367 | static char boot_done; | 1368 | static char boot_done; |
1368 | 1369 | ||
1369 | if (boot_done) { | 1370 | if (boot_done) { |
1370 | /* | 1371 | /* |
1371 | * The APs use this path later in boot | 1372 | * The APs use this path later in boot |
1372 | */ | 1373 | */ |
1373 | base = kmalloc_node(sizeof(*base), | 1374 | base = kmalloc_node(sizeof(*base), |
1374 | GFP_KERNEL | __GFP_ZERO, | 1375 | GFP_KERNEL | __GFP_ZERO, |
1375 | cpu_to_node(cpu)); | 1376 | cpu_to_node(cpu)); |
1376 | if (!base) | 1377 | if (!base) |
1377 | return -ENOMEM; | 1378 | return -ENOMEM; |
1378 | 1379 | ||
1379 | /* Make sure that tvec_base is 2-byte aligned */ | 1380 | /* Make sure that tvec_base is 2-byte aligned */ |
1380 | if (tbase_get_deferrable(base)) { | 1381 | if (tbase_get_deferrable(base)) { |
1381 | WARN_ON(1); | 1382 | WARN_ON(1); |
1382 | kfree(base); | 1383 | kfree(base); |
1383 | return -ENOMEM; | 1384 | return -ENOMEM; |
1384 | } | 1385 | } |
1385 | per_cpu(tvec_bases, cpu) = base; | 1386 | per_cpu(tvec_bases, cpu) = base; |
1386 | } else { | 1387 | } else { |
1387 | /* | 1388 | /* |
1388 | * This is for the boot CPU - we use compile-time | 1389 | * This is for the boot CPU - we use compile-time |
1389 | * static initialisation because per-cpu memory isn't | 1390 | * static initialisation because per-cpu memory isn't |
1390 | * ready yet and because the memory allocators are not | 1391 | * ready yet and because the memory allocators are not |
1391 | * initialised either. | 1392 | * initialised either. |
1392 | */ | 1393 | */ |
1393 | boot_done = 1; | 1394 | boot_done = 1; |
1394 | base = &boot_tvec_bases; | 1395 | base = &boot_tvec_bases; |
1395 | } | 1396 | } |
1396 | tvec_base_done[cpu] = 1; | 1397 | tvec_base_done[cpu] = 1; |
1397 | } else { | 1398 | } else { |
1398 | base = per_cpu(tvec_bases, cpu); | 1399 | base = per_cpu(tvec_bases, cpu); |
1399 | } | 1400 | } |
1400 | 1401 | ||
1401 | spin_lock_init(&base->lock); | 1402 | spin_lock_init(&base->lock); |
1402 | 1403 | ||
1403 | for (j = 0; j < TVN_SIZE; j++) { | 1404 | for (j = 0; j < TVN_SIZE; j++) { |
1404 | INIT_LIST_HEAD(base->tv5.vec + j); | 1405 | INIT_LIST_HEAD(base->tv5.vec + j); |
1405 | INIT_LIST_HEAD(base->tv4.vec + j); | 1406 | INIT_LIST_HEAD(base->tv4.vec + j); |
1406 | INIT_LIST_HEAD(base->tv3.vec + j); | 1407 | INIT_LIST_HEAD(base->tv3.vec + j); |
1407 | INIT_LIST_HEAD(base->tv2.vec + j); | 1408 | INIT_LIST_HEAD(base->tv2.vec + j); |
1408 | } | 1409 | } |
1409 | for (j = 0; j < TVR_SIZE; j++) | 1410 | for (j = 0; j < TVR_SIZE; j++) |
1410 | INIT_LIST_HEAD(base->tv1.vec + j); | 1411 | INIT_LIST_HEAD(base->tv1.vec + j); |
1411 | 1412 | ||
1412 | base->timer_jiffies = jiffies; | 1413 | base->timer_jiffies = jiffies; |
1413 | return 0; | 1414 | return 0; |
1414 | } | 1415 | } |
1415 | 1416 | ||
1416 | #ifdef CONFIG_HOTPLUG_CPU | 1417 | #ifdef CONFIG_HOTPLUG_CPU |
1417 | static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) | 1418 | static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) |
1418 | { | 1419 | { |
1419 | struct timer_list *timer; | 1420 | struct timer_list *timer; |
1420 | 1421 | ||
1421 | while (!list_empty(head)) { | 1422 | while (!list_empty(head)) { |
1422 | timer = list_first_entry(head, struct timer_list, entry); | 1423 | timer = list_first_entry(head, struct timer_list, entry); |
1423 | detach_timer(timer, 0); | 1424 | detach_timer(timer, 0); |
1424 | timer_set_base(timer, new_base); | 1425 | timer_set_base(timer, new_base); |
1425 | internal_add_timer(new_base, timer); | 1426 | internal_add_timer(new_base, timer); |
1426 | } | 1427 | } |
1427 | } | 1428 | } |
1428 | 1429 | ||
1429 | static void __cpuinit migrate_timers(int cpu) | 1430 | static void __cpuinit migrate_timers(int cpu) |
1430 | { | 1431 | { |
1431 | struct tvec_base *old_base; | 1432 | struct tvec_base *old_base; |
1432 | struct tvec_base *new_base; | 1433 | struct tvec_base *new_base; |
1433 | int i; | 1434 | int i; |
1434 | 1435 | ||
1435 | BUG_ON(cpu_online(cpu)); | 1436 | BUG_ON(cpu_online(cpu)); |
1436 | old_base = per_cpu(tvec_bases, cpu); | 1437 | old_base = per_cpu(tvec_bases, cpu); |
1437 | new_base = get_cpu_var(tvec_bases); | 1438 | new_base = get_cpu_var(tvec_bases); |
1438 | 1439 | ||
1439 | local_irq_disable(); | 1440 | local_irq_disable(); |
1440 | spin_lock(&new_base->lock); | 1441 | spin_lock(&new_base->lock); |
1441 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | 1442 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); |
1442 | 1443 | ||
1443 | BUG_ON(old_base->running_timer); | 1444 | BUG_ON(old_base->running_timer); |
1444 | 1445 | ||
1445 | for (i = 0; i < TVR_SIZE; i++) | 1446 | for (i = 0; i < TVR_SIZE; i++) |
1446 | migrate_timer_list(new_base, old_base->tv1.vec + i); | 1447 | migrate_timer_list(new_base, old_base->tv1.vec + i); |
1447 | for (i = 0; i < TVN_SIZE; i++) { | 1448 | for (i = 0; i < TVN_SIZE; i++) { |
1448 | migrate_timer_list(new_base, old_base->tv2.vec + i); | 1449 | migrate_timer_list(new_base, old_base->tv2.vec + i); |
1449 | migrate_timer_list(new_base, old_base->tv3.vec + i); | 1450 | migrate_timer_list(new_base, old_base->tv3.vec + i); |
1450 | migrate_timer_list(new_base, old_base->tv4.vec + i); | 1451 | migrate_timer_list(new_base, old_base->tv4.vec + i); |
1451 | migrate_timer_list(new_base, old_base->tv5.vec + i); | 1452 | migrate_timer_list(new_base, old_base->tv5.vec + i); |
1452 | } | 1453 | } |
1453 | 1454 | ||
1454 | spin_unlock(&old_base->lock); | 1455 | spin_unlock(&old_base->lock); |
1455 | spin_unlock(&new_base->lock); | 1456 | spin_unlock(&new_base->lock); |
1456 | local_irq_enable(); | 1457 | local_irq_enable(); |
1457 | put_cpu_var(tvec_bases); | 1458 | put_cpu_var(tvec_bases); |
1458 | } | 1459 | } |
1459 | #endif /* CONFIG_HOTPLUG_CPU */ | 1460 | #endif /* CONFIG_HOTPLUG_CPU */ |
1460 | 1461 | ||
1461 | static int __cpuinit timer_cpu_notify(struct notifier_block *self, | 1462 | static int __cpuinit timer_cpu_notify(struct notifier_block *self, |
1462 | unsigned long action, void *hcpu) | 1463 | unsigned long action, void *hcpu) |
1463 | { | 1464 | { |
1464 | long cpu = (long)hcpu; | 1465 | long cpu = (long)hcpu; |
1465 | switch(action) { | 1466 | switch(action) { |
1466 | case CPU_UP_PREPARE: | 1467 | case CPU_UP_PREPARE: |
1467 | case CPU_UP_PREPARE_FROZEN: | 1468 | case CPU_UP_PREPARE_FROZEN: |
1468 | if (init_timers_cpu(cpu) < 0) | 1469 | if (init_timers_cpu(cpu) < 0) |
1469 | return NOTIFY_BAD; | 1470 | return NOTIFY_BAD; |
1470 | break; | 1471 | break; |
1471 | #ifdef CONFIG_HOTPLUG_CPU | 1472 | #ifdef CONFIG_HOTPLUG_CPU |
1472 | case CPU_DEAD: | 1473 | case CPU_DEAD: |
1473 | case CPU_DEAD_FROZEN: | 1474 | case CPU_DEAD_FROZEN: |
1474 | migrate_timers(cpu); | 1475 | migrate_timers(cpu); |
1475 | break; | 1476 | break; |
1476 | #endif | 1477 | #endif |
1477 | default: | 1478 | default: |
1478 | break; | 1479 | break; |
1479 | } | 1480 | } |
1480 | return NOTIFY_OK; | 1481 | return NOTIFY_OK; |
1481 | } | 1482 | } |
1482 | 1483 | ||
1483 | static struct notifier_block __cpuinitdata timers_nb = { | 1484 | static struct notifier_block __cpuinitdata timers_nb = { |
1484 | .notifier_call = timer_cpu_notify, | 1485 | .notifier_call = timer_cpu_notify, |
1485 | }; | 1486 | }; |
1486 | 1487 | ||
1487 | 1488 | ||
1488 | void __init init_timers(void) | 1489 | void __init init_timers(void) |
1489 | { | 1490 | { |
1490 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, | 1491 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, |
1491 | (void *)(long)smp_processor_id()); | 1492 | (void *)(long)smp_processor_id()); |
1492 | 1493 | ||
1493 | init_timer_stats(); | 1494 | init_timer_stats(); |
1494 | 1495 | ||
1495 | BUG_ON(err == NOTIFY_BAD); | 1496 | BUG_ON(err == NOTIFY_BAD); |
1496 | register_cpu_notifier(&timers_nb); | 1497 | register_cpu_notifier(&timers_nb); |
1497 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq); | 1498 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq); |
1498 | } | 1499 | } |
1499 | 1500 | ||
1500 | /** | 1501 | /** |
1501 | * msleep - sleep safely even with waitqueue interruptions | 1502 | * msleep - sleep safely even with waitqueue interruptions |
1502 | * @msecs: Time in milliseconds to sleep for | 1503 | * @msecs: Time in milliseconds to sleep for |
1503 | */ | 1504 | */ |
1504 | void msleep(unsigned int msecs) | 1505 | void msleep(unsigned int msecs) |
1505 | { | 1506 | { |
1506 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; | 1507 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; |
1507 | 1508 | ||
1508 | while (timeout) | 1509 | while (timeout) |
1509 | timeout = schedule_timeout_uninterruptible(timeout); | 1510 | timeout = schedule_timeout_uninterruptible(timeout); |
1510 | } | 1511 | } |
1511 | 1512 | ||
1512 | EXPORT_SYMBOL(msleep); | 1513 | EXPORT_SYMBOL(msleep); |
1513 | 1514 | ||
1514 | /** | 1515 | /** |
1515 | * msleep_interruptible - sleep waiting for signals | 1516 | * msleep_interruptible - sleep waiting for signals |
1516 | * @msecs: Time in milliseconds to sleep for | 1517 | * @msecs: Time in milliseconds to sleep for |
1517 | */ | 1518 | */ |
1518 | unsigned long msleep_interruptible(unsigned int msecs) | 1519 | unsigned long msleep_interruptible(unsigned int msecs) |
1519 | { | 1520 | { |
1520 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; | 1521 | unsigned long timeout = msecs_to_jiffies(msecs) + 1; |
1521 | 1522 | ||
1522 | while (timeout && !signal_pending(current)) | 1523 | while (timeout && !signal_pending(current)) |
1523 | timeout = schedule_timeout_interruptible(timeout); | 1524 | timeout = schedule_timeout_interruptible(timeout); |
1524 | return jiffies_to_msecs(timeout); | 1525 | return jiffies_to_msecs(timeout); |
1525 | } | 1526 | } |
1526 | 1527 | ||
1527 | EXPORT_SYMBOL(msleep_interruptible); | 1528 | EXPORT_SYMBOL(msleep_interruptible); |
1528 | 1529 |
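Both msleep() variants add one jiffy to the converted timeout so the sleep is never shorter than requested; the msecs_to_jiffies() rounding alone could otherwise undershoot. A typical driver-style use, with the register, mask, and helper purely hypothetical:

	/* Illustrative poll loop: msleep() guarantees at least the
	 * requested delay because of the +1 jiffy above. */
	static int wait_for_ready(void __iomem *reg)
	{
		int tries;

		for (tries = 0; tries < 10; tries++) {
			if (readl(reg) & 0x1)	/* hypothetical ready bit */
				return 0;
			msleep(10);		/* sleeps >= 10 ms */
		}
		return -ETIMEDOUT;
	}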