Commit faa8b6c3c2e1454175609167a25ae525d075f045

Authored by Linus Torvalds
1 parent 3ec2ab5514

Revert "ipmi: add new IPMI nmi watchdog handling"

This reverts commit f64da958dfc83335de1d2bef9d3868f30feb4e53.

Andi Kleen is unhappy with the changes, and they really do not seem
worth it.  IPMI could use DIE_NMI_IPI instead of the new callback, even
though that ends up having its own set of problems too, mainly because
the IPMI code cannot really know the NMI was from IPMI or not.

Manually fix up conflicts in arch/x86_64/kernel/traps.c and
drivers/char/ipmi/ipmi_watchdog.c.

Cc: Andi Kleen <ak@suse.de>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Corey Minyard <minyard@acm.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 42 additions and 102 deletions (inline diff view)

arch/i386/kernel/traps.c
1 /* 1 /*
2 * linux/arch/i386/traps.c 2 * linux/arch/i386/traps.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * Pentium III FXSR, SSE support 6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000 7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 */ 8 */
9 9
10 /* 10 /*
11 * 'Traps.c' handles hardware traps and faults after we have saved some 11 * 'Traps.c' handles hardware traps and faults after we have saved some
12 * state in 'asm.s'. 12 * state in 'asm.s'.
13 */ 13 */
14 #include <linux/sched.h> 14 #include <linux/sched.h>
15 #include <linux/kernel.h> 15 #include <linux/kernel.h>
16 #include <linux/string.h> 16 #include <linux/string.h>
17 #include <linux/errno.h> 17 #include <linux/errno.h>
18 #include <linux/timer.h> 18 #include <linux/timer.h>
19 #include <linux/mm.h> 19 #include <linux/mm.h>
20 #include <linux/init.h> 20 #include <linux/init.h>
21 #include <linux/delay.h> 21 #include <linux/delay.h>
22 #include <linux/spinlock.h> 22 #include <linux/spinlock.h>
23 #include <linux/interrupt.h> 23 #include <linux/interrupt.h>
24 #include <linux/highmem.h> 24 #include <linux/highmem.h>
25 #include <linux/kallsyms.h> 25 #include <linux/kallsyms.h>
26 #include <linux/ptrace.h> 26 #include <linux/ptrace.h>
27 #include <linux/utsname.h> 27 #include <linux/utsname.h>
28 #include <linux/kprobes.h> 28 #include <linux/kprobes.h>
29 #include <linux/kexec.h> 29 #include <linux/kexec.h>
30 #include <linux/unwind.h> 30 #include <linux/unwind.h>
31 #include <linux/uaccess.h> 31 #include <linux/uaccess.h>
32 #include <linux/nmi.h> 32 #include <linux/nmi.h>
33 #include <linux/bug.h> 33 #include <linux/bug.h>
34 34
35 #ifdef CONFIG_EISA 35 #ifdef CONFIG_EISA
36 #include <linux/ioport.h> 36 #include <linux/ioport.h>
37 #include <linux/eisa.h> 37 #include <linux/eisa.h>
38 #endif 38 #endif
39 39
40 #ifdef CONFIG_MCA 40 #ifdef CONFIG_MCA
41 #include <linux/mca.h> 41 #include <linux/mca.h>
42 #endif 42 #endif
43 43
44 #include <asm/processor.h> 44 #include <asm/processor.h>
45 #include <asm/system.h> 45 #include <asm/system.h>
46 #include <asm/io.h> 46 #include <asm/io.h>
47 #include <asm/atomic.h> 47 #include <asm/atomic.h>
48 #include <asm/debugreg.h> 48 #include <asm/debugreg.h>
49 #include <asm/desc.h> 49 #include <asm/desc.h>
50 #include <asm/i387.h> 50 #include <asm/i387.h>
51 #include <asm/nmi.h> 51 #include <asm/nmi.h>
52 #include <asm/unwind.h> 52 #include <asm/unwind.h>
53 #include <asm/smp.h> 53 #include <asm/smp.h>
54 #include <asm/arch_hooks.h> 54 #include <asm/arch_hooks.h>
55 #include <linux/kdebug.h> 55 #include <linux/kdebug.h>
56 #include <asm/stacktrace.h> 56 #include <asm/stacktrace.h>
57 57
58 #include <linux/module.h> 58 #include <linux/module.h>
59 59
60 #include "mach_traps.h" 60 #include "mach_traps.h"
61 61
62 int panic_on_unrecovered_nmi; 62 int panic_on_unrecovered_nmi;
63 63
64 asmlinkage int system_call(void); 64 asmlinkage int system_call(void);
65 65
66 /* Do we ignore FPU interrupts ? */ 66 /* Do we ignore FPU interrupts ? */
67 char ignore_fpu_irq = 0; 67 char ignore_fpu_irq = 0;
68 68
69 /* 69 /*
70 * The IDT has to be page-aligned to simplify the Pentium 70 * The IDT has to be page-aligned to simplify the Pentium
71 * F0 0F bug workaround.. We have a special link segment 71 * F0 0F bug workaround.. We have a special link segment
72 * for this. 72 * for this.
73 */ 73 */
74 struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; 74 struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
75 75
76 asmlinkage void divide_error(void); 76 asmlinkage void divide_error(void);
77 asmlinkage void debug(void); 77 asmlinkage void debug(void);
78 asmlinkage void nmi(void); 78 asmlinkage void nmi(void);
79 asmlinkage void int3(void); 79 asmlinkage void int3(void);
80 asmlinkage void overflow(void); 80 asmlinkage void overflow(void);
81 asmlinkage void bounds(void); 81 asmlinkage void bounds(void);
82 asmlinkage void invalid_op(void); 82 asmlinkage void invalid_op(void);
83 asmlinkage void device_not_available(void); 83 asmlinkage void device_not_available(void);
84 asmlinkage void coprocessor_segment_overrun(void); 84 asmlinkage void coprocessor_segment_overrun(void);
85 asmlinkage void invalid_TSS(void); 85 asmlinkage void invalid_TSS(void);
86 asmlinkage void segment_not_present(void); 86 asmlinkage void segment_not_present(void);
87 asmlinkage void stack_segment(void); 87 asmlinkage void stack_segment(void);
88 asmlinkage void general_protection(void); 88 asmlinkage void general_protection(void);
89 asmlinkage void page_fault(void); 89 asmlinkage void page_fault(void);
90 asmlinkage void coprocessor_error(void); 90 asmlinkage void coprocessor_error(void);
91 asmlinkage void simd_coprocessor_error(void); 91 asmlinkage void simd_coprocessor_error(void);
92 asmlinkage void alignment_check(void); 92 asmlinkage void alignment_check(void);
93 asmlinkage void spurious_interrupt_bug(void); 93 asmlinkage void spurious_interrupt_bug(void);
94 asmlinkage void machine_check(void); 94 asmlinkage void machine_check(void);
95 95
96 int kstack_depth_to_print = 24; 96 int kstack_depth_to_print = 24;
97 static unsigned int code_bytes = 64; 97 static unsigned int code_bytes = 64;
98 98
99 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) 99 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
100 { 100 {
101 return p > (void *)tinfo && 101 return p > (void *)tinfo &&
102 p < (void *)tinfo + THREAD_SIZE - 3; 102 p < (void *)tinfo + THREAD_SIZE - 3;
103 } 103 }
104 104
105 static inline unsigned long print_context_stack(struct thread_info *tinfo, 105 static inline unsigned long print_context_stack(struct thread_info *tinfo,
106 unsigned long *stack, unsigned long ebp, 106 unsigned long *stack, unsigned long ebp,
107 struct stacktrace_ops *ops, void *data) 107 struct stacktrace_ops *ops, void *data)
108 { 108 {
109 unsigned long addr; 109 unsigned long addr;
110 110
111 #ifdef CONFIG_FRAME_POINTER 111 #ifdef CONFIG_FRAME_POINTER
112 while (valid_stack_ptr(tinfo, (void *)ebp)) { 112 while (valid_stack_ptr(tinfo, (void *)ebp)) {
113 unsigned long new_ebp; 113 unsigned long new_ebp;
114 addr = *(unsigned long *)(ebp + 4); 114 addr = *(unsigned long *)(ebp + 4);
115 ops->address(data, addr); 115 ops->address(data, addr);
116 /* 116 /*
117 * break out of recursive entries (such as 117 * break out of recursive entries (such as
118 * end_of_stack_stop_unwind_function). Also, 118 * end_of_stack_stop_unwind_function). Also,
119 * we can never allow a frame pointer to 119 * we can never allow a frame pointer to
120 * move downwards! 120 * move downwards!
121 */ 121 */
122 new_ebp = *(unsigned long *)ebp; 122 new_ebp = *(unsigned long *)ebp;
123 if (new_ebp <= ebp) 123 if (new_ebp <= ebp)
124 break; 124 break;
125 ebp = new_ebp; 125 ebp = new_ebp;
126 } 126 }
127 #else 127 #else
128 while (valid_stack_ptr(tinfo, stack)) { 128 while (valid_stack_ptr(tinfo, stack)) {
129 addr = *stack++; 129 addr = *stack++;
130 if (__kernel_text_address(addr)) 130 if (__kernel_text_address(addr))
131 ops->address(data, addr); 131 ops->address(data, addr);
132 } 132 }
133 #endif 133 #endif
134 return ebp; 134 return ebp;
135 } 135 }
136 136
137 #define MSG(msg) ops->warning(data, msg) 137 #define MSG(msg) ops->warning(data, msg)
138 138
139 void dump_trace(struct task_struct *task, struct pt_regs *regs, 139 void dump_trace(struct task_struct *task, struct pt_regs *regs,
140 unsigned long *stack, 140 unsigned long *stack,
141 struct stacktrace_ops *ops, void *data) 141 struct stacktrace_ops *ops, void *data)
142 { 142 {
143 unsigned long ebp = 0; 143 unsigned long ebp = 0;
144 144
145 if (!task) 145 if (!task)
146 task = current; 146 task = current;
147 147
148 if (!stack) { 148 if (!stack) {
149 unsigned long dummy; 149 unsigned long dummy;
150 stack = &dummy; 150 stack = &dummy;
151 if (task && task != current) 151 if (task && task != current)
152 stack = (unsigned long *)task->thread.esp; 152 stack = (unsigned long *)task->thread.esp;
153 } 153 }
154 154
155 #ifdef CONFIG_FRAME_POINTER 155 #ifdef CONFIG_FRAME_POINTER
156 if (!ebp) { 156 if (!ebp) {
157 if (task == current) { 157 if (task == current) {
158 /* Grab ebp right from our regs */ 158 /* Grab ebp right from our regs */
159 asm ("movl %%ebp, %0" : "=r" (ebp) : ); 159 asm ("movl %%ebp, %0" : "=r" (ebp) : );
160 } else { 160 } else {
161 /* ebp is the last reg pushed by switch_to */ 161 /* ebp is the last reg pushed by switch_to */
162 ebp = *(unsigned long *) task->thread.esp; 162 ebp = *(unsigned long *) task->thread.esp;
163 } 163 }
164 } 164 }
165 #endif 165 #endif
166 166
167 while (1) { 167 while (1) {
168 struct thread_info *context; 168 struct thread_info *context;
169 context = (struct thread_info *) 169 context = (struct thread_info *)
170 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 170 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
171 ebp = print_context_stack(context, stack, ebp, ops, data); 171 ebp = print_context_stack(context, stack, ebp, ops, data);
172 /* Should be after the line below, but somewhere 172 /* Should be after the line below, but somewhere
173 in early boot context comes out corrupted and we 173 in early boot context comes out corrupted and we
174 can't reference it -AK */ 174 can't reference it -AK */
175 if (ops->stack(data, "IRQ") < 0) 175 if (ops->stack(data, "IRQ") < 0)
176 break; 176 break;
177 stack = (unsigned long*)context->previous_esp; 177 stack = (unsigned long*)context->previous_esp;
178 if (!stack) 178 if (!stack)
179 break; 179 break;
180 touch_nmi_watchdog(); 180 touch_nmi_watchdog();
181 } 181 }
182 } 182 }
183 EXPORT_SYMBOL(dump_trace); 183 EXPORT_SYMBOL(dump_trace);
184 184
/* stacktrace_ops->warning_symbol callback: print the log-level prefix
 * carried in @data, then the symbolized message. */
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	printk(data);
	print_symbol(msg, symbol);
	printk("\n");
}
192 192
/* stacktrace_ops->warning callback: @data is the log-level prefix. */
static void print_trace_warning(void *data, char *msg)
{
	printk("%s%s\n", (char *)data, msg);
}
197 197
/* stacktrace_ops->stack callback: nothing to print at a stack switch;
 * returning 0 tells dump_trace() to keep walking. */
static int print_trace_stack(void *data, char *name)
{
	return 0;
}
202 202
/*
 * Print one address/symbol entries per line.
 */
static void print_trace_address(void *data, unsigned long addr)
{
	printk("%s [<%08lx>] ", (char *)data, addr);
	print_symbol("%s\n", addr);
}
211 211
212 static struct stacktrace_ops print_trace_ops = { 212 static struct stacktrace_ops print_trace_ops = {
213 .warning = print_trace_warning, 213 .warning = print_trace_warning,
214 .warning_symbol = print_trace_warning_symbol, 214 .warning_symbol = print_trace_warning_symbol,
215 .stack = print_trace_stack, 215 .stack = print_trace_stack,
216 .address = print_trace_address, 216 .address = print_trace_address,
217 }; 217 };
218 218
219 static void 219 static void
220 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 220 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
221 unsigned long * stack, char *log_lvl) 221 unsigned long * stack, char *log_lvl)
222 { 222 {
223 dump_trace(task, regs, stack, &print_trace_ops, log_lvl); 223 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
224 printk("%s =======================\n", log_lvl); 224 printk("%s =======================\n", log_lvl);
225 } 225 }
226 226
/* Public wrapper: print a call trace with no log-level prefix. */
void show_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long * stack)
{
	show_trace_log_lvl(task, regs, stack, "");
}
232 232
233 static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 233 static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
234 unsigned long *esp, char *log_lvl) 234 unsigned long *esp, char *log_lvl)
235 { 235 {
236 unsigned long *stack; 236 unsigned long *stack;
237 int i; 237 int i;
238 238
239 if (esp == NULL) { 239 if (esp == NULL) {
240 if (task) 240 if (task)
241 esp = (unsigned long*)task->thread.esp; 241 esp = (unsigned long*)task->thread.esp;
242 else 242 else
243 esp = (unsigned long *)&esp; 243 esp = (unsigned long *)&esp;
244 } 244 }
245 245
246 stack = esp; 246 stack = esp;
247 for(i = 0; i < kstack_depth_to_print; i++) { 247 for(i = 0; i < kstack_depth_to_print; i++) {
248 if (kstack_end(stack)) 248 if (kstack_end(stack))
249 break; 249 break;
250 if (i && ((i % 8) == 0)) 250 if (i && ((i % 8) == 0))
251 printk("\n%s ", log_lvl); 251 printk("\n%s ", log_lvl);
252 printk("%08lx ", *stack++); 252 printk("%08lx ", *stack++);
253 } 253 }
254 printk("\n%sCall Trace:\n", log_lvl); 254 printk("\n%sCall Trace:\n", log_lvl);
255 show_trace_log_lvl(task, regs, esp, log_lvl); 255 show_trace_log_lvl(task, regs, esp, log_lvl);
256 } 256 }
257 257
258 void show_stack(struct task_struct *task, unsigned long *esp) 258 void show_stack(struct task_struct *task, unsigned long *esp)
259 { 259 {
260 printk(" "); 260 printk(" ");
261 show_stack_log_lvl(task, NULL, esp, ""); 261 show_stack_log_lvl(task, NULL, esp, "");
262 } 262 }
263 263
264 /* 264 /*
265 * The architecture-independent dump_stack generator 265 * The architecture-independent dump_stack generator
266 */ 266 */
267 void dump_stack(void) 267 void dump_stack(void)
268 { 268 {
269 unsigned long stack; 269 unsigned long stack;
270 270
271 show_trace(current, NULL, &stack); 271 show_trace(current, NULL, &stack);
272 } 272 }
273 273
274 EXPORT_SYMBOL(dump_stack); 274 EXPORT_SYMBOL(dump_stack);
275 275
276 void show_registers(struct pt_regs *regs) 276 void show_registers(struct pt_regs *regs)
277 { 277 {
278 int i; 278 int i;
279 int in_kernel = 1; 279 int in_kernel = 1;
280 unsigned long esp; 280 unsigned long esp;
281 unsigned short ss, gs; 281 unsigned short ss, gs;
282 282
283 esp = (unsigned long) (&regs->esp); 283 esp = (unsigned long) (&regs->esp);
284 savesegment(ss, ss); 284 savesegment(ss, ss);
285 savesegment(gs, gs); 285 savesegment(gs, gs);
286 if (user_mode_vm(regs)) { 286 if (user_mode_vm(regs)) {
287 in_kernel = 0; 287 in_kernel = 0;
288 esp = regs->esp; 288 esp = regs->esp;
289 ss = regs->xss & 0xffff; 289 ss = regs->xss & 0xffff;
290 } 290 }
291 print_modules(); 291 print_modules();
292 printk(KERN_EMERG "CPU: %d\n" 292 printk(KERN_EMERG "CPU: %d\n"
293 KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" 293 KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
294 KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", 294 KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
295 smp_processor_id(), 0xffff & regs->xcs, regs->eip, 295 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
296 print_tainted(), regs->eflags, init_utsname()->release, 296 print_tainted(), regs->eflags, init_utsname()->release,
297 (int)strcspn(init_utsname()->version, " "), 297 (int)strcspn(init_utsname()->version, " "),
298 init_utsname()->version); 298 init_utsname()->version);
299 print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); 299 print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
300 printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", 300 printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
301 regs->eax, regs->ebx, regs->ecx, regs->edx); 301 regs->eax, regs->ebx, regs->ecx, regs->edx);
302 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", 302 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
303 regs->esi, regs->edi, regs->ebp, esp); 303 regs->esi, regs->edi, regs->ebp, esp);
304 printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", 304 printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
305 regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); 305 regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
306 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", 306 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
307 TASK_COMM_LEN, current->comm, current->pid, 307 TASK_COMM_LEN, current->comm, current->pid,
308 current_thread_info(), current, task_thread_info(current)); 308 current_thread_info(), current, task_thread_info(current));
309 /* 309 /*
310 * When in-kernel, we also print out the stack and code at the 310 * When in-kernel, we also print out the stack and code at the
311 * time of the fault.. 311 * time of the fault..
312 */ 312 */
313 if (in_kernel) { 313 if (in_kernel) {
314 u8 *eip; 314 u8 *eip;
315 unsigned int code_prologue = code_bytes * 43 / 64; 315 unsigned int code_prologue = code_bytes * 43 / 64;
316 unsigned int code_len = code_bytes; 316 unsigned int code_len = code_bytes;
317 unsigned char c; 317 unsigned char c;
318 318
319 printk("\n" KERN_EMERG "Stack: "); 319 printk("\n" KERN_EMERG "Stack: ");
320 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); 320 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
321 321
322 printk(KERN_EMERG "Code: "); 322 printk(KERN_EMERG "Code: ");
323 323
324 eip = (u8 *)regs->eip - code_prologue; 324 eip = (u8 *)regs->eip - code_prologue;
325 if (eip < (u8 *)PAGE_OFFSET || 325 if (eip < (u8 *)PAGE_OFFSET ||
326 probe_kernel_address(eip, c)) { 326 probe_kernel_address(eip, c)) {
327 /* try starting at EIP */ 327 /* try starting at EIP */
328 eip = (u8 *)regs->eip; 328 eip = (u8 *)regs->eip;
329 code_len = code_len - code_prologue + 1; 329 code_len = code_len - code_prologue + 1;
330 } 330 }
331 for (i = 0; i < code_len; i++, eip++) { 331 for (i = 0; i < code_len; i++, eip++) {
332 if (eip < (u8 *)PAGE_OFFSET || 332 if (eip < (u8 *)PAGE_OFFSET ||
333 probe_kernel_address(eip, c)) { 333 probe_kernel_address(eip, c)) {
334 printk(" Bad EIP value."); 334 printk(" Bad EIP value.");
335 break; 335 break;
336 } 336 }
337 if (eip == (u8 *)regs->eip) 337 if (eip == (u8 *)regs->eip)
338 printk("<%02x> ", c); 338 printk("<%02x> ", c);
339 else 339 else
340 printk("%02x ", c); 340 printk("%02x ", c);
341 } 341 }
342 } 342 }
343 printk("\n"); 343 printk("\n");
344 } 344 }
345 345
346 int is_valid_bugaddr(unsigned long eip) 346 int is_valid_bugaddr(unsigned long eip)
347 { 347 {
348 unsigned short ud2; 348 unsigned short ud2;
349 349
350 if (eip < PAGE_OFFSET) 350 if (eip < PAGE_OFFSET)
351 return 0; 351 return 0;
352 if (probe_kernel_address((unsigned short *)eip, ud2)) 352 if (probe_kernel_address((unsigned short *)eip, ud2))
353 return 0; 353 return 0;
354 354
355 return ud2 == 0x0b0f; 355 return ud2 == 0x0b0f;
356 } 356 }
357 357
358 /* 358 /*
359 * This is gone through when something in the kernel has done something bad and 359 * This is gone through when something in the kernel has done something bad and
360 * is about to be terminated. 360 * is about to be terminated.
361 */ 361 */
362 void die(const char * str, struct pt_regs * regs, long err) 362 void die(const char * str, struct pt_regs * regs, long err)
363 { 363 {
364 static struct { 364 static struct {
365 spinlock_t lock; 365 spinlock_t lock;
366 u32 lock_owner; 366 u32 lock_owner;
367 int lock_owner_depth; 367 int lock_owner_depth;
368 } die = { 368 } die = {
369 .lock = __SPIN_LOCK_UNLOCKED(die.lock), 369 .lock = __SPIN_LOCK_UNLOCKED(die.lock),
370 .lock_owner = -1, 370 .lock_owner = -1,
371 .lock_owner_depth = 0 371 .lock_owner_depth = 0
372 }; 372 };
373 static int die_counter; 373 static int die_counter;
374 unsigned long flags; 374 unsigned long flags;
375 375
376 oops_enter(); 376 oops_enter();
377 377
378 if (die.lock_owner != raw_smp_processor_id()) { 378 if (die.lock_owner != raw_smp_processor_id()) {
379 console_verbose(); 379 console_verbose();
380 spin_lock_irqsave(&die.lock, flags); 380 spin_lock_irqsave(&die.lock, flags);
381 die.lock_owner = smp_processor_id(); 381 die.lock_owner = smp_processor_id();
382 die.lock_owner_depth = 0; 382 die.lock_owner_depth = 0;
383 bust_spinlocks(1); 383 bust_spinlocks(1);
384 } 384 }
385 else 385 else
386 local_save_flags(flags); 386 local_save_flags(flags);
387 387
388 if (++die.lock_owner_depth < 3) { 388 if (++die.lock_owner_depth < 3) {
389 int nl = 0; 389 int nl = 0;
390 unsigned long esp; 390 unsigned long esp;
391 unsigned short ss; 391 unsigned short ss;
392 392
393 report_bug(regs->eip); 393 report_bug(regs->eip);
394 394
395 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); 395 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
396 #ifdef CONFIG_PREEMPT 396 #ifdef CONFIG_PREEMPT
397 printk(KERN_EMERG "PREEMPT "); 397 printk(KERN_EMERG "PREEMPT ");
398 nl = 1; 398 nl = 1;
399 #endif 399 #endif
400 #ifdef CONFIG_SMP 400 #ifdef CONFIG_SMP
401 if (!nl) 401 if (!nl)
402 printk(KERN_EMERG); 402 printk(KERN_EMERG);
403 printk("SMP "); 403 printk("SMP ");
404 nl = 1; 404 nl = 1;
405 #endif 405 #endif
406 #ifdef CONFIG_DEBUG_PAGEALLOC 406 #ifdef CONFIG_DEBUG_PAGEALLOC
407 if (!nl) 407 if (!nl)
408 printk(KERN_EMERG); 408 printk(KERN_EMERG);
409 printk("DEBUG_PAGEALLOC"); 409 printk("DEBUG_PAGEALLOC");
410 nl = 1; 410 nl = 1;
411 #endif 411 #endif
412 if (nl) 412 if (nl)
413 printk("\n"); 413 printk("\n");
414 if (notify_die(DIE_OOPS, str, regs, err, 414 if (notify_die(DIE_OOPS, str, regs, err,
415 current->thread.trap_no, SIGSEGV) != 415 current->thread.trap_no, SIGSEGV) !=
416 NOTIFY_STOP) { 416 NOTIFY_STOP) {
417 show_registers(regs); 417 show_registers(regs);
418 /* Executive summary in case the oops scrolled away */ 418 /* Executive summary in case the oops scrolled away */
419 esp = (unsigned long) (&regs->esp); 419 esp = (unsigned long) (&regs->esp);
420 savesegment(ss, ss); 420 savesegment(ss, ss);
421 if (user_mode(regs)) { 421 if (user_mode(regs)) {
422 esp = regs->esp; 422 esp = regs->esp;
423 ss = regs->xss & 0xffff; 423 ss = regs->xss & 0xffff;
424 } 424 }
425 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip); 425 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip);
426 print_symbol("%s", regs->eip); 426 print_symbol("%s", regs->eip);
427 printk(" SS:ESP %04x:%08lx\n", ss, esp); 427 printk(" SS:ESP %04x:%08lx\n", ss, esp);
428 } 428 }
429 else 429 else
430 regs = NULL; 430 regs = NULL;
431 } else 431 } else
432 printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); 432 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
433 433
434 bust_spinlocks(0); 434 bust_spinlocks(0);
435 die.lock_owner = -1; 435 die.lock_owner = -1;
436 spin_unlock_irqrestore(&die.lock, flags); 436 spin_unlock_irqrestore(&die.lock, flags);
437 437
438 if (!regs) 438 if (!regs)
439 return; 439 return;
440 440
441 if (kexec_should_crash(current)) 441 if (kexec_should_crash(current))
442 crash_kexec(regs); 442 crash_kexec(regs);
443 443
444 if (in_interrupt()) 444 if (in_interrupt())
445 panic("Fatal exception in interrupt"); 445 panic("Fatal exception in interrupt");
446 446
447 if (panic_on_oops) 447 if (panic_on_oops)
448 panic("Fatal exception"); 448 panic("Fatal exception");
449 449
450 oops_exit(); 450 oops_exit();
451 do_exit(SIGSEGV); 451 do_exit(SIGSEGV);
452 } 452 }
453 453
/* die() only if the trap came from kernel mode (user/vm86 faults are
 * handled by signal delivery instead). */
static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
{
	if (!user_mode_vm(regs))
		die(str, regs, err);
}
459 459
460 static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, 460 static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
461 struct pt_regs * regs, long error_code, 461 struct pt_regs * regs, long error_code,
462 siginfo_t *info) 462 siginfo_t *info)
463 { 463 {
464 struct task_struct *tsk = current; 464 struct task_struct *tsk = current;
465 465
466 if (regs->eflags & VM_MASK) { 466 if (regs->eflags & VM_MASK) {
467 if (vm86) 467 if (vm86)
468 goto vm86_trap; 468 goto vm86_trap;
469 goto trap_signal; 469 goto trap_signal;
470 } 470 }
471 471
472 if (!user_mode(regs)) 472 if (!user_mode(regs))
473 goto kernel_trap; 473 goto kernel_trap;
474 474
475 trap_signal: { 475 trap_signal: {
476 /* 476 /*
477 * We want error_code and trap_no set for userspace faults and 477 * We want error_code and trap_no set for userspace faults and
478 * kernelspace faults which result in die(), but not 478 * kernelspace faults which result in die(), but not
479 * kernelspace faults which are fixed up. die() gives the 479 * kernelspace faults which are fixed up. die() gives the
480 * process no chance to handle the signal and notice the 480 * process no chance to handle the signal and notice the
481 * kernel fault information, so that won't result in polluting 481 * kernel fault information, so that won't result in polluting
482 * the information about previously queued, but not yet 482 * the information about previously queued, but not yet
483 * delivered, faults. See also do_general_protection below. 483 * delivered, faults. See also do_general_protection below.
484 */ 484 */
485 tsk->thread.error_code = error_code; 485 tsk->thread.error_code = error_code;
486 tsk->thread.trap_no = trapnr; 486 tsk->thread.trap_no = trapnr;
487 487
488 if (info) 488 if (info)
489 force_sig_info(signr, info, tsk); 489 force_sig_info(signr, info, tsk);
490 else 490 else
491 force_sig(signr, tsk); 491 force_sig(signr, tsk);
492 return; 492 return;
493 } 493 }
494 494
495 kernel_trap: { 495 kernel_trap: {
496 if (!fixup_exception(regs)) { 496 if (!fixup_exception(regs)) {
497 tsk->thread.error_code = error_code; 497 tsk->thread.error_code = error_code;
498 tsk->thread.trap_no = trapnr; 498 tsk->thread.trap_no = trapnr;
499 die(str, regs, error_code); 499 die(str, regs, error_code);
500 } 500 }
501 return; 501 return;
502 } 502 }
503 503
504 vm86_trap: { 504 vm86_trap: {
505 int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr); 505 int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
506 if (ret) goto trap_signal; 506 if (ret) goto trap_signal;
507 return; 507 return;
508 } 508 }
509 } 509 }
510 510
/*
 * Handler-generating macros.  Each expands to a fastcall do_<name>()
 * that runs the DIE_TRAP notifier chain and, unless a notifier claims
 * the event (NOTIFY_STOP), forwards to do_trap().  The _INFO variants
 * build a siginfo_t; the VM86 variants enable vm86-mode handling.
 */
#define DO_ERROR(trapnr, signr, str, name) \
fastcall void do_##name(struct pt_regs * regs, long error_code) \
{ \
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
						== NOTIFY_STOP) \
		return; \
	do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
}

#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
fastcall void do_##name(struct pt_regs * regs, long error_code) \
{ \
	siginfo_t info; \
	info.si_signo = signr; \
	info.si_errno = 0; \
	info.si_code = sicode; \
	info.si_addr = (void __user *)siaddr; \
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
						== NOTIFY_STOP) \
		return; \
	do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
}

#define DO_VM86_ERROR(trapnr, signr, str, name) \
fastcall void do_##name(struct pt_regs * regs, long error_code) \
{ \
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
						== NOTIFY_STOP) \
		return; \
	do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
}

#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
fastcall void do_##name(struct pt_regs * regs, long error_code) \
{ \
	siginfo_t info; \
	info.si_signo = signr; \
	info.si_errno = 0; \
	info.si_code = sicode; \
	info.si_addr = (void __user *)siaddr; \
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
						== NOTIFY_STOP) \
		return; \
	do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
}
556 556
/*
 * Table of the classic x86 fault entry points.  Each DO_* macro expands
 * to a fastcall handler that (for the *_INFO variants) fills in a
 * siginfo, offers the event to the die notifier chain and finally
 * dispatches to do_trap().  The VM86 variants additionally allow vm86
 * mode to intercept the fault.  int3 gets its own handler below when
 * CONFIG_KPROBES is enabled.
 */
DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
#ifndef CONFIG_KPROBES
DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
#endif
DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip)
DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
570 570
/*
 * General-protection fault handler (trap 13).
 *
 * First gives the lazy I/O-bitmap mechanism a chance: if the per-CPU
 * TSS still carries the LAZY marker and the faulting task owns an I/O
 * bitmap, copy the bitmap into the TSS and let the CPU restart the
 * faulting instruction.  Otherwise route the fault to vm86 handling,
 * deliver SIGSEGV to user mode, or treat it as a kernel fault (fixup
 * or oops).
 */
fastcall void __kprobes do_general_protection(struct pt_regs * regs,
					      long error_code)
{
	int cpu = get_cpu();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	struct thread_struct *thread = &current->thread;

	/*
	 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
	 * invalid offset set (the LAZY one) and the faulting thread has
	 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
	 * and we set the offset field correctly. Then we let the CPU to
	 * restart the faulting instruction.
	 */
	if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
	    thread->io_bitmap_ptr) {
		memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
		       thread->io_bitmap_max);
		/*
		 * If the previously set map was extending to higher ports
		 * than the current one, pad extra space with 0xff (no access).
		 */
		if (thread->io_bitmap_max < tss->io_bitmap_max)
			memset((char *) tss->io_bitmap +
				thread->io_bitmap_max, 0xff,
				tss->io_bitmap_max - thread->io_bitmap_max);
		tss->io_bitmap_max = thread->io_bitmap_max;
		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
		tss->io_bitmap_owner = thread;
		put_cpu();
		return;
	}
	put_cpu();

	/* Faults raised while in v86 mode go to the vm86 emulation code. */
	if (regs->eflags & VM_MASK)
		goto gp_in_vm86;

	if (!user_mode(regs))
		goto gp_in_kernel;

	/* Plain user-mode GP fault: record the trap and deliver SIGSEGV. */
	current->thread.error_code = error_code;
	current->thread.trap_no = 13;
	force_sig(SIGSEGV, current);
	return;

gp_in_vm86:
	local_irq_enable();
	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
	return;

gp_in_kernel:
	/* Try an exception-table fixup before declaring a kernel oops. */
	if (!fixup_exception(regs)) {
		current->thread.error_code = error_code;
		current->thread.trap_no = 13;
		if (notify_die(DIE_GPF, "general protection fault", regs,
				error_code, 13, SIGSEGV) == NOTIFY_STOP)
			return;
		die("general protection fault", regs, error_code);
	}
}
631 631
/*
 * NMI with reason bit 7 set: memory/bus parity error.  Report it,
 * honour panic_on_unrecovered_nmi, then clear and disable the parity
 * error line so the machine can try to continue.
 */
static __kprobes void
mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
		"CPU %d.\n", reason, smp_processor_id());
	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");

	/* Clear and disable the memory parity error line. */
	clear_mem_error(reason);
}
646 646
/*
 * NMI with reason bit 6 set: I/O check (IOCK) error.  Dump registers,
 * then toggle bit 3 of port 0x61 with a ~2 second delay to clear and
 * re-enable the IOCK line.  The port writes must stay in this order.
 */
static __kprobes void
io_check_error(unsigned char reason, struct pt_regs * regs)
{
	unsigned long i;

	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
	show_registers(regs);

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & 0xf) | 8;
	outb(reason, 0x61);
	i = 2000;
	while (--i) udelay(1000);
	reason &= ~8;
	outb(reason, 0x61);
}
663 663
/*
 * Fallback for NMIs with no recognised reason bits.  On MCA machines
 * the MCA code may be able to identify the source; otherwise report
 * the mystery NMI and honour panic_on_unrecovered_nmi.
 */
static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
#ifdef CONFIG_MCA
	/* Might actually be able to figure out what the guilty party
	 * is. */
	if( MCA_bus ) {
		mca_handle_nmi();
		return;
	}
#endif
	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
		"CPU %d.\n", reason, smp_processor_id());
	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
}
683 683
/* Serializes the emergency register dump below across CPUs. */
static DEFINE_SPINLOCK(nmi_print_lock);

/*
 * Fatal-NMI path (e.g. lockup detected by the NMI watchdog).  Offers
 * the event to the DIE_NMIWATCHDOG notifier first; if unclaimed,
 * prints the message and registers under nmi_print_lock, attempts a
 * crash kexec when the NMI hit kernel mode, and kills the current
 * task.
 */
void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
{
	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
			NOTIFY_STOP)
		return;

	spin_lock(&nmi_print_lock);
	/*
	 * We are in trouble anyway, lets at least try
	 * to get a message out.
	 */
	bust_spinlocks(1);
	printk(KERN_EMERG "%s", msg);
	printk(" on CPU%d, eip %08lx, registers:\n",
		smp_processor_id(), regs->eip);
	show_registers(regs);
	console_silent();
	spin_unlock(&nmi_print_lock);
	bust_spinlocks(0);

	/* If we are in kernel we are probably nested up pretty bad
	 * and might aswell get out now while we still can.
	 */
	if (!user_mode_vm(regs)) {
		current->thread.trap_no = 2;
		crash_kexec(regs);
	}

	do_exit(SIGSEGV);
}
716 716
/*
 * Central NMI dispatch.  The chipset "reason" byte is read on the boot
 * CPU only; bits 7/6 select memory-parity / IOCK handling.  Reason-less
 * NMIs are offered to the DIE_NMI_IPI notifier chain, then (with a
 * local APIC) to the NMI watchdog and the per-CPU NMI callback, and are
 * finally reported as unknown.
 */
static __kprobes void default_do_nmi(struct pt_regs * regs)
{
	unsigned char reason = 0;

	/* Only the BSP gets external NMIs from the system. */
	if (!smp_processor_id())
		reason = get_nmi_reason();

	if (!(reason & 0xc0)) {
		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
							== NOTIFY_STOP)
			return;
#ifdef CONFIG_X86_LOCAL_APIC
		/*
		 * Ok, so this is none of the documented NMI sources,
		 * so it must be the NMI watchdog.
		 */
		if (nmi_watchdog_tick(regs, reason))
			return;
		/* NOTE: without a local APIC the call below becomes
		 * unconditional. */
		if (!do_nmi_callback(regs, smp_processor_id()))
#endif
			unknown_nmi_error(reason, regs);

		return;
	}
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
		return;
	if (reason & 0x80)
		mem_parity_error(reason, regs);
	if (reason & 0x40)
		io_check_error(reason, regs);
	/*
	 * Reassert NMI in case it became active meanwhile
	 * as it's edge-triggered.
	 */
	reassert_nmi();
}
759 754
760 fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) 755 fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
761 { 756 {
762 int cpu; 757 int cpu;
763 758
764 nmi_enter(); 759 nmi_enter();
765 760
766 cpu = smp_processor_id(); 761 cpu = smp_processor_id();
767 762
768 ++nmi_count(cpu); 763 ++nmi_count(cpu);
769 764
770 default_do_nmi(regs); 765 default_do_nmi(regs);
771 766
772 nmi_exit(); 767 nmi_exit();
773 } 768 }
774 769
#ifdef CONFIG_KPROBES
/*
 * int3 handler used when kprobes is enabled (without CONFIG_KPROBES the
 * generic DO_VM86_ERROR table entry handles trap 3 instead).  kprobes
 * gets first look at the breakpoint via the DIE_INT3 notifier.
 */
fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
			== NOTIFY_STOP)
		return;
	/* This is an interrupt gate, because kprobes wants interrupts
	   disabled. Normal trap handlers don't. */
	restore_interrupts(regs);
	do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
}
#endif
787 782
/*
 * Our handling of the processor debug registers is non-trivial.
 * We do not clear them on entry and exit from the kernel. Therefore
 * it is possible to get a watchpoint trap here from inside the kernel.
 * However, the code in ./ptrace.c has ensured that the user can
 * only set watchpoints on userspace addresses. Therefore the in-kernel
 * watchpoint trap can only occur in code which is reading/writing
 * from user space. Such code must not hold kernel locks (since it
 * can equally take a page fault), therefore it is safe to call
 * force_sig_info even though that claims and releases locks.
 *
 * Code in ./signal.c ensures that the debug control register
 * is restored before we deliver any signal, and therefore that
 * user code runs with the correct debug control register even though
 * we clear it here.
 *
 * Being careful here means that we don't have to be as careful in a
 * lot of more complicated places (task switching can be a bit lazy
 * about restoring all the debug state, and ptrace doesn't have to
 * find every occurrence of the TF bit that could be saved away even
 * by user code)
 */
fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
{
	unsigned int condition;
	struct task_struct *tsk = current;

	/* DR6 holds the cause bits for this debug exception. */
	get_debugreg(condition, 6);

	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
					SIGTRAP) == NOTIFY_STOP)
		return;
	/* It's safe to allow irq's after DR6 has been saved */
	if (regs->eflags & X86_EFLAGS_IF)
		local_irq_enable();

	/* Mask out spurious debug traps due to lazy DR7 setting */
	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
		if (!tsk->thread.debugreg[7])
			goto clear_dr7;
	}

	if (regs->eflags & VM_MASK)
		goto debug_vm86;

	/* Save debug status register where ptrace can see it */
	tsk->thread.debugreg[6] = condition;

	/*
	 * Single-stepping through TF: make sure we ignore any events in
	 * kernel space (but re-enable TF when returning to user mode).
	 */
	if (condition & DR_STEP) {
		/*
		 * We already checked v86 mode above, so we can
		 * check for kernel mode by just checking the CPL
		 * of CS.
		 */
		if (!user_mode(regs))
			goto clear_TF_reenable;
	}

	/* Ok, finally something we can handle */
	send_sigtrap(tsk, regs, error_code);

	/* Disable additional traps. They'll be re-enabled when
	 * the signal is delivered.
	 */
clear_dr7:
	set_debugreg(0, 7);
	return;

debug_vm86:
	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
	return;

clear_TF_reenable:
	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
	regs->eflags &= ~TF_MASK;
	return;
}
869 864
/*
 * Note that we play around with the 'TS' bit in an attempt to get
 * the correct behaviour even in the presence of the asynchronous
 * IRQ13 behaviour
 */
/*
 * x87 FPU exception: save and re-init the task's FPU state, decode the
 * unmasked status-word exception bit into an si_code, and deliver
 * SIGFPE to the current task at the faulting eip.
 */
void math_error(void __user *eip)
{
	struct task_struct * task;
	siginfo_t info;
	unsigned short cwd, swd;

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 16;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
	info.si_addr = eip;
	/*
	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
	 * status.  0x3f is the exception bits in these regs, 0x200 is the
	 * C1 reg you need in case of a stack fault, 0x040 is the stack
	 * fault bit.  We should only be taking one exception at a time,
	 * so if this combination doesn't produce any single exception,
	 * then we have a bad program that isn't syncronizing its FPU usage
	 * and it will suffer the consequences since we won't be able to
	 * fully reproduce the context of the exception
	 */
	cwd = get_fpu_cwd(task);
	swd = get_fpu_swd(task);
	switch (swd & ~cwd & 0x3f) {
		case 0x000: /* No unmasked exception */
			return;
		default:    /* Multiple exceptions */
			break;
		case 0x001: /* Invalid Op */
			/*
			 * swd & 0x240 == 0x040: Stack Underflow
			 * swd & 0x240 == 0x240: Stack Overflow
			 * User must clear the SF bit (0x40) if set
			 */
			info.si_code = FPE_FLTINV;
			break;
		case 0x002: /* Denormalize */
		case 0x010: /* Underflow */
			info.si_code = FPE_FLTUND;
			break;
		case 0x004: /* Zero Divide */
			info.si_code = FPE_FLTDIV;
			break;
		case 0x008: /* Overflow */
			info.si_code = FPE_FLTOVF;
			break;
		case 0x020: /* Precision */
			info.si_code = FPE_FLTRES;
			break;
	}
	force_sig_info(SIGFPE, &info, task);
}
933 928
/*
 * Trap 16 (x87 FPU error) entry: flag that any pending legacy FPU
 * IRQ13 should be ignored, then report the error at the faulting eip.
 */
fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
{
	ignore_fpu_irq = 1;
	math_error((void __user *)regs->eip);
}
939 934
/*
 * SSE (SIMD) FPU exception: save and re-init the task's FPU state,
 * decode the unmasked MXCSR exception bit into an si_code, and deliver
 * SIGFPE to the current task at the faulting eip.
 */
static void simd_math_error(void __user *eip)
{
	struct task_struct * task;
	siginfo_t info;
	unsigned short mxcsr;

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 19;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
	info.si_addr = eip;
	/*
	 * The SIMD FPU exceptions are handled a little differently, as there
	 * is only a single status/control register.  Thus, to determine which
	 * unmasked exception was caught we must mask the exception mask bits
	 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
	 */
	mxcsr = get_fpu_mxcsr(task);
	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
		case 0x000:
		default:
			break;
		case 0x001: /* Invalid Op */
			info.si_code = FPE_FLTINV;
			break;
		case 0x002: /* Denormalize */
		case 0x010: /* Underflow */
			info.si_code = FPE_FLTUND;
			break;
		case 0x004: /* Zero Divide */
			info.si_code = FPE_FLTDIV;
			break;
		case 0x008: /* Overflow */
			info.si_code = FPE_FLTOVF;
			break;
		case 0x020: /* Precision */
			info.si_code = FPE_FLTRES;
			break;
	}
	force_sig_info(SIGFPE, &info, task);
}
987 982
/*
 * Trap 19 entry.  On SSE-capable CPUs this is a real SIMD FPU
 * exception; on older CPUs the same vector shows up for an
 * undocumented cache-flush-from-userspace case, which is treated as
 * SIGSEGV (or routed to vm86 handling).
 */
fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
					long error_code)
{
	if (cpu_has_xmm) {
		/* Handle SIMD FPU exceptions on PIII+ processors. */
		ignore_fpu_irq = 1;
		simd_math_error((void __user *)regs->eip);
	} else {
		/*
		 * Handle strange cache flush from user space exception
		 * in all other cases.  This is undocumented behaviour.
		 */
		if (regs->eflags & VM_MASK) {
			handle_vm86_fault((struct kernel_vm86_regs *)regs,
					  error_code);
			return;
		}
		current->thread.trap_no = 19;
		current->thread.error_code = error_code;
		die_if_kernel("cache flush denied", regs, error_code);
		force_sig(SIGSEGV, current);
	}
}
1011 1006
/*
 * Trap 15: spurious interrupt caused by a P6 local-APIC erratum.
 * Deliberately does nothing; the old warning is kept disabled below.
 */
fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
					long error_code)
{
#if 0
	/* No need to warn about this any longer. */
	printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
}
1020 1015
/*
 * Rewrite this CPU's GDT_ENTRY_ESPFIX_SS descriptor: keep only its
 * type/flag bits, splice in a new base derived from the kernel stack
 * and a page-granular limit covering the rest of the THREAD_SIZE
 * stack, and return the kernel esp rebased against that segment.
 * (Part of the espfix workaround for 16-bit stack segments; caller is
 * outside this file.)
 */
fastcall unsigned long patch_espfix_desc(unsigned long uesp,
					 unsigned long kesp)
{
	struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
	/* Difference of the two stack pointers, rounded down to a
	 * THREAD_SIZE boundary, becomes the new segment base. */
	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
	unsigned long new_kesp = kesp - base;
	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
	__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
	/* Set up base for espfix segment */
	desc &= 0x00f0ff0000000000ULL;
	desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
		((((__u64)base) << 32) & 0xff00000000000000ULL) |
		((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
		(lim_pages & 0xffff);
	*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
	return new_kesp;
}
1038 1033
/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (in this case,
 * local interrupts are disabled at the call-site in entry.S).
 */
asmlinkage void math_state_restore(void)
{
	struct thread_info *thread = current_thread_info();
	struct task_struct *tsk = thread->task;

	clts();		/* Allow maths ops (or we recurse) */
	/* Lazily create FPU state on first use. */
	if (!tsk_used_math(tsk))
		init_fpu(tsk);
	restore_fpu(tsk);
	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
	tsk->fpu_counter++;
}
1061 1056
#ifndef CONFIG_MATH_EMULATION

/*
 * Stub used when math emulation is compiled out: a task that ends up
 * here (no coprocessor, no emulator) cannot do FP work, so kill it
 * with SIGFPE.
 */
asmlinkage void math_emulate(long arg)
{
	printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n");
	printk(KERN_EMERG "killing %s.\n",current->comm);
	force_sig(SIGFPE,current);
	schedule();
}

#endif /* CONFIG_MATH_EMULATION */
1073 1068
#ifdef CONFIG_X86_F00F_BUG
/*
 * Workaround for the Pentium F0 0F erratum: map the IDT read-only at a
 * fixmap address and point the IDT register at that mapping.
 */
void __init trap_init_f00f_bug(void)
{
	__set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);

	/*
	 * Update the IDT descriptor and reload the IDT so that
	 * it uses the read-only mapped virtual address.
	 */
	idt_descr.address = fix_to_virt(FIX_F00F_IDT);
	load_idt(&idt_descr);
}
#endif
1087 1082
/*
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
 * Pentium F0 0F bugfix can have resulted in the mapped
 * IDT being write-protected.
 */
/* Install a kernel interrupt gate for vector n. */
void set_intr_gate(unsigned int n, void *addr)
{
	_set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
}
1098 1093
1099 /* 1094 /*
1100 * This routine sets up an interrupt gate at directory privilege level 3. 1095 * This routine sets up an interrupt gate at directory privilege level 3.
1101 */ 1096 */
1102 static inline void set_system_intr_gate(unsigned int n, void *addr) 1097 static inline void set_system_intr_gate(unsigned int n, void *addr)
1103 { 1098 {
1104 _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS); 1099 _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
1105 } 1100 }
1106 1101
1107 static void __init set_trap_gate(unsigned int n, void *addr) 1102 static void __init set_trap_gate(unsigned int n, void *addr)
1108 { 1103 {
1109 _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS); 1104 _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
1110 } 1105 }
1111 1106
1112 static void __init set_system_gate(unsigned int n, void *addr) 1107 static void __init set_system_gate(unsigned int n, void *addr)
1113 { 1108 {
1114 _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS); 1109 _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
1115 } 1110 }
1116 1111
1117 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) 1112 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
1118 { 1113 {
1119 _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3)); 1114 _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
1120 } 1115 }
1121 1116
1122 1117
1123 void __init trap_init(void) 1118 void __init trap_init(void)
1124 { 1119 {
1125 #ifdef CONFIG_EISA 1120 #ifdef CONFIG_EISA
1126 void __iomem *p = ioremap(0x0FFFD9, 4); 1121 void __iomem *p = ioremap(0x0FFFD9, 4);
1127 if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { 1122 if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
1128 EISA_bus = 1; 1123 EISA_bus = 1;
1129 } 1124 }
1130 iounmap(p); 1125 iounmap(p);
1131 #endif 1126 #endif
1132 1127
1133 #ifdef CONFIG_X86_LOCAL_APIC 1128 #ifdef CONFIG_X86_LOCAL_APIC
1134 init_apic_mappings(); 1129 init_apic_mappings();
1135 #endif 1130 #endif
1136 1131
1137 set_trap_gate(0,&divide_error); 1132 set_trap_gate(0,&divide_error);
1138 set_intr_gate(1,&debug); 1133 set_intr_gate(1,&debug);
1139 set_intr_gate(2,&nmi); 1134 set_intr_gate(2,&nmi);
1140 set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ 1135 set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
1141 set_system_gate(4,&overflow); 1136 set_system_gate(4,&overflow);
1142 set_trap_gate(5,&bounds); 1137 set_trap_gate(5,&bounds);
1143 set_trap_gate(6,&invalid_op); 1138 set_trap_gate(6,&invalid_op);
1144 set_trap_gate(7,&device_not_available); 1139 set_trap_gate(7,&device_not_available);
1145 set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); 1140 set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
1146 set_trap_gate(9,&coprocessor_segment_overrun); 1141 set_trap_gate(9,&coprocessor_segment_overrun);
1147 set_trap_gate(10,&invalid_TSS); 1142 set_trap_gate(10,&invalid_TSS);
1148 set_trap_gate(11,&segment_not_present); 1143 set_trap_gate(11,&segment_not_present);
1149 set_trap_gate(12,&stack_segment); 1144 set_trap_gate(12,&stack_segment);
1150 set_trap_gate(13,&general_protection); 1145 set_trap_gate(13,&general_protection);
1151 set_intr_gate(14,&page_fault); 1146 set_intr_gate(14,&page_fault);
1152 set_trap_gate(15,&spurious_interrupt_bug); 1147 set_trap_gate(15,&spurious_interrupt_bug);
1153 set_trap_gate(16,&coprocessor_error); 1148 set_trap_gate(16,&coprocessor_error);
1154 set_trap_gate(17,&alignment_check); 1149 set_trap_gate(17,&alignment_check);
1155 #ifdef CONFIG_X86_MCE 1150 #ifdef CONFIG_X86_MCE
1156 set_trap_gate(18,&machine_check); 1151 set_trap_gate(18,&machine_check);
1157 #endif 1152 #endif
1158 set_trap_gate(19,&simd_coprocessor_error); 1153 set_trap_gate(19,&simd_coprocessor_error);
1159 1154
1160 if (cpu_has_fxsr) { 1155 if (cpu_has_fxsr) {
1161 /* 1156 /*
1162 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. 1157 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
1163 * Generates a compile-time "error: zero width for bit-field" if 1158 * Generates a compile-time "error: zero width for bit-field" if
1164 * the alignment is wrong. 1159 * the alignment is wrong.
1165 */ 1160 */
1166 struct fxsrAlignAssert { 1161 struct fxsrAlignAssert {
1167 int _:!(offsetof(struct task_struct, 1162 int _:!(offsetof(struct task_struct,
1168 thread.i387.fxsave) & 15); 1163 thread.i387.fxsave) & 15);
1169 }; 1164 };
1170 1165
1171 printk(KERN_INFO "Enabling fast FPU save and restore... "); 1166 printk(KERN_INFO "Enabling fast FPU save and restore... ");
1172 set_in_cr4(X86_CR4_OSFXSR); 1167 set_in_cr4(X86_CR4_OSFXSR);
1173 printk("done.\n"); 1168 printk("done.\n");
1174 } 1169 }
1175 if (cpu_has_xmm) { 1170 if (cpu_has_xmm) {
1176 printk(KERN_INFO "Enabling unmasked SIMD FPU exception " 1171 printk(KERN_INFO "Enabling unmasked SIMD FPU exception "
1177 "support... "); 1172 "support... ");
1178 set_in_cr4(X86_CR4_OSXMMEXCPT); 1173 set_in_cr4(X86_CR4_OSXMMEXCPT);
1179 printk("done.\n"); 1174 printk("done.\n");
1180 } 1175 }
1181 1176
1182 set_system_gate(SYSCALL_VECTOR,&system_call); 1177 set_system_gate(SYSCALL_VECTOR,&system_call);
1183 1178
1184 /* 1179 /*
1185 * Should be a barrier for any external CPU state. 1180 * Should be a barrier for any external CPU state.
1186 */ 1181 */
1187 cpu_init(); 1182 cpu_init();
1188 1183
1189 trap_init_hook(); 1184 trap_init_hook();
1190 } 1185 }
1191 1186
1192 static int __init kstack_setup(char *s) 1187 static int __init kstack_setup(char *s)
1193 { 1188 {
1194 kstack_depth_to_print = simple_strtoul(s, NULL, 0); 1189 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1195 return 1; 1190 return 1;
1196 } 1191 }
1197 __setup("kstack=", kstack_setup); 1192 __setup("kstack=", kstack_setup);
1198 1193
1199 static int __init code_bytes_setup(char *s) 1194 static int __init code_bytes_setup(char *s)
1200 { 1195 {
1201 code_bytes = simple_strtoul(s, NULL, 0); 1196 code_bytes = simple_strtoul(s, NULL, 0);
1202 if (code_bytes > 8192) 1197 if (code_bytes > 8192)
1203 code_bytes = 8192; 1198 code_bytes = 8192;
1204 1199
1205 return 1; 1200 return 1;
1206 } 1201 }
1207 __setup("code_bytes=", code_bytes_setup); 1202 __setup("code_bytes=", code_bytes_setup);
1208 1203
arch/x86_64/kernel/traps.c
1 /* 1 /*
2 * linux/arch/x86-64/traps.c 2 * linux/arch/x86-64/traps.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs 5 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
6 * 6 *
7 * Pentium III FXSR, SSE support 7 * Pentium III FXSR, SSE support
8 * Gareth Hughes <gareth@valinux.com>, May 2000 8 * Gareth Hughes <gareth@valinux.com>, May 2000
9 */ 9 */
10 10
11 /* 11 /*
12 * 'Traps.c' handles hardware traps and faults after we have saved some 12 * 'Traps.c' handles hardware traps and faults after we have saved some
13 * state in 'entry.S'. 13 * state in 'entry.S'.
14 */ 14 */
15 #include <linux/sched.h> 15 #include <linux/sched.h>
16 #include <linux/kernel.h> 16 #include <linux/kernel.h>
17 #include <linux/string.h> 17 #include <linux/string.h>
18 #include <linux/errno.h> 18 #include <linux/errno.h>
19 #include <linux/ptrace.h> 19 #include <linux/ptrace.h>
20 #include <linux/timer.h> 20 #include <linux/timer.h>
21 #include <linux/mm.h> 21 #include <linux/mm.h>
22 #include <linux/init.h> 22 #include <linux/init.h>
23 #include <linux/delay.h> 23 #include <linux/delay.h>
24 #include <linux/spinlock.h> 24 #include <linux/spinlock.h>
25 #include <linux/interrupt.h> 25 #include <linux/interrupt.h>
26 #include <linux/kallsyms.h> 26 #include <linux/kallsyms.h>
27 #include <linux/module.h> 27 #include <linux/module.h>
28 #include <linux/moduleparam.h> 28 #include <linux/moduleparam.h>
29 #include <linux/nmi.h> 29 #include <linux/nmi.h>
30 #include <linux/kprobes.h> 30 #include <linux/kprobes.h>
31 #include <linux/kexec.h> 31 #include <linux/kexec.h>
32 #include <linux/unwind.h> 32 #include <linux/unwind.h>
33 #include <linux/uaccess.h> 33 #include <linux/uaccess.h>
34 #include <linux/bug.h> 34 #include <linux/bug.h>
35 #include <linux/kdebug.h> 35 #include <linux/kdebug.h>
36 36
37 #include <asm/system.h> 37 #include <asm/system.h>
38 #include <asm/io.h> 38 #include <asm/io.h>
39 #include <asm/atomic.h> 39 #include <asm/atomic.h>
40 #include <asm/debugreg.h> 40 #include <asm/debugreg.h>
41 #include <asm/desc.h> 41 #include <asm/desc.h>
42 #include <asm/i387.h> 42 #include <asm/i387.h>
43 #include <asm/processor.h> 43 #include <asm/processor.h>
44 #include <asm/unwind.h> 44 #include <asm/unwind.h>
45 #include <asm/smp.h> 45 #include <asm/smp.h>
46 #include <asm/pgalloc.h> 46 #include <asm/pgalloc.h>
47 #include <asm/pda.h> 47 #include <asm/pda.h>
48 #include <asm/proto.h> 48 #include <asm/proto.h>
49 #include <asm/nmi.h> 49 #include <asm/nmi.h>
50 #include <asm/stacktrace.h> 50 #include <asm/stacktrace.h>
51 51
52 asmlinkage void divide_error(void); 52 asmlinkage void divide_error(void);
53 asmlinkage void debug(void); 53 asmlinkage void debug(void);
54 asmlinkage void nmi(void); 54 asmlinkage void nmi(void);
55 asmlinkage void int3(void); 55 asmlinkage void int3(void);
56 asmlinkage void overflow(void); 56 asmlinkage void overflow(void);
57 asmlinkage void bounds(void); 57 asmlinkage void bounds(void);
58 asmlinkage void invalid_op(void); 58 asmlinkage void invalid_op(void);
59 asmlinkage void device_not_available(void); 59 asmlinkage void device_not_available(void);
60 asmlinkage void double_fault(void); 60 asmlinkage void double_fault(void);
61 asmlinkage void coprocessor_segment_overrun(void); 61 asmlinkage void coprocessor_segment_overrun(void);
62 asmlinkage void invalid_TSS(void); 62 asmlinkage void invalid_TSS(void);
63 asmlinkage void segment_not_present(void); 63 asmlinkage void segment_not_present(void);
64 asmlinkage void stack_segment(void); 64 asmlinkage void stack_segment(void);
65 asmlinkage void general_protection(void); 65 asmlinkage void general_protection(void);
66 asmlinkage void page_fault(void); 66 asmlinkage void page_fault(void);
67 asmlinkage void coprocessor_error(void); 67 asmlinkage void coprocessor_error(void);
68 asmlinkage void simd_coprocessor_error(void); 68 asmlinkage void simd_coprocessor_error(void);
69 asmlinkage void reserved(void); 69 asmlinkage void reserved(void);
70 asmlinkage void alignment_check(void); 70 asmlinkage void alignment_check(void);
71 asmlinkage void machine_check(void); 71 asmlinkage void machine_check(void);
72 asmlinkage void spurious_interrupt_bug(void); 72 asmlinkage void spurious_interrupt_bug(void);
73 73
74 static inline void conditional_sti(struct pt_regs *regs) 74 static inline void conditional_sti(struct pt_regs *regs)
75 { 75 {
76 if (regs->eflags & X86_EFLAGS_IF) 76 if (regs->eflags & X86_EFLAGS_IF)
77 local_irq_enable(); 77 local_irq_enable();
78 } 78 }
79 79
80 static inline void preempt_conditional_sti(struct pt_regs *regs) 80 static inline void preempt_conditional_sti(struct pt_regs *regs)
81 { 81 {
82 preempt_disable(); 82 preempt_disable();
83 if (regs->eflags & X86_EFLAGS_IF) 83 if (regs->eflags & X86_EFLAGS_IF)
84 local_irq_enable(); 84 local_irq_enable();
85 } 85 }
86 86
87 static inline void preempt_conditional_cli(struct pt_regs *regs) 87 static inline void preempt_conditional_cli(struct pt_regs *regs)
88 { 88 {
89 if (regs->eflags & X86_EFLAGS_IF) 89 if (regs->eflags & X86_EFLAGS_IF)
90 local_irq_disable(); 90 local_irq_disable();
91 /* Make sure to not schedule here because we could be running 91 /* Make sure to not schedule here because we could be running
92 on an exception stack. */ 92 on an exception stack. */
93 preempt_enable_no_resched(); 93 preempt_enable_no_resched();
94 } 94 }
95 95
96 int kstack_depth_to_print = 12; 96 int kstack_depth_to_print = 12;
97 97
98 #ifdef CONFIG_KALLSYMS 98 #ifdef CONFIG_KALLSYMS
99 void printk_address(unsigned long address) 99 void printk_address(unsigned long address)
100 { 100 {
101 unsigned long offset = 0, symsize; 101 unsigned long offset = 0, symsize;
102 const char *symname; 102 const char *symname;
103 char *modname; 103 char *modname;
104 char *delim = ":"; 104 char *delim = ":";
105 char namebuf[128]; 105 char namebuf[128];
106 106
107 symname = kallsyms_lookup(address, &symsize, &offset, 107 symname = kallsyms_lookup(address, &symsize, &offset,
108 &modname, namebuf); 108 &modname, namebuf);
109 if (!symname) { 109 if (!symname) {
110 printk(" [<%016lx>]\n", address); 110 printk(" [<%016lx>]\n", address);
111 return; 111 return;
112 } 112 }
113 if (!modname) 113 if (!modname)
114 modname = delim = ""; 114 modname = delim = "";
115 printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n", 115 printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
116 address, delim, modname, delim, symname, offset, symsize); 116 address, delim, modname, delim, symname, offset, symsize);
117 } 117 }
118 #else 118 #else
119 void printk_address(unsigned long address) 119 void printk_address(unsigned long address)
120 { 120 {
121 printk(" [<%016lx>]\n", address); 121 printk(" [<%016lx>]\n", address);
122 } 122 }
123 #endif 123 #endif
124 124
125 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 125 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
126 unsigned *usedp, char **idp) 126 unsigned *usedp, char **idp)
127 { 127 {
128 static char ids[][8] = { 128 static char ids[][8] = {
129 [DEBUG_STACK - 1] = "#DB", 129 [DEBUG_STACK - 1] = "#DB",
130 [NMI_STACK - 1] = "NMI", 130 [NMI_STACK - 1] = "NMI",
131 [DOUBLEFAULT_STACK - 1] = "#DF", 131 [DOUBLEFAULT_STACK - 1] = "#DF",
132 [STACKFAULT_STACK - 1] = "#SS", 132 [STACKFAULT_STACK - 1] = "#SS",
133 [MCE_STACK - 1] = "#MC", 133 [MCE_STACK - 1] = "#MC",
134 #if DEBUG_STKSZ > EXCEPTION_STKSZ 134 #if DEBUG_STKSZ > EXCEPTION_STKSZ
135 [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" 135 [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
136 #endif 136 #endif
137 }; 137 };
138 unsigned k; 138 unsigned k;
139 139
140 /* 140 /*
141 * Iterate over all exception stacks, and figure out whether 141 * Iterate over all exception stacks, and figure out whether
142 * 'stack' is in one of them: 142 * 'stack' is in one of them:
143 */ 143 */
144 for (k = 0; k < N_EXCEPTION_STACKS; k++) { 144 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
145 unsigned long end = per_cpu(orig_ist, cpu).ist[k]; 145 unsigned long end = per_cpu(orig_ist, cpu).ist[k];
146 /* 146 /*
147 * Is 'stack' above this exception frame's end? 147 * Is 'stack' above this exception frame's end?
148 * If yes then skip to the next frame. 148 * If yes then skip to the next frame.
149 */ 149 */
150 if (stack >= end) 150 if (stack >= end)
151 continue; 151 continue;
152 /* 152 /*
153 * Is 'stack' above this exception frame's start address? 153 * Is 'stack' above this exception frame's start address?
154 * If yes then we found the right frame. 154 * If yes then we found the right frame.
155 */ 155 */
156 if (stack >= end - EXCEPTION_STKSZ) { 156 if (stack >= end - EXCEPTION_STKSZ) {
157 /* 157 /*
158 * Make sure we only iterate through an exception 158 * Make sure we only iterate through an exception
159 * stack once. If it comes up for the second time 159 * stack once. If it comes up for the second time
160 * then there's something wrong going on - just 160 * then there's something wrong going on - just
161 * break out and return NULL: 161 * break out and return NULL:
162 */ 162 */
163 if (*usedp & (1U << k)) 163 if (*usedp & (1U << k))
164 break; 164 break;
165 *usedp |= 1U << k; 165 *usedp |= 1U << k;
166 *idp = ids[k]; 166 *idp = ids[k];
167 return (unsigned long *)end; 167 return (unsigned long *)end;
168 } 168 }
169 /* 169 /*
170 * If this is a debug stack, and if it has a larger size than 170 * If this is a debug stack, and if it has a larger size than
171 * the usual exception stacks, then 'stack' might still 171 * the usual exception stacks, then 'stack' might still
172 * be within the lower portion of the debug stack: 172 * be within the lower portion of the debug stack:
173 */ 173 */
174 #if DEBUG_STKSZ > EXCEPTION_STKSZ 174 #if DEBUG_STKSZ > EXCEPTION_STKSZ
175 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { 175 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
176 unsigned j = N_EXCEPTION_STACKS - 1; 176 unsigned j = N_EXCEPTION_STACKS - 1;
177 177
178 /* 178 /*
179 * Black magic. A large debug stack is composed of 179 * Black magic. A large debug stack is composed of
180 * multiple exception stack entries, which we 180 * multiple exception stack entries, which we
181 * iterate through now. Dont look: 181 * iterate through now. Dont look:
182 */ 182 */
183 do { 183 do {
184 ++j; 184 ++j;
185 end -= EXCEPTION_STKSZ; 185 end -= EXCEPTION_STKSZ;
186 ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); 186 ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
187 } while (stack < end - EXCEPTION_STKSZ); 187 } while (stack < end - EXCEPTION_STKSZ);
188 if (*usedp & (1U << j)) 188 if (*usedp & (1U << j))
189 break; 189 break;
190 *usedp |= 1U << j; 190 *usedp |= 1U << j;
191 *idp = ids[j]; 191 *idp = ids[j];
192 return (unsigned long *)end; 192 return (unsigned long *)end;
193 } 193 }
194 #endif 194 #endif
195 } 195 }
196 return NULL; 196 return NULL;
197 } 197 }
198 198
199 #define MSG(txt) ops->warning(data, txt) 199 #define MSG(txt) ops->warning(data, txt)
200 200
201 /* 201 /*
202 * x86-64 can have upto three kernel stacks: 202 * x86-64 can have upto three kernel stacks:
203 * process stack 203 * process stack
204 * interrupt stack 204 * interrupt stack
205 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 205 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
206 */ 206 */
207 207
208 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) 208 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
209 { 209 {
210 void *t = (void *)tinfo; 210 void *t = (void *)tinfo;
211 return p > t && p < t + THREAD_SIZE - 3; 211 return p > t && p < t + THREAD_SIZE - 3;
212 } 212 }
213 213
214 void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 214 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
215 unsigned long *stack, 215 unsigned long *stack,
216 struct stacktrace_ops *ops, void *data) 216 struct stacktrace_ops *ops, void *data)
217 { 217 {
218 const unsigned cpu = get_cpu(); 218 const unsigned cpu = get_cpu();
219 unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; 219 unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
220 unsigned used = 0; 220 unsigned used = 0;
221 struct thread_info *tinfo; 221 struct thread_info *tinfo;
222 222
223 if (!tsk) 223 if (!tsk)
224 tsk = current; 224 tsk = current;
225 225
226 if (!stack) { 226 if (!stack) {
227 unsigned long dummy; 227 unsigned long dummy;
228 stack = &dummy; 228 stack = &dummy;
229 if (tsk && tsk != current) 229 if (tsk && tsk != current)
230 stack = (unsigned long *)tsk->thread.rsp; 230 stack = (unsigned long *)tsk->thread.rsp;
231 } 231 }
232 232
233 /* 233 /*
234 * Print function call entries within a stack. 'cond' is the 234 * Print function call entries within a stack. 'cond' is the
235 * "end of stackframe" condition, that the 'stack++' 235 * "end of stackframe" condition, that the 'stack++'
236 * iteration will eventually trigger. 236 * iteration will eventually trigger.
237 */ 237 */
238 #define HANDLE_STACK(cond) \ 238 #define HANDLE_STACK(cond) \
239 do while (cond) { \ 239 do while (cond) { \
240 unsigned long addr = *stack++; \ 240 unsigned long addr = *stack++; \
241 /* Use unlocked access here because except for NMIs \ 241 /* Use unlocked access here because except for NMIs \
242 we should be already protected against module unloads */ \ 242 we should be already protected against module unloads */ \
243 if (__kernel_text_address(addr)) { \ 243 if (__kernel_text_address(addr)) { \
244 /* \ 244 /* \
245 * If the address is either in the text segment of the \ 245 * If the address is either in the text segment of the \
246 * kernel, or in the region which contains vmalloc'ed \ 246 * kernel, or in the region which contains vmalloc'ed \
247 * memory, it *may* be the address of a calling \ 247 * memory, it *may* be the address of a calling \
248 * routine; if so, print it so that someone tracing \ 248 * routine; if so, print it so that someone tracing \
249 * down the cause of the crash will be able to figure \ 249 * down the cause of the crash will be able to figure \
250 * out the call path that was taken. \ 250 * out the call path that was taken. \
251 */ \ 251 */ \
252 ops->address(data, addr); \ 252 ops->address(data, addr); \
253 } \ 253 } \
254 } while (0) 254 } while (0)
255 255
256 /* 256 /*
257 * Print function call entries in all stacks, starting at the 257 * Print function call entries in all stacks, starting at the
258 * current stack address. If the stacks consist of nested 258 * current stack address. If the stacks consist of nested
259 * exceptions 259 * exceptions
260 */ 260 */
261 for (;;) { 261 for (;;) {
262 char *id; 262 char *id;
263 unsigned long *estack_end; 263 unsigned long *estack_end;
264 estack_end = in_exception_stack(cpu, (unsigned long)stack, 264 estack_end = in_exception_stack(cpu, (unsigned long)stack,
265 &used, &id); 265 &used, &id);
266 266
267 if (estack_end) { 267 if (estack_end) {
268 if (ops->stack(data, id) < 0) 268 if (ops->stack(data, id) < 0)
269 break; 269 break;
270 HANDLE_STACK (stack < estack_end); 270 HANDLE_STACK (stack < estack_end);
271 ops->stack(data, "<EOE>"); 271 ops->stack(data, "<EOE>");
272 /* 272 /*
273 * We link to the next stack via the 273 * We link to the next stack via the
274 * second-to-last pointer (index -2 to end) in the 274 * second-to-last pointer (index -2 to end) in the
275 * exception stack: 275 * exception stack:
276 */ 276 */
277 stack = (unsigned long *) estack_end[-2]; 277 stack = (unsigned long *) estack_end[-2];
278 continue; 278 continue;
279 } 279 }
280 if (irqstack_end) { 280 if (irqstack_end) {
281 unsigned long *irqstack; 281 unsigned long *irqstack;
282 irqstack = irqstack_end - 282 irqstack = irqstack_end -
283 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 283 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
284 284
285 if (stack >= irqstack && stack < irqstack_end) { 285 if (stack >= irqstack && stack < irqstack_end) {
286 if (ops->stack(data, "IRQ") < 0) 286 if (ops->stack(data, "IRQ") < 0)
287 break; 287 break;
288 HANDLE_STACK (stack < irqstack_end); 288 HANDLE_STACK (stack < irqstack_end);
289 /* 289 /*
290 * We link to the next stack (which would be 290 * We link to the next stack (which would be
291 * the process stack normally) the last 291 * the process stack normally) the last
292 * pointer (index -1 to end) in the IRQ stack: 292 * pointer (index -1 to end) in the IRQ stack:
293 */ 293 */
294 stack = (unsigned long *) (irqstack_end[-1]); 294 stack = (unsigned long *) (irqstack_end[-1]);
295 irqstack_end = NULL; 295 irqstack_end = NULL;
296 ops->stack(data, "EOI"); 296 ops->stack(data, "EOI");
297 continue; 297 continue;
298 } 298 }
299 } 299 }
300 break; 300 break;
301 } 301 }
302 302
303 /* 303 /*
304 * This handles the process stack: 304 * This handles the process stack:
305 */ 305 */
306 tinfo = task_thread_info(tsk); 306 tinfo = task_thread_info(tsk);
307 HANDLE_STACK (valid_stack_ptr(tinfo, stack)); 307 HANDLE_STACK (valid_stack_ptr(tinfo, stack));
308 #undef HANDLE_STACK 308 #undef HANDLE_STACK
309 put_cpu(); 309 put_cpu();
310 } 310 }
311 EXPORT_SYMBOL(dump_trace); 311 EXPORT_SYMBOL(dump_trace);
312 312
313 static void 313 static void
314 print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) 314 print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
315 { 315 {
316 print_symbol(msg, symbol); 316 print_symbol(msg, symbol);
317 printk("\n"); 317 printk("\n");
318 } 318 }
319 319
320 static void print_trace_warning(void *data, char *msg) 320 static void print_trace_warning(void *data, char *msg)
321 { 321 {
322 printk("%s\n", msg); 322 printk("%s\n", msg);
323 } 323 }
324 324
325 static int print_trace_stack(void *data, char *name) 325 static int print_trace_stack(void *data, char *name)
326 { 326 {
327 printk(" <%s> ", name); 327 printk(" <%s> ", name);
328 return 0; 328 return 0;
329 } 329 }
330 330
331 static void print_trace_address(void *data, unsigned long addr) 331 static void print_trace_address(void *data, unsigned long addr)
332 { 332 {
333 printk_address(addr); 333 printk_address(addr);
334 } 334 }
335 335
336 static struct stacktrace_ops print_trace_ops = { 336 static struct stacktrace_ops print_trace_ops = {
337 .warning = print_trace_warning, 337 .warning = print_trace_warning,
338 .warning_symbol = print_trace_warning_symbol, 338 .warning_symbol = print_trace_warning_symbol,
339 .stack = print_trace_stack, 339 .stack = print_trace_stack,
340 .address = print_trace_address, 340 .address = print_trace_address,
341 }; 341 };
342 342
343 void 343 void
344 show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack) 344 show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
345 { 345 {
346 printk("\nCall Trace:\n"); 346 printk("\nCall Trace:\n");
347 dump_trace(tsk, regs, stack, &print_trace_ops, NULL); 347 dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
348 printk("\n"); 348 printk("\n");
349 } 349 }
350 350
351 static void 351 static void
352 _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) 352 _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
353 { 353 {
354 unsigned long *stack; 354 unsigned long *stack;
355 int i; 355 int i;
356 const int cpu = smp_processor_id(); 356 const int cpu = smp_processor_id();
357 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); 357 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
358 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 358 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
359 359
360 // debugging aid: "show_stack(NULL, NULL);" prints the 360 // debugging aid: "show_stack(NULL, NULL);" prints the
361 // back trace for this cpu. 361 // back trace for this cpu.
362 362
363 if (rsp == NULL) { 363 if (rsp == NULL) {
364 if (tsk) 364 if (tsk)
365 rsp = (unsigned long *)tsk->thread.rsp; 365 rsp = (unsigned long *)tsk->thread.rsp;
366 else 366 else
367 rsp = (unsigned long *)&rsp; 367 rsp = (unsigned long *)&rsp;
368 } 368 }
369 369
370 stack = rsp; 370 stack = rsp;
371 for(i=0; i < kstack_depth_to_print; i++) { 371 for(i=0; i < kstack_depth_to_print; i++) {
372 if (stack >= irqstack && stack <= irqstack_end) { 372 if (stack >= irqstack && stack <= irqstack_end) {
373 if (stack == irqstack_end) { 373 if (stack == irqstack_end) {
374 stack = (unsigned long *) (irqstack_end[-1]); 374 stack = (unsigned long *) (irqstack_end[-1]);
375 printk(" <EOI> "); 375 printk(" <EOI> ");
376 } 376 }
377 } else { 377 } else {
378 if (((long) stack & (THREAD_SIZE-1)) == 0) 378 if (((long) stack & (THREAD_SIZE-1)) == 0)
379 break; 379 break;
380 } 380 }
381 if (i && ((i % 4) == 0)) 381 if (i && ((i % 4) == 0))
382 printk("\n"); 382 printk("\n");
383 printk(" %016lx", *stack++); 383 printk(" %016lx", *stack++);
384 touch_nmi_watchdog(); 384 touch_nmi_watchdog();
385 } 385 }
386 show_trace(tsk, regs, rsp); 386 show_trace(tsk, regs, rsp);
387 } 387 }
388 388
389 void show_stack(struct task_struct *tsk, unsigned long * rsp) 389 void show_stack(struct task_struct *tsk, unsigned long * rsp)
390 { 390 {
391 _show_stack(tsk, NULL, rsp); 391 _show_stack(tsk, NULL, rsp);
392 } 392 }
393 393
394 /* 394 /*
395 * The architecture-independent dump_stack generator 395 * The architecture-independent dump_stack generator
396 */ 396 */
397 void dump_stack(void) 397 void dump_stack(void)
398 { 398 {
399 unsigned long dummy; 399 unsigned long dummy;
400 show_trace(NULL, NULL, &dummy); 400 show_trace(NULL, NULL, &dummy);
401 } 401 }
402 402
403 EXPORT_SYMBOL(dump_stack); 403 EXPORT_SYMBOL(dump_stack);
404 404
405 void show_registers(struct pt_regs *regs) 405 void show_registers(struct pt_regs *regs)
406 { 406 {
407 int i; 407 int i;
408 int in_kernel = !user_mode(regs); 408 int in_kernel = !user_mode(regs);
409 unsigned long rsp; 409 unsigned long rsp;
410 const int cpu = smp_processor_id(); 410 const int cpu = smp_processor_id();
411 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 411 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
412 412
413 rsp = regs->rsp; 413 rsp = regs->rsp;
414 printk("CPU %d ", cpu); 414 printk("CPU %d ", cpu);
415 __show_regs(regs); 415 __show_regs(regs);
416 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 416 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
417 cur->comm, cur->pid, task_thread_info(cur), cur); 417 cur->comm, cur->pid, task_thread_info(cur), cur);
418 418
419 /* 419 /*
420 * When in-kernel, we also print out the stack and code at the 420 * When in-kernel, we also print out the stack and code at the
421 * time of the fault.. 421 * time of the fault..
422 */ 422 */
423 if (in_kernel) { 423 if (in_kernel) {
424 printk("Stack: "); 424 printk("Stack: ");
425 _show_stack(NULL, regs, (unsigned long*)rsp); 425 _show_stack(NULL, regs, (unsigned long*)rsp);
426 426
427 printk("\nCode: "); 427 printk("\nCode: ");
428 if (regs->rip < PAGE_OFFSET) 428 if (regs->rip < PAGE_OFFSET)
429 goto bad; 429 goto bad;
430 430
431 for (i=0; i<20; i++) { 431 for (i=0; i<20; i++) {
432 unsigned char c; 432 unsigned char c;
433 if (__get_user(c, &((unsigned char*)regs->rip)[i])) { 433 if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
434 bad: 434 bad:
435 printk(" Bad RIP value."); 435 printk(" Bad RIP value.");
436 break; 436 break;
437 } 437 }
438 printk("%02x ", c); 438 printk("%02x ", c);
439 } 439 }
440 } 440 }
441 printk("\n"); 441 printk("\n");
442 } 442 }
443 443
444 int is_valid_bugaddr(unsigned long rip) 444 int is_valid_bugaddr(unsigned long rip)
445 { 445 {
446 unsigned short ud2; 446 unsigned short ud2;
447 447
448 if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) 448 if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
449 return 0; 449 return 0;
450 450
451 return ud2 == 0x0b0f; 451 return ud2 == 0x0b0f;
452 } 452 }
453 453
454 #ifdef CONFIG_BUG 454 #ifdef CONFIG_BUG
455 void out_of_line_bug(void) 455 void out_of_line_bug(void)
456 { 456 {
457 BUG(); 457 BUG();
458 } 458 }
459 EXPORT_SYMBOL(out_of_line_bug); 459 EXPORT_SYMBOL(out_of_line_bug);
460 #endif 460 #endif
461 461
462 static DEFINE_SPINLOCK(die_lock); 462 static DEFINE_SPINLOCK(die_lock);
463 static int die_owner = -1; 463 static int die_owner = -1;
464 static unsigned int die_nest_count; 464 static unsigned int die_nest_count;
465 465
466 unsigned __kprobes long oops_begin(void) 466 unsigned __kprobes long oops_begin(void)
467 { 467 {
468 int cpu = smp_processor_id(); 468 int cpu = smp_processor_id();
469 unsigned long flags; 469 unsigned long flags;
470 470
471 oops_enter(); 471 oops_enter();
472 472
473 /* racy, but better than risking deadlock. */ 473 /* racy, but better than risking deadlock. */
474 local_irq_save(flags); 474 local_irq_save(flags);
475 if (!spin_trylock(&die_lock)) { 475 if (!spin_trylock(&die_lock)) {
476 if (cpu == die_owner) 476 if (cpu == die_owner)
477 /* nested oops. should stop eventually */; 477 /* nested oops. should stop eventually */;
478 else 478 else
479 spin_lock(&die_lock); 479 spin_lock(&die_lock);
480 } 480 }
481 die_nest_count++; 481 die_nest_count++;
482 die_owner = cpu; 482 die_owner = cpu;
483 console_verbose(); 483 console_verbose();
484 bust_spinlocks(1); 484 bust_spinlocks(1);
485 return flags; 485 return flags;
486 } 486 }
487 487
/*
 * Leave oops processing started by oops_begin(): drop one nesting
 * level, release die_lock only when the outermost oops finishes,
 * restore the saved IRQ flags and honour panic_on_oops.
 */
void __kprobes oops_end(unsigned long flags)
{
	die_owner = -1;
	bust_spinlocks(0);
	die_nest_count--;
	if (die_nest_count)
		/* We still own the lock */
		local_irq_restore(flags);
	else
		/* Nest count reaches zero, release the lock. */
		spin_unlock_irqrestore(&die_lock, flags);
	if (panic_on_oops)
		panic("Fatal exception");
	oops_exit();
}
503 503
/*
 * Print the oops banner, notify DIE_OOPS listeners, dump registers and
 * a short RIP/RSP summary, and kexec into a crash kernel if one is
 * loaded.  Must be called between oops_begin() and oops_end().
 */
void __kprobes __die(const char * str, struct pt_regs * regs, long err)
{
	static int die_counter;	/* distinguishes successive oopses */
	printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
#ifdef CONFIG_PREEMPT
	printk("PREEMPT ");
#endif
#ifdef CONFIG_SMP
	printk("SMP ");
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
	printk("DEBUG_PAGEALLOC");
#endif
	printk("\n");
	notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
	show_registers(regs);
	/* Executive summary in case the oops scrolled away */
	printk(KERN_ALERT "RIP ");
	printk_address(regs->rip);
	printk(" RSP <%016lx>\n", regs->rsp);
	if (kexec_should_crash(current))
		crash_kexec(regs);
}
527 527
/*
 * Kill the current task with an oops.  Reports BUG() details for
 * kernel-mode faults, then never returns (do_exit).
 */
void die(const char * str, struct pt_regs * regs, long err)
{
	unsigned long flags = oops_begin();

	if (!user_mode(regs))
		report_bug(regs->rip);

	__die(str, regs, err);
	oops_end(flags);
	do_exit(SIGSEGV);
}
539 539
/*
 * Die from NMI context (e.g. the NMI watchdog firing).  @str is a
 * printk format taking the CPU number.  Panics when @do_panic or
 * panic_on_oops is set; otherwise exits the interrupted task.
 */
void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
	unsigned long flags = oops_begin();

	/*
	 * We are in trouble anyway, lets at least try
	 * to get a message out.
	 */
	printk(str, smp_processor_id());
	show_registers(regs);
	if (kexec_should_crash(current))
		crash_kexec(regs);
	if (do_panic || panic_on_oops)
		panic("Non maskable interrupt");
	oops_end(flags);
	nmi_exit();
	local_irq_enable();
	do_exit(SIGSEGV);
}
559 559
/*
 * Common handler for ordinary exception traps: deliver @signr (with an
 * optional siginfo payload) to a user-mode task, or apply an
 * exception-table fixup / die() for a kernel-mode fault.
 */
static void __kprobes do_trap(int trapnr, int signr, char *str,
			      struct pt_regs * regs, long error_code,
			      siginfo_t *info)
{
	struct task_struct *tsk = current;

	if (user_mode(regs)) {
		/*
		 * We want error_code and trap_no set for userspace
		 * faults and kernelspace faults which result in
		 * die(), but not kernelspace faults which are fixed
		 * up.  die() gives the process no chance to handle
		 * the signal and notice the kernel fault information,
		 * so that won't result in polluting the information
		 * about previously queued, but not yet delivered,
		 * faults.  See also do_general_protection below.
		 */
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = trapnr;

		if (exception_trace && unhandled_signal(tsk, signr))
			printk(KERN_INFO
			       "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
				tsk->comm, tsk->pid, str,
				regs->rip, regs->rsp, error_code);

		if (info)
			force_sig_info(signr, info, tsk);
		else
			force_sig(signr, tsk);
		return;
	}


	/* kernel trap */
	{
		const struct exception_table_entry *fixup;
		fixup = search_exception_tables(regs->rip);
		if (fixup)
			regs->rip = fixup->fixup;
		else {
			tsk->thread.error_code = error_code;
			tsk->thread.trap_no = trapnr;
			die(str, regs, error_code);
		}
		return;
	}
}
608 608
/* Generate do_<name>() for a trap that carries no siginfo payload. */
#define DO_ERROR(trapnr, signr, str, name) \
asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
							== NOTIFY_STOP) \
		return; \
	conditional_sti(regs); \
	do_trap(trapnr, signr, str, regs, error_code, NULL); \
}

/* Generate do_<name>() for a trap that fills in si_code/si_addr. */
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
	siginfo_t info; \
	info.si_signo = signr; \
	info.si_errno = 0; \
	info.si_code = sicode; \
	info.si_addr = (void __user *)siaddr; \
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
							== NOTIFY_STOP) \
		return; \
	conditional_sti(regs); \
	do_trap(trapnr, signr, str, regs, error_code, &info); \
}
633 633
/* Handlers for the architecturally defined exception vectors. */
DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->rip)
DO_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_ERROR( 5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip)
DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
DO_ERROR(18, SIGSEGV, "reserved", reserved)
644 644
/* Runs on IST stack */
/* Vector 12, stack-segment fault: like DO_ERROR but IST-aware. */
asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
{
	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
			12, SIGBUS) == NOTIFY_STOP)
		return;
	preempt_conditional_sti(regs);
	do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
	preempt_conditional_cli(regs);
}
655 655
/* Vector 8, double fault: unrecoverable; loop in die() forever. */
asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
{
	static const char str[] = "double fault";
	struct task_struct *tsk = current;

	/* Return not checked because a double fault cannot be ignored */
	notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 8;

	/* This is always a kernel trap and never fixable (and thus must
	   never return). */
	for (;;)
		die(str, regs, error_code);
}
672 672
/*
 * Vector 13, general protection fault: SIGSEGV for user mode,
 * exception-table fixup or die() for kernel mode.
 */
asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
						long error_code)
{
	struct task_struct *tsk = current;

	conditional_sti(regs);

	if (user_mode(regs)) {
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = 13;

		if (exception_trace && unhandled_signal(tsk, SIGSEGV))
			printk(KERN_INFO
		       "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
				tsk->comm, tsk->pid,
				regs->rip, regs->rsp, error_code);

		force_sig(SIGSEGV, tsk);
		return;
	}

	/* kernel gp */
	{
		const struct exception_table_entry *fixup;
		fixup = search_exception_tables(regs->rip);
		if (fixup) {
			regs->rip = fixup->fixup;
			return;
		}

		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = 13;
		if (notify_die(DIE_GPF, "general protection fault", regs,
					error_code, 13, SIGSEGV) == NOTIFY_STOP)
			return;
		die("general protection fault", regs, error_code);
	}
}
711 711
/* NMI with port 0x61 bit 7 set: memory/PCI parity (SERR) error. */
static __kprobes void
mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
		reason);
	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");

	/* Clear and disable the memory parity error line. */
	reason = (reason & 0xf) | 4;
	outb(reason, 0x61);
}
728 728
/* NMI with port 0x61 bit 6 set: I/O check (IOCHK) error. */
static __kprobes void
io_check_error(unsigned char reason, struct pt_regs * regs)
{
	printk("NMI: IOCK error (debug interrupt?)\n");
	show_registers(regs);

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & 0xf) | 8;
	outb(reason, 0x61);
	mdelay(2000);
	reason &= ~8;
	outb(reason, 0x61);
}
742 742
/* NMI that no handler claimed and no hardware reason bit explains. */
static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
		reason);
	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
}
755 755
756 /* Runs on IST stack. This code must keep interrupts off all the time. 756 /* Runs on IST stack. This code must keep interrupts off all the time.
757 Nested NMIs are prevented by the CPU. */ 757 Nested NMIs are prevented by the CPU. */
758 asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) 758 asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
759 { 759 {
760 unsigned char reason = 0; 760 unsigned char reason = 0;
761 int cpu; 761 int cpu;
762 762
763 cpu = smp_processor_id(); 763 cpu = smp_processor_id();
764 764
765 /* Only the BSP gets external NMIs from the system. */ 765 /* Only the BSP gets external NMIs from the system. */
766 if (!cpu) 766 if (!cpu)
767 reason = get_nmi_reason(); 767 reason = get_nmi_reason();
768 768
769 if (!(reason & 0xc0)) { 769 if (!(reason & 0xc0)) {
770 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 770 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
771 == NOTIFY_STOP) 771 == NOTIFY_STOP)
772 return; 772 return;
773 /* 773 /*
774 * Ok, so this is none of the documented NMI sources, 774 * Ok, so this is none of the documented NMI sources,
775 * so it must be the NMI watchdog. 775 * so it must be the NMI watchdog.
776 */ 776 */
777 if (nmi_watchdog_tick(regs,reason)) 777 if (nmi_watchdog_tick(regs,reason))
778 return; 778 return;
779 if (notify_die(DIE_NMI_POST, "nmi_post", regs, reason, 2, 0)
780 == NOTIFY_STOP)
781 return;
782 if (!do_nmi_callback(regs,cpu)) 779 if (!do_nmi_callback(regs,cpu))
783 unknown_nmi_error(reason, regs); 780 unknown_nmi_error(reason, regs);
784 781
785 return; 782 return;
786 } 783 }
787 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 784 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
788 return; 785 return;
789 786
790 /* AK: following checks seem to be broken on modern chipsets. FIXME */ 787 /* AK: following checks seem to be broken on modern chipsets. FIXME */
791 788
792 if (reason & 0x80) 789 if (reason & 0x80)
793 mem_parity_error(reason, regs); 790 mem_parity_error(reason, regs);
794 if (reason & 0x40) 791 if (reason & 0x40)
795 io_check_error(reason, regs); 792 io_check_error(reason, regs);
796 } 793 }
797 794
/* runs on IST stack. */
/* Vector 3, int3 breakpoint: kprobes and friends get first refusal. */
asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
{
	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
		return;
	}
	preempt_conditional_sti(regs);
	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
	preempt_conditional_cli(regs);
}
808 805
/* Help handler running on IST stack to switch back to user stack
   for scheduling or signal handling. The actual stack switch is done in
   entry.S */
asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
{
	struct pt_regs *regs = eregs;
	/* Did already sync */
	if (eregs == (struct pt_regs *)eregs->rsp)
		;
	/* Exception from user space */
	else if (user_mode(eregs))
		regs = task_pt_regs(current);
	/* Exception from kernel and interrupts are enabled. Move to
 	   kernel process stack. */
	else if (eregs->eflags & X86_EFLAGS_IF)
		regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs));
	/* Copy the saved register frame to its new location. */
	if (eregs != regs)
		*regs = *eregs;
	return regs;
}
829 826
/* runs on IST stack. */
/*
 * Vector 1, debug exception: filters spurious DR7/TF traps, records
 * DR6 in the task, and delivers SIGTRAP for real breakpoints or
 * single-steps.
 */
asmlinkage void __kprobes do_debug(struct pt_regs * regs,
				   unsigned long error_code)
{
	unsigned long condition;
	struct task_struct *tsk = current;
	siginfo_t info;

	/* DR6 holds the cause bits for this debug exception. */
	get_debugreg(condition, 6);

	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
						SIGTRAP) == NOTIFY_STOP)
		return;

	preempt_conditional_sti(regs);

	/* Mask out spurious debug traps due to lazy DR7 setting */
	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
		if (!tsk->thread.debugreg7) {
			goto clear_dr7;
		}
	}

	tsk->thread.debugreg6 = condition;

	/* Mask out spurious TF errors due to lazy TF clearing */
	if (condition & DR_STEP) {
		/*
		 * The TF error should be masked out only if the current
		 * process is not traced and if the TRAP flag has been set
		 * previously by a tracing process (condition detected by
		 * the PT_DTRACE flag); remember that the i386 TRAP flag
		 * can be modified by the process itself in user mode,
		 * allowing programs to debug themselves without the ptrace()
		 * interface.
		 */
                if (!user_mode(regs))
                       goto clear_TF_reenable;
		/*
		 * Was the TF flag set by a debugger? If so, clear it now,
		 * so that register information is correct.
		 */
		if (tsk->ptrace & PT_DTRACE) {
			regs->eflags &= ~TF_MASK;
			tsk->ptrace &= ~PT_DTRACE;
		}
	}

	/* Ok, finally something we can handle */
	tsk->thread.trap_no = 1;
	tsk->thread.error_code = error_code;
	info.si_signo = SIGTRAP;
	info.si_errno = 0;
	info.si_code = TRAP_BRKPT;
	info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL;
	force_sig_info(SIGTRAP, &info, tsk);

clear_dr7:
	/* Disable all hardware breakpoints to stop the spurious traps. */
	set_debugreg(0UL, 7);
	preempt_conditional_cli(regs);
	return;

clear_TF_reenable:
	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
	regs->eflags &= ~TF_MASK;
	preempt_conditional_cli(regs);
}
897 894
/*
 * Handle an FPU/SIMD exception taken in kernel mode: return 1 after an
 * exception-table fixup, otherwise die() (returns 0 only formally).
 */
static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
{
	const struct exception_table_entry *fixup;
	fixup = search_exception_tables(regs->rip);
	if (fixup) {
		regs->rip = fixup->fixup;
		return 1;
	}
	notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
	/* Illegal floating point operation in the kernel */
	current->thread.trap_no = trapnr;
	die(str, regs, 0);
	return 0;
}
912 909
/*
 * Note that we play around with the 'TS' bit in an attempt to get
 * the correct behaviour even in the presence of the asynchronous
 * IRQ13 behaviour
 */
/*
 * Vector 16, x87 floating-point error: decode the FPU status word into
 * a SIGFPE si_code and deliver it to the current task.
 */
asmlinkage void do_coprocessor_error(struct pt_regs *regs)
{
	void __user *rip = (void __user *)(regs->rip);
	struct task_struct * task;
	siginfo_t info;
	unsigned short cwd, swd;

	conditional_sti(regs);
	if (!user_mode(regs) &&
	    kernel_math_error(regs, "kernel x87 math error", 16))
		return;

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 16;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
	info.si_addr = rip;
	/*
	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
	 * status.  0x3f is the exception bits in these regs, 0x200 is the
	 * C1 reg you need in case of a stack fault, 0x040 is the stack
	 * fault bit.  We should only be taking one exception at a time,
	 * so if this combination doesn't produce any single exception,
	 * then we have a bad program that isn't synchronizing its FPU usage
	 * and it will suffer the consequences since we won't be able to
	 * fully reproduce the context of the exception
	 */
	cwd = get_fpu_cwd(task);
	swd = get_fpu_swd(task);
	switch (swd & ~cwd & 0x3f) {
		case 0x000:
		default:
			break;
		case 0x001: /* Invalid Op */
			/*
			 * swd & 0x240 == 0x040: Stack Underflow
			 * swd & 0x240 == 0x240: Stack Overflow
			 * User must clear the SF bit (0x40) if set
			 */
			info.si_code = FPE_FLTINV;
			break;
		case 0x002: /* Denormalize */
		case 0x010: /* Underflow */
			info.si_code = FPE_FLTUND;
			break;
		case 0x004: /* Zero Divide */
			info.si_code = FPE_FLTDIV;
			break;
		case 0x008: /* Overflow */
			info.si_code = FPE_FLTOVF;
			break;
		case 0x020: /* Precision */
			info.si_code = FPE_FLTRES;
			break;
	}
	force_sig_info(SIGFPE, &info, task);
}
981 978
/* Fallback handler for interrupt vectors with no registered handler. */
asmlinkage void bad_intr(void)
{
	printk("bad interrupt"); 
}
986 983
/*
 * Vector 19, SIMD (SSE) floating-point error: decode MXCSR into a
 * SIGFPE si_code and deliver it to the current task.
 */
asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
{
	void __user *rip = (void __user *)(regs->rip);
	struct task_struct * task;
	siginfo_t info;
	unsigned short mxcsr;

	conditional_sti(regs);
	if (!user_mode(regs) &&
        	kernel_math_error(regs, "kernel simd math error", 19))
		return;

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 19;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
	info.si_addr = rip;
	/*
	 * The SIMD FPU exceptions are handled a little differently, as there
	 * is only a single status/control register.  Thus, to determine which
	 * unmasked exception was caught we must mask the exception mask bits
	 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
	 */
	mxcsr = get_fpu_mxcsr(task);
	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
		case 0x000:
		default:
			break;
		case 0x001: /* Invalid Op */
			info.si_code = FPE_FLTINV;
			break;
		case 0x002: /* Denormalize */
		case 0x010: /* Underflow */
			info.si_code = FPE_FLTUND;
			break;
		case 0x004: /* Zero Divide */
			info.si_code = FPE_FLTDIV;
			break;
		case 0x008: /* Overflow */
			info.si_code = FPE_FLTOVF;
			break;
		case 0x020: /* Precision */
			info.si_code = FPE_FLTRES;
			break;
	}
	force_sig_info(SIGFPE, &info, task);
}
1040 1037
1041 asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) 1038 asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
1042 { 1039 {
1043 } 1040 }
1044 1041
1045 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) 1042 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
1046 { 1043 {
1047 } 1044 }
1048 1045
1049 asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) 1046 asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
1050 { 1047 {
1051 } 1048 }
1052 1049
1053 /* 1050 /*
1054 * 'math_state_restore()' saves the current math information in the 1051 * 'math_state_restore()' saves the current math information in the
1055 * old math state array, and gets the new ones from the current task 1052 * old math state array, and gets the new ones from the current task
1056 * 1053 *
1057 * Careful.. There are problems with IBM-designed IRQ13 behaviour. 1054 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
1058 * Don't touch unless you *really* know how it works. 1055 * Don't touch unless you *really* know how it works.
1059 */ 1056 */
1060 asmlinkage void math_state_restore(void) 1057 asmlinkage void math_state_restore(void)
1061 { 1058 {
1062 struct task_struct *me = current; 1059 struct task_struct *me = current;
1063 clts(); /* Allow maths ops (or we recurse) */ 1060 clts(); /* Allow maths ops (or we recurse) */
1064 1061
1065 if (!used_math()) 1062 if (!used_math())
1066 init_fpu(me); 1063 init_fpu(me);
1067 restore_fpu_checking(&me->thread.i387.fxsave); 1064 restore_fpu_checking(&me->thread.i387.fxsave);
1068 task_thread_info(me)->status |= TS_USEDFPU; 1065 task_thread_info(me)->status |= TS_USEDFPU;
1069 me->fpu_counter++; 1066 me->fpu_counter++;
1070 } 1067 }
1071 1068
1072 void __init trap_init(void) 1069 void __init trap_init(void)
1073 { 1070 {
1074 set_intr_gate(0,&divide_error); 1071 set_intr_gate(0,&divide_error);
1075 set_intr_gate_ist(1,&debug,DEBUG_STACK); 1072 set_intr_gate_ist(1,&debug,DEBUG_STACK);
1076 set_intr_gate_ist(2,&nmi,NMI_STACK); 1073 set_intr_gate_ist(2,&nmi,NMI_STACK);
1077 set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ 1074 set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */
1078 set_system_gate(4,&overflow); /* int4 can be called from all */ 1075 set_system_gate(4,&overflow); /* int4 can be called from all */
1079 set_intr_gate(5,&bounds); 1076 set_intr_gate(5,&bounds);
1080 set_intr_gate(6,&invalid_op); 1077 set_intr_gate(6,&invalid_op);
1081 set_intr_gate(7,&device_not_available); 1078 set_intr_gate(7,&device_not_available);
1082 set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); 1079 set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
1083 set_intr_gate(9,&coprocessor_segment_overrun); 1080 set_intr_gate(9,&coprocessor_segment_overrun);
1084 set_intr_gate(10,&invalid_TSS); 1081 set_intr_gate(10,&invalid_TSS);
1085 set_intr_gate(11,&segment_not_present); 1082 set_intr_gate(11,&segment_not_present);
1086 set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK); 1083 set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
1087 set_intr_gate(13,&general_protection); 1084 set_intr_gate(13,&general_protection);
1088 set_intr_gate(14,&page_fault); 1085 set_intr_gate(14,&page_fault);
1089 set_intr_gate(15,&spurious_interrupt_bug); 1086 set_intr_gate(15,&spurious_interrupt_bug);
1090 set_intr_gate(16,&coprocessor_error); 1087 set_intr_gate(16,&coprocessor_error);
1091 set_intr_gate(17,&alignment_check); 1088 set_intr_gate(17,&alignment_check);
1092 #ifdef CONFIG_X86_MCE 1089 #ifdef CONFIG_X86_MCE
1093 set_intr_gate_ist(18,&machine_check, MCE_STACK); 1090 set_intr_gate_ist(18,&machine_check, MCE_STACK);
1094 #endif 1091 #endif
1095 set_intr_gate(19,&simd_coprocessor_error); 1092 set_intr_gate(19,&simd_coprocessor_error);
1096 1093
1097 #ifdef CONFIG_IA32_EMULATION 1094 #ifdef CONFIG_IA32_EMULATION
1098 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 1095 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1099 #endif 1096 #endif
1100 1097
1101 /* 1098 /*
1102 * Should be a barrier for any external CPU state. 1099 * Should be a barrier for any external CPU state.
1103 */ 1100 */
1104 cpu_init(); 1101 cpu_init();
1105 } 1102 }
1106 1103
1107 1104
1108 static int __init oops_setup(char *s) 1105 static int __init oops_setup(char *s)
1109 { 1106 {
1110 if (!s) 1107 if (!s)
1111 return -EINVAL; 1108 return -EINVAL;
1112 if (!strcmp(s, "panic")) 1109 if (!strcmp(s, "panic"))
1113 panic_on_oops = 1; 1110 panic_on_oops = 1;
1114 return 0; 1111 return 0;
1115 } 1112 }
1116 early_param("oops", oops_setup); 1113 early_param("oops", oops_setup);
1117 1114
1118 static int __init kstack_setup(char *s) 1115 static int __init kstack_setup(char *s)
1119 { 1116 {
1120 if (!s) 1117 if (!s)
1121 return -EINVAL; 1118 return -EINVAL;
1122 kstack_depth_to_print = simple_strtoul(s,NULL,0); 1119 kstack_depth_to_print = simple_strtoul(s,NULL,0);
1123 return 0; 1120 return 0;
1124 } 1121 }
1125 early_param("kstack", kstack_setup); 1122 early_param("kstack", kstack_setup);
1126 1123
drivers/char/ipmi/ipmi_watchdog.c
1 /* 1 /*
2 * ipmi_watchdog.c 2 * ipmi_watchdog.c
3 * 3 *
4 * A watchdog timer based upon the IPMI interface. 4 * A watchdog timer based upon the IPMI interface.
5 * 5 *
6 * Author: MontaVista Software, Inc. 6 * Author: MontaVista Software, Inc.
7 * Corey Minyard <minyard@mvista.com> 7 * Corey Minyard <minyard@mvista.com>
8 * source@mvista.com 8 * source@mvista.com
9 * 9 *
10 * Copyright 2002 MontaVista Software Inc. 10 * Copyright 2002 MontaVista Software Inc.
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify it 12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the 13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 2 of the License, or (at your 14 * Free Software Foundation; either version 2 of the License, or (at your
15 * option) any later version. 15 * option) any later version.
16 * 16 *
17 * 17 *
18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
24 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
26 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 26 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
27 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * 28 *
29 * You should have received a copy of the GNU General Public License along 29 * You should have received a copy of the GNU General Public License along
30 * with this program; if not, write to the Free Software Foundation, Inc., 30 * with this program; if not, write to the Free Software Foundation, Inc.,
31 * 675 Mass Ave, Cambridge, MA 02139, USA. 31 * 675 Mass Ave, Cambridge, MA 02139, USA.
32 */ 32 */
33 33
34 #include <linux/module.h> 34 #include <linux/module.h>
35 #include <linux/moduleparam.h> 35 #include <linux/moduleparam.h>
36 #include <linux/ipmi.h> 36 #include <linux/ipmi.h>
37 #include <linux/ipmi_smi.h> 37 #include <linux/ipmi_smi.h>
38 #include <linux/watchdog.h> 38 #include <linux/watchdog.h>
39 #include <linux/miscdevice.h> 39 #include <linux/miscdevice.h>
40 #include <linux/init.h> 40 #include <linux/init.h>
41 #include <linux/completion.h> 41 #include <linux/completion.h>
42 #include <linux/kdebug.h> 42 #include <linux/kdebug.h>
43 #include <linux/rwsem.h> 43 #include <linux/rwsem.h>
44 #include <linux/errno.h> 44 #include <linux/errno.h>
45 #include <asm/uaccess.h> 45 #include <asm/uaccess.h>
46 #include <linux/notifier.h> 46 #include <linux/notifier.h>
47 #include <linux/nmi.h> 47 #include <linux/nmi.h>
48 #include <linux/reboot.h> 48 #include <linux/reboot.h>
49 #include <linux/wait.h> 49 #include <linux/wait.h>
50 #include <linux/poll.h> 50 #include <linux/poll.h>
51 #include <linux/string.h> 51 #include <linux/string.h>
52 #include <linux/ctype.h> 52 #include <linux/ctype.h>
53 #include <linux/delay.h>
54 #include <asm/atomic.h> 53 #include <asm/atomic.h>
55 54
56 #ifdef CONFIG_X86 55 #ifdef CONFIG_X86_LOCAL_APIC
57 /* This is ugly, but I've determined that x86 is the only architecture 56 #include <asm/apic.h>
58 that can reasonably support the IPMI NMI watchdog timeout at this
59 time. If another architecture adds this capability somehow, it
60 will have to be a somewhat different mechanism and I have no idea
61 how it will work. So in the unlikely event that another
62 architecture supports this, we can figure out a good generic
63 mechanism for it at that time. */
64 #define HAVE_DIE_NMI_POST
65 #endif 57 #endif
66 58
67 #define PFX "IPMI Watchdog: " 59 #define PFX "IPMI Watchdog: "
68 60
69 /* 61 /*
70 * The IPMI command/response information for the watchdog timer. 62 * The IPMI command/response information for the watchdog timer.
71 */ 63 */
72 64
73 /* values for byte 1 of the set command, byte 2 of the get response. */ 65 /* values for byte 1 of the set command, byte 2 of the get response. */
74 #define WDOG_DONT_LOG (1 << 7) 66 #define WDOG_DONT_LOG (1 << 7)
75 #define WDOG_DONT_STOP_ON_SET (1 << 6) 67 #define WDOG_DONT_STOP_ON_SET (1 << 6)
76 #define WDOG_SET_TIMER_USE(byte, use) \ 68 #define WDOG_SET_TIMER_USE(byte, use) \
77 byte = ((byte) & 0xf8) | ((use) & 0x7) 69 byte = ((byte) & 0xf8) | ((use) & 0x7)
78 #define WDOG_GET_TIMER_USE(byte) ((byte) & 0x7) 70 #define WDOG_GET_TIMER_USE(byte) ((byte) & 0x7)
79 #define WDOG_TIMER_USE_BIOS_FRB2 1 71 #define WDOG_TIMER_USE_BIOS_FRB2 1
80 #define WDOG_TIMER_USE_BIOS_POST 2 72 #define WDOG_TIMER_USE_BIOS_POST 2
81 #define WDOG_TIMER_USE_OS_LOAD 3 73 #define WDOG_TIMER_USE_OS_LOAD 3
82 #define WDOG_TIMER_USE_SMS_OS 4 74 #define WDOG_TIMER_USE_SMS_OS 4
83 #define WDOG_TIMER_USE_OEM 5 75 #define WDOG_TIMER_USE_OEM 5
84 76
85 /* values for byte 2 of the set command, byte 3 of the get response. */ 77 /* values for byte 2 of the set command, byte 3 of the get response. */
86 #define WDOG_SET_PRETIMEOUT_ACT(byte, use) \ 78 #define WDOG_SET_PRETIMEOUT_ACT(byte, use) \
87 byte = ((byte) & 0x8f) | (((use) & 0x7) << 4) 79 byte = ((byte) & 0x8f) | (((use) & 0x7) << 4)
88 #define WDOG_GET_PRETIMEOUT_ACT(byte) (((byte) >> 4) & 0x7) 80 #define WDOG_GET_PRETIMEOUT_ACT(byte) (((byte) >> 4) & 0x7)
89 #define WDOG_PRETIMEOUT_NONE 0 81 #define WDOG_PRETIMEOUT_NONE 0
90 #define WDOG_PRETIMEOUT_SMI 1 82 #define WDOG_PRETIMEOUT_SMI 1
91 #define WDOG_PRETIMEOUT_NMI 2 83 #define WDOG_PRETIMEOUT_NMI 2
92 #define WDOG_PRETIMEOUT_MSG_INT 3 84 #define WDOG_PRETIMEOUT_MSG_INT 3
93 85
94 /* Operations that can be performed on a pretimout. */ 86 /* Operations that can be performed on a pretimout. */
95 #define WDOG_PREOP_NONE 0 87 #define WDOG_PREOP_NONE 0
96 #define WDOG_PREOP_PANIC 1 88 #define WDOG_PREOP_PANIC 1
97 #define WDOG_PREOP_GIVE_DATA 2 /* Cause data to be available to 89 #define WDOG_PREOP_GIVE_DATA 2 /* Cause data to be available to
98 read. Doesn't work in NMI 90 read. Doesn't work in NMI
99 mode. */ 91 mode. */
100 92
101 /* Actions to perform on a full timeout. */ 93 /* Actions to perform on a full timeout. */
102 #define WDOG_SET_TIMEOUT_ACT(byte, use) \ 94 #define WDOG_SET_TIMEOUT_ACT(byte, use) \
103 byte = ((byte) & 0xf8) | ((use) & 0x7) 95 byte = ((byte) & 0xf8) | ((use) & 0x7)
104 #define WDOG_GET_TIMEOUT_ACT(byte) ((byte) & 0x7) 96 #define WDOG_GET_TIMEOUT_ACT(byte) ((byte) & 0x7)
105 #define WDOG_TIMEOUT_NONE 0 97 #define WDOG_TIMEOUT_NONE 0
106 #define WDOG_TIMEOUT_RESET 1 98 #define WDOG_TIMEOUT_RESET 1
107 #define WDOG_TIMEOUT_POWER_DOWN 2 99 #define WDOG_TIMEOUT_POWER_DOWN 2
108 #define WDOG_TIMEOUT_POWER_CYCLE 3 100 #define WDOG_TIMEOUT_POWER_CYCLE 3
109 101
110 /* Byte 3 of the get command, byte 4 of the get response is the 102 /* Byte 3 of the get command, byte 4 of the get response is the
111 pre-timeout in seconds. */ 103 pre-timeout in seconds. */
112 104
113 /* Bits for setting byte 4 of the set command, byte 5 of the get response. */ 105 /* Bits for setting byte 4 of the set command, byte 5 of the get response. */
114 #define WDOG_EXPIRE_CLEAR_BIOS_FRB2 (1 << 1) 106 #define WDOG_EXPIRE_CLEAR_BIOS_FRB2 (1 << 1)
115 #define WDOG_EXPIRE_CLEAR_BIOS_POST (1 << 2) 107 #define WDOG_EXPIRE_CLEAR_BIOS_POST (1 << 2)
116 #define WDOG_EXPIRE_CLEAR_OS_LOAD (1 << 3) 108 #define WDOG_EXPIRE_CLEAR_OS_LOAD (1 << 3)
117 #define WDOG_EXPIRE_CLEAR_SMS_OS (1 << 4) 109 #define WDOG_EXPIRE_CLEAR_SMS_OS (1 << 4)
118 #define WDOG_EXPIRE_CLEAR_OEM (1 << 5) 110 #define WDOG_EXPIRE_CLEAR_OEM (1 << 5)
119 111
120 /* Setting/getting the watchdog timer value. This is for bytes 5 and 112 /* Setting/getting the watchdog timer value. This is for bytes 5 and
121 6 (the timeout time) of the set command, and bytes 6 and 7 (the 113 6 (the timeout time) of the set command, and bytes 6 and 7 (the
122 timeout time) and 8 and 9 (the current countdown value) of the 114 timeout time) and 8 and 9 (the current countdown value) of the
123 response. The timeout value is given in seconds (in the command it 115 response. The timeout value is given in seconds (in the command it
124 is 100ms intervals). */ 116 is 100ms intervals). */
125 #define WDOG_SET_TIMEOUT(byte1, byte2, val) \ 117 #define WDOG_SET_TIMEOUT(byte1, byte2, val) \
126 (byte1) = (((val) * 10) & 0xff), (byte2) = (((val) * 10) >> 8) 118 (byte1) = (((val) * 10) & 0xff), (byte2) = (((val) * 10) >> 8)
127 #define WDOG_GET_TIMEOUT(byte1, byte2) \ 119 #define WDOG_GET_TIMEOUT(byte1, byte2) \
128 (((byte1) | ((byte2) << 8)) / 10) 120 (((byte1) | ((byte2) << 8)) / 10)
129 121
130 #define IPMI_WDOG_RESET_TIMER 0x22 122 #define IPMI_WDOG_RESET_TIMER 0x22
131 #define IPMI_WDOG_SET_TIMER 0x24 123 #define IPMI_WDOG_SET_TIMER 0x24
132 #define IPMI_WDOG_GET_TIMER 0x25 124 #define IPMI_WDOG_GET_TIMER 0x25
133 125
134 /* These are here until the real ones get into the watchdog.h interface. */ 126 /* These are here until the real ones get into the watchdog.h interface. */
135 #ifndef WDIOC_GETTIMEOUT 127 #ifndef WDIOC_GETTIMEOUT
136 #define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int) 128 #define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
137 #endif 129 #endif
138 #ifndef WDIOC_SET_PRETIMEOUT 130 #ifndef WDIOC_SET_PRETIMEOUT
139 #define WDIOC_SET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 21, int) 131 #define WDIOC_SET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 21, int)
140 #endif 132 #endif
141 #ifndef WDIOC_GET_PRETIMEOUT 133 #ifndef WDIOC_GET_PRETIMEOUT
142 #define WDIOC_GET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 22, int) 134 #define WDIOC_GET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 22, int)
143 #endif 135 #endif
144 136
145 static int nowayout = WATCHDOG_NOWAYOUT; 137 static int nowayout = WATCHDOG_NOWAYOUT;
146 138
147 static ipmi_user_t watchdog_user; 139 static ipmi_user_t watchdog_user;
148 static int watchdog_ifnum; 140 static int watchdog_ifnum;
149 141
150 /* Default the timeout to 10 seconds. */ 142 /* Default the timeout to 10 seconds. */
151 static int timeout = 10; 143 static int timeout = 10;
152 144
153 /* The pre-timeout is disabled by default. */ 145 /* The pre-timeout is disabled by default. */
154 static int pretimeout; 146 static int pretimeout;
155 147
156 /* Default action is to reset the board on a timeout. */ 148 /* Default action is to reset the board on a timeout. */
157 static unsigned char action_val = WDOG_TIMEOUT_RESET; 149 static unsigned char action_val = WDOG_TIMEOUT_RESET;
158 150
159 static char action[16] = "reset"; 151 static char action[16] = "reset";
160 152
161 static unsigned char preaction_val = WDOG_PRETIMEOUT_NONE; 153 static unsigned char preaction_val = WDOG_PRETIMEOUT_NONE;
162 154
163 static char preaction[16] = "pre_none"; 155 static char preaction[16] = "pre_none";
164 156
165 static unsigned char preop_val = WDOG_PREOP_NONE; 157 static unsigned char preop_val = WDOG_PREOP_NONE;
166 158
167 static char preop[16] = "preop_none"; 159 static char preop[16] = "preop_none";
168 static DEFINE_SPINLOCK(ipmi_read_lock); 160 static DEFINE_SPINLOCK(ipmi_read_lock);
169 static char data_to_read; 161 static char data_to_read;
170 static DECLARE_WAIT_QUEUE_HEAD(read_q); 162 static DECLARE_WAIT_QUEUE_HEAD(read_q);
171 static struct fasync_struct *fasync_q; 163 static struct fasync_struct *fasync_q;
172 static char pretimeout_since_last_heartbeat; 164 static char pretimeout_since_last_heartbeat;
173 static char expect_close; 165 static char expect_close;
174 166
175 static int ifnum_to_use = -1; 167 static int ifnum_to_use = -1;
176 168
177 static DECLARE_RWSEM(register_sem); 169 static DECLARE_RWSEM(register_sem);
178 170
179 /* Parameters to ipmi_set_timeout */ 171 /* Parameters to ipmi_set_timeout */
180 #define IPMI_SET_TIMEOUT_NO_HB 0 172 #define IPMI_SET_TIMEOUT_NO_HB 0
181 #define IPMI_SET_TIMEOUT_HB_IF_NECESSARY 1 173 #define IPMI_SET_TIMEOUT_HB_IF_NECESSARY 1
182 #define IPMI_SET_TIMEOUT_FORCE_HB 2 174 #define IPMI_SET_TIMEOUT_FORCE_HB 2
183 175
184 static int ipmi_set_timeout(int do_heartbeat); 176 static int ipmi_set_timeout(int do_heartbeat);
185 static void ipmi_register_watchdog(int ipmi_intf); 177 static void ipmi_register_watchdog(int ipmi_intf);
186 static void ipmi_unregister_watchdog(int ipmi_intf); 178 static void ipmi_unregister_watchdog(int ipmi_intf);
187 179
188 /* If true, the driver will start running as soon as it is configured 180 /* If true, the driver will start running as soon as it is configured
189 and ready. */ 181 and ready. */
190 static int start_now; 182 static int start_now;
191 183
192 static int set_param_int(const char *val, struct kernel_param *kp) 184 static int set_param_int(const char *val, struct kernel_param *kp)
193 { 185 {
194 char *endp; 186 char *endp;
195 int l; 187 int l;
196 int rv = 0; 188 int rv = 0;
197 189
198 if (!val) 190 if (!val)
199 return -EINVAL; 191 return -EINVAL;
200 l = simple_strtoul(val, &endp, 0); 192 l = simple_strtoul(val, &endp, 0);
201 if (endp == val) 193 if (endp == val)
202 return -EINVAL; 194 return -EINVAL;
203 195
204 down_read(&register_sem); 196 down_read(&register_sem);
205 *((int *)kp->arg) = l; 197 *((int *)kp->arg) = l;
206 if (watchdog_user) 198 if (watchdog_user)
207 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); 199 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
208 up_read(&register_sem); 200 up_read(&register_sem);
209 201
210 return rv; 202 return rv;
211 } 203 }
212 204
213 static int get_param_int(char *buffer, struct kernel_param *kp) 205 static int get_param_int(char *buffer, struct kernel_param *kp)
214 { 206 {
215 return sprintf(buffer, "%i", *((int *)kp->arg)); 207 return sprintf(buffer, "%i", *((int *)kp->arg));
216 } 208 }
217 209
218 typedef int (*action_fn)(const char *intval, char *outval); 210 typedef int (*action_fn)(const char *intval, char *outval);
219 211
220 static int action_op(const char *inval, char *outval); 212 static int action_op(const char *inval, char *outval);
221 static int preaction_op(const char *inval, char *outval); 213 static int preaction_op(const char *inval, char *outval);
222 static int preop_op(const char *inval, char *outval); 214 static int preop_op(const char *inval, char *outval);
223 static void check_parms(void); 215 static void check_parms(void);
224 216
225 static int set_param_str(const char *val, struct kernel_param *kp) 217 static int set_param_str(const char *val, struct kernel_param *kp)
226 { 218 {
227 action_fn fn = (action_fn) kp->arg; 219 action_fn fn = (action_fn) kp->arg;
228 int rv = 0; 220 int rv = 0;
229 char valcp[16]; 221 char valcp[16];
230 char *s; 222 char *s;
231 223
232 strncpy(valcp, val, 16); 224 strncpy(valcp, val, 16);
233 valcp[15] = '\0'; 225 valcp[15] = '\0';
234 226
235 s = strstrip(valcp); 227 s = strstrip(valcp);
236 228
237 down_read(&register_sem); 229 down_read(&register_sem);
238 rv = fn(s, NULL); 230 rv = fn(s, NULL);
239 if (rv) 231 if (rv)
240 goto out_unlock; 232 goto out_unlock;
241 233
242 check_parms(); 234 check_parms();
243 if (watchdog_user) 235 if (watchdog_user)
244 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); 236 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
245 237
246 out_unlock: 238 out_unlock:
247 up_read(&register_sem); 239 up_read(&register_sem);
248 return rv; 240 return rv;
249 } 241 }
250 242
251 static int get_param_str(char *buffer, struct kernel_param *kp) 243 static int get_param_str(char *buffer, struct kernel_param *kp)
252 { 244 {
253 action_fn fn = (action_fn) kp->arg; 245 action_fn fn = (action_fn) kp->arg;
254 int rv; 246 int rv;
255 247
256 rv = fn(NULL, buffer); 248 rv = fn(NULL, buffer);
257 if (rv) 249 if (rv)
258 return rv; 250 return rv;
259 return strlen(buffer); 251 return strlen(buffer);
260 } 252 }
261 253
262 254
263 static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) 255 static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp)
264 { 256 {
265 int rv = param_set_int(val, kp); 257 int rv = param_set_int(val, kp);
266 if (rv) 258 if (rv)
267 return rv; 259 return rv;
268 if ((ifnum_to_use < 0) || (ifnum_to_use == watchdog_ifnum)) 260 if ((ifnum_to_use < 0) || (ifnum_to_use == watchdog_ifnum))
269 return 0; 261 return 0;
270 262
271 ipmi_unregister_watchdog(watchdog_ifnum); 263 ipmi_unregister_watchdog(watchdog_ifnum);
272 ipmi_register_watchdog(ifnum_to_use); 264 ipmi_register_watchdog(ifnum_to_use);
273 return 0; 265 return 0;
274 } 266 }
275 267
276 module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int, 268 module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int,
277 &ifnum_to_use, 0644); 269 &ifnum_to_use, 0644);
278 MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog " 270 MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog "
279 "timer. Setting to -1 defaults to the first registered " 271 "timer. Setting to -1 defaults to the first registered "
280 "interface"); 272 "interface");
281 273
282 module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644); 274 module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644);
283 MODULE_PARM_DESC(timeout, "Timeout value in seconds."); 275 MODULE_PARM_DESC(timeout, "Timeout value in seconds.");
284 276
285 module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644); 277 module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644);
286 MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds."); 278 MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
287 279
288 module_param_call(action, set_param_str, get_param_str, action_op, 0644); 280 module_param_call(action, set_param_str, get_param_str, action_op, 0644);
289 MODULE_PARM_DESC(action, "Timeout action. One of: " 281 MODULE_PARM_DESC(action, "Timeout action. One of: "
290 "reset, none, power_cycle, power_off."); 282 "reset, none, power_cycle, power_off.");
291 283
292 module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644); 284 module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644);
293 MODULE_PARM_DESC(preaction, "Pretimeout action. One of: " 285 MODULE_PARM_DESC(preaction, "Pretimeout action. One of: "
294 "pre_none, pre_smi, pre_nmi, pre_int."); 286 "pre_none, pre_smi, pre_nmi, pre_int.");
295 287
296 module_param_call(preop, set_param_str, get_param_str, preop_op, 0644); 288 module_param_call(preop, set_param_str, get_param_str, preop_op, 0644);
297 MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: " 289 MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: "
298 "preop_none, preop_panic, preop_give_data."); 290 "preop_none, preop_panic, preop_give_data.");
299 291
300 module_param(start_now, int, 0444); 292 module_param(start_now, int, 0444);
301 MODULE_PARM_DESC(start_now, "Set to 1 to start the watchdog as" 293 MODULE_PARM_DESC(start_now, "Set to 1 to start the watchdog as"
302 "soon as the driver is loaded."); 294 "soon as the driver is loaded.");
303 295
304 module_param(nowayout, int, 0644); 296 module_param(nowayout, int, 0644);
305 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " 297 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
306 "(default=CONFIG_WATCHDOG_NOWAYOUT)"); 298 "(default=CONFIG_WATCHDOG_NOWAYOUT)");
307 299
308 /* Default state of the timer. */ 300 /* Default state of the timer. */
309 static unsigned char ipmi_watchdog_state = WDOG_TIMEOUT_NONE; 301 static unsigned char ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
310 302
311 /* If shutting down via IPMI, we ignore the heartbeat. */ 303 /* If shutting down via IPMI, we ignore the heartbeat. */
312 static int ipmi_ignore_heartbeat; 304 static int ipmi_ignore_heartbeat;
313 305
314 /* Is someone using the watchdog? Only one user is allowed. */ 306 /* Is someone using the watchdog? Only one user is allowed. */
315 static unsigned long ipmi_wdog_open; 307 static unsigned long ipmi_wdog_open;
316 308
317 /* If set to 1, the heartbeat command will set the state to reset and 309 /* If set to 1, the heartbeat command will set the state to reset and
318 start the timer. The timer doesn't normally run when the driver is 310 start the timer. The timer doesn't normally run when the driver is
319 first opened until the heartbeat is set the first time, this 311 first opened until the heartbeat is set the first time, this
320 variable is used to accomplish this. */ 312 variable is used to accomplish this. */
321 static int ipmi_start_timer_on_heartbeat; 313 static int ipmi_start_timer_on_heartbeat;
322 314
323 /* IPMI version of the BMC. */ 315 /* IPMI version of the BMC. */
324 static unsigned char ipmi_version_major; 316 static unsigned char ipmi_version_major;
325 static unsigned char ipmi_version_minor; 317 static unsigned char ipmi_version_minor;
326 318
327 /* If a pretimeout occurs, this is used to allow only one panic to happen. */ 319 /* If a pretimeout occurs, this is used to allow only one panic to happen. */
328 static atomic_t preop_panic_excl = ATOMIC_INIT(-1); 320 static atomic_t preop_panic_excl = ATOMIC_INIT(-1);
329 321
330 #ifdef HAVE_DIE_NMI_POST
331 static int testing_nmi;
332 static int nmi_handler_registered;
333 #endif
334
335 static int ipmi_heartbeat(void); 322 static int ipmi_heartbeat(void);
336 static void panic_halt_ipmi_heartbeat(void); 323 static void panic_halt_ipmi_heartbeat(void);
337 324
338 325
339 /* We use a mutex to make sure that only one thing can send a set 326 /* We use a mutex to make sure that only one thing can send a set
340 timeout at one time, because we only have one copy of the data. 327 timeout at one time, because we only have one copy of the data.
341 The mutex is claimed when the set_timeout is sent and freed 328 The mutex is claimed when the set_timeout is sent and freed
342 when both messages are free. */ 329 when both messages are free. */
343 static atomic_t set_timeout_tofree = ATOMIC_INIT(0); 330 static atomic_t set_timeout_tofree = ATOMIC_INIT(0);
344 static DEFINE_MUTEX(set_timeout_lock); 331 static DEFINE_MUTEX(set_timeout_lock);
345 static DECLARE_COMPLETION(set_timeout_wait); 332 static DECLARE_COMPLETION(set_timeout_wait);
346 static void set_timeout_free_smi(struct ipmi_smi_msg *msg) 333 static void set_timeout_free_smi(struct ipmi_smi_msg *msg)
347 { 334 {
348 if (atomic_dec_and_test(&set_timeout_tofree)) 335 if (atomic_dec_and_test(&set_timeout_tofree))
349 complete(&set_timeout_wait); 336 complete(&set_timeout_wait);
350 } 337 }
351 static void set_timeout_free_recv(struct ipmi_recv_msg *msg) 338 static void set_timeout_free_recv(struct ipmi_recv_msg *msg)
352 { 339 {
353 if (atomic_dec_and_test(&set_timeout_tofree)) 340 if (atomic_dec_and_test(&set_timeout_tofree))
354 complete(&set_timeout_wait); 341 complete(&set_timeout_wait);
355 } 342 }
356 static struct ipmi_smi_msg set_timeout_smi_msg = 343 static struct ipmi_smi_msg set_timeout_smi_msg =
357 { 344 {
358 .done = set_timeout_free_smi 345 .done = set_timeout_free_smi
359 }; 346 };
360 static struct ipmi_recv_msg set_timeout_recv_msg = 347 static struct ipmi_recv_msg set_timeout_recv_msg =
361 { 348 {
362 .done = set_timeout_free_recv 349 .done = set_timeout_free_recv
363 }; 350 };
364 351
365 static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg, 352 static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg,
366 struct ipmi_recv_msg *recv_msg, 353 struct ipmi_recv_msg *recv_msg,
367 int *send_heartbeat_now) 354 int *send_heartbeat_now)
368 { 355 {
369 struct kernel_ipmi_msg msg; 356 struct kernel_ipmi_msg msg;
370 unsigned char data[6]; 357 unsigned char data[6];
371 int rv; 358 int rv;
372 struct ipmi_system_interface_addr addr; 359 struct ipmi_system_interface_addr addr;
373 int hbnow = 0; 360 int hbnow = 0;
374 361
375 362
376 /* These can be cleared as we are setting the timeout. */
377 ipmi_start_timer_on_heartbeat = 0;
378 pretimeout_since_last_heartbeat = 0;
379
380 data[0] = 0; 363 data[0] = 0;
381 WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); 364 WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
382 365
383 if ((ipmi_version_major > 1) 366 if ((ipmi_version_major > 1)
384 || ((ipmi_version_major == 1) && (ipmi_version_minor >= 5))) 367 || ((ipmi_version_major == 1) && (ipmi_version_minor >= 5)))
385 { 368 {
386 /* This is an IPMI 1.5-only feature. */ 369 /* This is an IPMI 1.5-only feature. */
387 data[0] |= WDOG_DONT_STOP_ON_SET; 370 data[0] |= WDOG_DONT_STOP_ON_SET;
388 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { 371 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
389 /* In ipmi 1.0, setting the timer stops the watchdog, we 372 /* In ipmi 1.0, setting the timer stops the watchdog, we
390 need to start it back up again. */ 373 need to start it back up again. */
391 hbnow = 1; 374 hbnow = 1;
392 } 375 }
393 376
394 data[1] = 0; 377 data[1] = 0;
395 WDOG_SET_TIMEOUT_ACT(data[1], ipmi_watchdog_state); 378 WDOG_SET_TIMEOUT_ACT(data[1], ipmi_watchdog_state);
396 if ((pretimeout > 0) && (ipmi_watchdog_state != WDOG_TIMEOUT_NONE)) { 379 if ((pretimeout > 0) && (ipmi_watchdog_state != WDOG_TIMEOUT_NONE)) {
397 WDOG_SET_PRETIMEOUT_ACT(data[1], preaction_val); 380 WDOG_SET_PRETIMEOUT_ACT(data[1], preaction_val);
398 data[2] = pretimeout; 381 data[2] = pretimeout;
399 } else { 382 } else {
400 WDOG_SET_PRETIMEOUT_ACT(data[1], WDOG_PRETIMEOUT_NONE); 383 WDOG_SET_PRETIMEOUT_ACT(data[1], WDOG_PRETIMEOUT_NONE);
401 data[2] = 0; /* No pretimeout. */ 384 data[2] = 0; /* No pretimeout. */
402 } 385 }
403 data[3] = 0; 386 data[3] = 0;
404 WDOG_SET_TIMEOUT(data[4], data[5], timeout); 387 WDOG_SET_TIMEOUT(data[4], data[5], timeout);
405 388
406 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; 389 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
407 addr.channel = IPMI_BMC_CHANNEL; 390 addr.channel = IPMI_BMC_CHANNEL;
408 addr.lun = 0; 391 addr.lun = 0;
409 392
410 msg.netfn = 0x06; 393 msg.netfn = 0x06;
411 msg.cmd = IPMI_WDOG_SET_TIMER; 394 msg.cmd = IPMI_WDOG_SET_TIMER;
412 msg.data = data; 395 msg.data = data;
413 msg.data_len = sizeof(data); 396 msg.data_len = sizeof(data);
414 rv = ipmi_request_supply_msgs(watchdog_user, 397 rv = ipmi_request_supply_msgs(watchdog_user,
415 (struct ipmi_addr *) &addr, 398 (struct ipmi_addr *) &addr,
416 0, 399 0,
417 &msg, 400 &msg,
418 NULL, 401 NULL,
419 smi_msg, 402 smi_msg,
420 recv_msg, 403 recv_msg,
421 1); 404 1);
422 if (rv) { 405 if (rv) {
423 printk(KERN_WARNING PFX "set timeout error: %d\n", 406 printk(KERN_WARNING PFX "set timeout error: %d\n",
424 rv); 407 rv);
425 } 408 }
426 409
427 if (send_heartbeat_now) 410 if (send_heartbeat_now)
428 *send_heartbeat_now = hbnow; 411 *send_heartbeat_now = hbnow;
429 412
430 return rv; 413 return rv;
431 } 414 }
432 415
433 static int ipmi_set_timeout(int do_heartbeat) 416 static int ipmi_set_timeout(int do_heartbeat)
434 { 417 {
435 int send_heartbeat_now; 418 int send_heartbeat_now;
436 int rv; 419 int rv;
437 420
438 421
439 /* We can only send one of these at a time. */ 422 /* We can only send one of these at a time. */
440 mutex_lock(&set_timeout_lock); 423 mutex_lock(&set_timeout_lock);
441 424
442 atomic_set(&set_timeout_tofree, 2); 425 atomic_set(&set_timeout_tofree, 2);
443 426
444 rv = i_ipmi_set_timeout(&set_timeout_smi_msg, 427 rv = i_ipmi_set_timeout(&set_timeout_smi_msg,
445 &set_timeout_recv_msg, 428 &set_timeout_recv_msg,
446 &send_heartbeat_now); 429 &send_heartbeat_now);
447 if (rv) { 430 if (rv) {
448 mutex_unlock(&set_timeout_lock); 431 mutex_unlock(&set_timeout_lock);
449 goto out; 432 goto out;
450 } 433 }
451 434
452 wait_for_completion(&set_timeout_wait); 435 wait_for_completion(&set_timeout_wait);
453 436
454 mutex_unlock(&set_timeout_lock);
455
456 if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB) 437 if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
457 || ((send_heartbeat_now) 438 || ((send_heartbeat_now)
458 && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY))) 439 && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
440 {
459 rv = ipmi_heartbeat(); 441 rv = ipmi_heartbeat();
442 }
443 mutex_unlock(&set_timeout_lock);
460 444
461 out: 445 out:
462 return rv; 446 return rv;
463 } 447 }
464 448
465 static void dummy_smi_free(struct ipmi_smi_msg *msg) 449 static void dummy_smi_free(struct ipmi_smi_msg *msg)
466 { 450 {
467 } 451 }
468 static void dummy_recv_free(struct ipmi_recv_msg *msg) 452 static void dummy_recv_free(struct ipmi_recv_msg *msg)
469 { 453 {
470 } 454 }
471 static struct ipmi_smi_msg panic_halt_smi_msg = 455 static struct ipmi_smi_msg panic_halt_smi_msg =
472 { 456 {
473 .done = dummy_smi_free 457 .done = dummy_smi_free
474 }; 458 };
475 static struct ipmi_recv_msg panic_halt_recv_msg = 459 static struct ipmi_recv_msg panic_halt_recv_msg =
476 { 460 {
477 .done = dummy_recv_free 461 .done = dummy_recv_free
478 }; 462 };
479 463
480 /* Special call, doesn't claim any locks. This is only to be called 464 /* Special call, doesn't claim any locks. This is only to be called
481 at panic or halt time, in run-to-completion mode, when the caller 465 at panic or halt time, in run-to-completion mode, when the caller
482 is the only CPU and the only thing that will be going is these IPMI 466 is the only CPU and the only thing that will be going is these IPMI
483 calls. */ 467 calls. */
484 static void panic_halt_ipmi_set_timeout(void) 468 static void panic_halt_ipmi_set_timeout(void)
485 { 469 {
486 int send_heartbeat_now; 470 int send_heartbeat_now;
487 int rv; 471 int rv;
488 472
489 rv = i_ipmi_set_timeout(&panic_halt_smi_msg, 473 rv = i_ipmi_set_timeout(&panic_halt_smi_msg,
490 &panic_halt_recv_msg, 474 &panic_halt_recv_msg,
491 &send_heartbeat_now); 475 &send_heartbeat_now);
492 if (!rv) { 476 if (!rv) {
493 if (send_heartbeat_now) 477 if (send_heartbeat_now)
494 panic_halt_ipmi_heartbeat(); 478 panic_halt_ipmi_heartbeat();
495 } 479 }
496 } 480 }
497 481
498 /* We use a semaphore to make sure that only one thing can send a 482 /* We use a semaphore to make sure that only one thing can send a
499 heartbeat at one time, because we only have one copy of the data. 483 heartbeat at one time, because we only have one copy of the data.
500 The semaphore is claimed when the set_timeout is sent and freed 484 The semaphore is claimed when the set_timeout is sent and freed
501 when both messages are free. */ 485 when both messages are free. */
502 static atomic_t heartbeat_tofree = ATOMIC_INIT(0); 486 static atomic_t heartbeat_tofree = ATOMIC_INIT(0);
503 static DEFINE_MUTEX(heartbeat_lock); 487 static DEFINE_MUTEX(heartbeat_lock);
504 static DECLARE_COMPLETION(heartbeat_wait); 488 static DECLARE_COMPLETION(heartbeat_wait);
505 static void heartbeat_free_smi(struct ipmi_smi_msg *msg) 489 static void heartbeat_free_smi(struct ipmi_smi_msg *msg)
506 { 490 {
507 if (atomic_dec_and_test(&heartbeat_tofree)) 491 if (atomic_dec_and_test(&heartbeat_tofree))
508 complete(&heartbeat_wait); 492 complete(&heartbeat_wait);
509 } 493 }
510 static void heartbeat_free_recv(struct ipmi_recv_msg *msg) 494 static void heartbeat_free_recv(struct ipmi_recv_msg *msg)
511 { 495 {
512 if (atomic_dec_and_test(&heartbeat_tofree)) 496 if (atomic_dec_and_test(&heartbeat_tofree))
513 complete(&heartbeat_wait); 497 complete(&heartbeat_wait);
514 } 498 }
515 static struct ipmi_smi_msg heartbeat_smi_msg = 499 static struct ipmi_smi_msg heartbeat_smi_msg =
516 { 500 {
517 .done = heartbeat_free_smi 501 .done = heartbeat_free_smi
518 }; 502 };
519 static struct ipmi_recv_msg heartbeat_recv_msg = 503 static struct ipmi_recv_msg heartbeat_recv_msg =
520 { 504 {
521 .done = heartbeat_free_recv 505 .done = heartbeat_free_recv
522 }; 506 };
523 507
524 static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg = 508 static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg =
525 { 509 {
526 .done = dummy_smi_free 510 .done = dummy_smi_free
527 }; 511 };
528 static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg = 512 static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg =
529 { 513 {
530 .done = dummy_recv_free 514 .done = dummy_recv_free
531 }; 515 };
532 516
533 static int ipmi_heartbeat(void) 517 static int ipmi_heartbeat(void)
534 { 518 {
535 struct kernel_ipmi_msg msg; 519 struct kernel_ipmi_msg msg;
536 int rv; 520 int rv;
537 struct ipmi_system_interface_addr addr; 521 struct ipmi_system_interface_addr addr;
538 522
539 if (ipmi_ignore_heartbeat) 523 if (ipmi_ignore_heartbeat) {
540 return 0; 524 return 0;
525 }
541 526
542 if (ipmi_start_timer_on_heartbeat) { 527 if (ipmi_start_timer_on_heartbeat) {
528 ipmi_start_timer_on_heartbeat = 0;
543 ipmi_watchdog_state = action_val; 529 ipmi_watchdog_state = action_val;
544 return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); 530 return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
545 } else if (pretimeout_since_last_heartbeat) { 531 } else if (pretimeout_since_last_heartbeat) {
546 /* A pretimeout occurred, make sure we set the timeout. 532 /* A pretimeout occurred, make sure we set the timeout.
547 We don't want to set the action, though, we want to 533 We don't want to set the action, though, we want to
548 leave that alone (thus it can't be combined with the 534 leave that alone (thus it can't be combined with the
549 above operation. */ 535 above operation. */
536 pretimeout_since_last_heartbeat = 0;
550 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); 537 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
551 } 538 }
552 539
553 mutex_lock(&heartbeat_lock); 540 mutex_lock(&heartbeat_lock);
554 541
555 atomic_set(&heartbeat_tofree, 2); 542 atomic_set(&heartbeat_tofree, 2);
556 543
557 /* Don't reset the timer if we have the timer turned off, that 544 /* Don't reset the timer if we have the timer turned off, that
558 re-enables the watchdog. */ 545 re-enables the watchdog. */
559 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) { 546 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) {
560 mutex_unlock(&heartbeat_lock); 547 mutex_unlock(&heartbeat_lock);
561 return 0; 548 return 0;
562 } 549 }
563 550
564 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; 551 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
565 addr.channel = IPMI_BMC_CHANNEL; 552 addr.channel = IPMI_BMC_CHANNEL;
566 addr.lun = 0; 553 addr.lun = 0;
567 554
568 msg.netfn = 0x06; 555 msg.netfn = 0x06;
569 msg.cmd = IPMI_WDOG_RESET_TIMER; 556 msg.cmd = IPMI_WDOG_RESET_TIMER;
570 msg.data = NULL; 557 msg.data = NULL;
571 msg.data_len = 0; 558 msg.data_len = 0;
572 rv = ipmi_request_supply_msgs(watchdog_user, 559 rv = ipmi_request_supply_msgs(watchdog_user,
573 (struct ipmi_addr *) &addr, 560 (struct ipmi_addr *) &addr,
574 0, 561 0,
575 &msg, 562 &msg,
576 NULL, 563 NULL,
577 &heartbeat_smi_msg, 564 &heartbeat_smi_msg,
578 &heartbeat_recv_msg, 565 &heartbeat_recv_msg,
579 1); 566 1);
580 if (rv) { 567 if (rv) {
581 mutex_unlock(&heartbeat_lock); 568 mutex_unlock(&heartbeat_lock);
582 printk(KERN_WARNING PFX "heartbeat failure: %d\n", 569 printk(KERN_WARNING PFX "heartbeat failure: %d\n",
583 rv); 570 rv);
584 return rv; 571 return rv;
585 } 572 }
586 573
587 /* Wait for the heartbeat to be sent. */ 574 /* Wait for the heartbeat to be sent. */
588 wait_for_completion(&heartbeat_wait); 575 wait_for_completion(&heartbeat_wait);
589 576
590 if (heartbeat_recv_msg.msg.data[0] != 0) { 577 if (heartbeat_recv_msg.msg.data[0] != 0) {
591 /* Got an error in the heartbeat response. It was already 578 /* Got an error in the heartbeat response. It was already
592 reported in ipmi_wdog_msg_handler, but we should return 579 reported in ipmi_wdog_msg_handler, but we should return
593 an error here. */ 580 an error here. */
594 rv = -EINVAL; 581 rv = -EINVAL;
595 } 582 }
596 583
597 mutex_unlock(&heartbeat_lock); 584 mutex_unlock(&heartbeat_lock);
598 585
599 return rv; 586 return rv;
600 } 587 }
601 588
602 static void panic_halt_ipmi_heartbeat(void) 589 static void panic_halt_ipmi_heartbeat(void)
603 { 590 {
604 struct kernel_ipmi_msg msg; 591 struct kernel_ipmi_msg msg;
605 struct ipmi_system_interface_addr addr; 592 struct ipmi_system_interface_addr addr;
606 593
607 594
608 /* Don't reset the timer if we have the timer turned off, that 595 /* Don't reset the timer if we have the timer turned off, that
609 re-enables the watchdog. */ 596 re-enables the watchdog. */
610 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) 597 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
611 return; 598 return;
612 599
613 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; 600 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
614 addr.channel = IPMI_BMC_CHANNEL; 601 addr.channel = IPMI_BMC_CHANNEL;
615 addr.lun = 0; 602 addr.lun = 0;
616 603
617 msg.netfn = 0x06; 604 msg.netfn = 0x06;
618 msg.cmd = IPMI_WDOG_RESET_TIMER; 605 msg.cmd = IPMI_WDOG_RESET_TIMER;
619 msg.data = NULL; 606 msg.data = NULL;
620 msg.data_len = 0; 607 msg.data_len = 0;
621 ipmi_request_supply_msgs(watchdog_user, 608 ipmi_request_supply_msgs(watchdog_user,
622 (struct ipmi_addr *) &addr, 609 (struct ipmi_addr *) &addr,
623 0, 610 0,
624 &msg, 611 &msg,
625 NULL, 612 NULL,
626 &panic_halt_heartbeat_smi_msg, 613 &panic_halt_heartbeat_smi_msg,
627 &panic_halt_heartbeat_recv_msg, 614 &panic_halt_heartbeat_recv_msg,
628 1); 615 1);
629 } 616 }
630 617
631 static struct watchdog_info ident = 618 static struct watchdog_info ident =
632 { 619 {
633 .options = 0, /* WDIOF_SETTIMEOUT, */ 620 .options = 0, /* WDIOF_SETTIMEOUT, */
634 .firmware_version = 1, 621 .firmware_version = 1,
635 .identity = "IPMI" 622 .identity = "IPMI"
636 }; 623 };
637 624
638 static int ipmi_ioctl(struct inode *inode, struct file *file, 625 static int ipmi_ioctl(struct inode *inode, struct file *file,
639 unsigned int cmd, unsigned long arg) 626 unsigned int cmd, unsigned long arg)
640 { 627 {
641 void __user *argp = (void __user *)arg; 628 void __user *argp = (void __user *)arg;
642 int i; 629 int i;
643 int val; 630 int val;
644 631
645 switch(cmd) { 632 switch(cmd) {
646 case WDIOC_GETSUPPORT: 633 case WDIOC_GETSUPPORT:
647 i = copy_to_user(argp, &ident, sizeof(ident)); 634 i = copy_to_user(argp, &ident, sizeof(ident));
648 return i ? -EFAULT : 0; 635 return i ? -EFAULT : 0;
649 636
650 case WDIOC_SETTIMEOUT: 637 case WDIOC_SETTIMEOUT:
651 i = copy_from_user(&val, argp, sizeof(int)); 638 i = copy_from_user(&val, argp, sizeof(int));
652 if (i) 639 if (i)
653 return -EFAULT; 640 return -EFAULT;
654 timeout = val; 641 timeout = val;
655 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); 642 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
656 643
657 case WDIOC_GETTIMEOUT: 644 case WDIOC_GETTIMEOUT:
658 i = copy_to_user(argp, &timeout, sizeof(timeout)); 645 i = copy_to_user(argp, &timeout, sizeof(timeout));
659 if (i) 646 if (i)
660 return -EFAULT; 647 return -EFAULT;
661 return 0; 648 return 0;
662 649
663 case WDIOC_SET_PRETIMEOUT: 650 case WDIOC_SET_PRETIMEOUT:
664 i = copy_from_user(&val, argp, sizeof(int)); 651 i = copy_from_user(&val, argp, sizeof(int));
665 if (i) 652 if (i)
666 return -EFAULT; 653 return -EFAULT;
667 pretimeout = val; 654 pretimeout = val;
668 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); 655 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
669 656
670 case WDIOC_GET_PRETIMEOUT: 657 case WDIOC_GET_PRETIMEOUT:
671 i = copy_to_user(argp, &pretimeout, sizeof(pretimeout)); 658 i = copy_to_user(argp, &pretimeout, sizeof(pretimeout));
672 if (i) 659 if (i)
673 return -EFAULT; 660 return -EFAULT;
674 return 0; 661 return 0;
675 662
676 case WDIOC_KEEPALIVE: 663 case WDIOC_KEEPALIVE:
677 return ipmi_heartbeat(); 664 return ipmi_heartbeat();
678 665
679 case WDIOC_SETOPTIONS: 666 case WDIOC_SETOPTIONS:
680 i = copy_from_user(&val, argp, sizeof(int)); 667 i = copy_from_user(&val, argp, sizeof(int));
681 if (i) 668 if (i)
682 return -EFAULT; 669 return -EFAULT;
683 if (val & WDIOS_DISABLECARD) 670 if (val & WDIOS_DISABLECARD)
684 { 671 {
685 ipmi_watchdog_state = WDOG_TIMEOUT_NONE; 672 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
686 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); 673 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
687 ipmi_start_timer_on_heartbeat = 0; 674 ipmi_start_timer_on_heartbeat = 0;
688 } 675 }
689 676
690 if (val & WDIOS_ENABLECARD) 677 if (val & WDIOS_ENABLECARD)
691 { 678 {
692 ipmi_watchdog_state = action_val; 679 ipmi_watchdog_state = action_val;
693 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); 680 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
694 } 681 }
695 return 0; 682 return 0;
696 683
697 case WDIOC_GETSTATUS: 684 case WDIOC_GETSTATUS:
698 val = 0; 685 val = 0;
699 i = copy_to_user(argp, &val, sizeof(val)); 686 i = copy_to_user(argp, &val, sizeof(val));
700 if (i) 687 if (i)
701 return -EFAULT; 688 return -EFAULT;
702 return 0; 689 return 0;
703 690
704 default: 691 default:
705 return -ENOIOCTLCMD; 692 return -ENOIOCTLCMD;
706 } 693 }
707 } 694 }
708 695
709 static ssize_t ipmi_write(struct file *file, 696 static ssize_t ipmi_write(struct file *file,
710 const char __user *buf, 697 const char __user *buf,
711 size_t len, 698 size_t len,
712 loff_t *ppos) 699 loff_t *ppos)
713 { 700 {
714 int rv; 701 int rv;
715 702
716 if (len) { 703 if (len) {
717 if (!nowayout) { 704 if (!nowayout) {
718 size_t i; 705 size_t i;
719 706
720 /* In case it was set long ago */ 707 /* In case it was set long ago */
721 expect_close = 0; 708 expect_close = 0;
722 709
723 for (i = 0; i != len; i++) { 710 for (i = 0; i != len; i++) {
724 char c; 711 char c;
725 712
726 if (get_user(c, buf + i)) 713 if (get_user(c, buf + i))
727 return -EFAULT; 714 return -EFAULT;
728 if (c == 'V') 715 if (c == 'V')
729 expect_close = 42; 716 expect_close = 42;
730 } 717 }
731 } 718 }
732 rv = ipmi_heartbeat(); 719 rv = ipmi_heartbeat();
733 if (rv) 720 if (rv)
734 return rv; 721 return rv;
735 return 1; 722 return 1;
736 } 723 }
737 return 0; 724 return 0;
738 } 725 }
739 726
740 static ssize_t ipmi_read(struct file *file, 727 static ssize_t ipmi_read(struct file *file,
741 char __user *buf, 728 char __user *buf,
742 size_t count, 729 size_t count,
743 loff_t *ppos) 730 loff_t *ppos)
744 { 731 {
745 int rv = 0; 732 int rv = 0;
746 wait_queue_t wait; 733 wait_queue_t wait;
747 734
748 if (count <= 0) 735 if (count <= 0)
749 return 0; 736 return 0;
750 737
751 /* Reading returns if the pretimeout has gone off, and it only does 738 /* Reading returns if the pretimeout has gone off, and it only does
752 it once per pretimeout. */ 739 it once per pretimeout. */
753 spin_lock(&ipmi_read_lock); 740 spin_lock(&ipmi_read_lock);
754 if (!data_to_read) { 741 if (!data_to_read) {
755 if (file->f_flags & O_NONBLOCK) { 742 if (file->f_flags & O_NONBLOCK) {
756 rv = -EAGAIN; 743 rv = -EAGAIN;
757 goto out; 744 goto out;
758 } 745 }
759 746
760 init_waitqueue_entry(&wait, current); 747 init_waitqueue_entry(&wait, current);
761 add_wait_queue(&read_q, &wait); 748 add_wait_queue(&read_q, &wait);
762 while (!data_to_read) { 749 while (!data_to_read) {
763 set_current_state(TASK_INTERRUPTIBLE); 750 set_current_state(TASK_INTERRUPTIBLE);
764 spin_unlock(&ipmi_read_lock); 751 spin_unlock(&ipmi_read_lock);
765 schedule(); 752 schedule();
766 spin_lock(&ipmi_read_lock); 753 spin_lock(&ipmi_read_lock);
767 } 754 }
768 remove_wait_queue(&read_q, &wait); 755 remove_wait_queue(&read_q, &wait);
769 756
770 if (signal_pending(current)) { 757 if (signal_pending(current)) {
771 rv = -ERESTARTSYS; 758 rv = -ERESTARTSYS;
772 goto out; 759 goto out;
773 } 760 }
774 } 761 }
775 data_to_read = 0; 762 data_to_read = 0;
776 763
777 out: 764 out:
778 spin_unlock(&ipmi_read_lock); 765 spin_unlock(&ipmi_read_lock);
779 766
780 if (rv == 0) { 767 if (rv == 0) {
781 if (copy_to_user(buf, &data_to_read, 1)) 768 if (copy_to_user(buf, &data_to_read, 1))
782 rv = -EFAULT; 769 rv = -EFAULT;
783 else 770 else
784 rv = 1; 771 rv = 1;
785 } 772 }
786 773
787 return rv; 774 return rv;
788 } 775 }
789 776
790 static int ipmi_open(struct inode *ino, struct file *filep) 777 static int ipmi_open(struct inode *ino, struct file *filep)
791 { 778 {
792 switch (iminor(ino)) { 779 switch (iminor(ino)) {
793 case WATCHDOG_MINOR: 780 case WATCHDOG_MINOR:
794 if (test_and_set_bit(0, &ipmi_wdog_open)) 781 if (test_and_set_bit(0, &ipmi_wdog_open))
795 return -EBUSY; 782 return -EBUSY;
796 783
797 /* Don't start the timer now, let it start on the 784 /* Don't start the timer now, let it start on the
798 first heartbeat. */ 785 first heartbeat. */
799 ipmi_start_timer_on_heartbeat = 1; 786 ipmi_start_timer_on_heartbeat = 1;
800 return nonseekable_open(ino, filep); 787 return nonseekable_open(ino, filep);
801 788
802 default: 789 default:
803 return (-ENODEV); 790 return (-ENODEV);
804 } 791 }
805 } 792 }
806 793
807 static unsigned int ipmi_poll(struct file *file, poll_table *wait) 794 static unsigned int ipmi_poll(struct file *file, poll_table *wait)
808 { 795 {
809 unsigned int mask = 0; 796 unsigned int mask = 0;
810 797
811 poll_wait(file, &read_q, wait); 798 poll_wait(file, &read_q, wait);
812 799
813 spin_lock(&ipmi_read_lock); 800 spin_lock(&ipmi_read_lock);
814 if (data_to_read) 801 if (data_to_read)
815 mask |= (POLLIN | POLLRDNORM); 802 mask |= (POLLIN | POLLRDNORM);
816 spin_unlock(&ipmi_read_lock); 803 spin_unlock(&ipmi_read_lock);
817 804
818 return mask; 805 return mask;
819 } 806 }
820 807
821 static int ipmi_fasync(int fd, struct file *file, int on) 808 static int ipmi_fasync(int fd, struct file *file, int on)
822 { 809 {
823 int result; 810 int result;
824 811
825 result = fasync_helper(fd, file, on, &fasync_q); 812 result = fasync_helper(fd, file, on, &fasync_q);
826 813
827 return (result); 814 return (result);
828 } 815 }
829 816
830 static int ipmi_close(struct inode *ino, struct file *filep) 817 static int ipmi_close(struct inode *ino, struct file *filep)
831 { 818 {
832 if (iminor(ino) == WATCHDOG_MINOR) { 819 if (iminor(ino) == WATCHDOG_MINOR) {
833 if (expect_close == 42) { 820 if (expect_close == 42) {
834 ipmi_watchdog_state = WDOG_TIMEOUT_NONE; 821 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
835 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); 822 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
836 } else { 823 } else {
837 printk(KERN_CRIT PFX 824 printk(KERN_CRIT PFX
838 "Unexpected close, not stopping watchdog!\n"); 825 "Unexpected close, not stopping watchdog!\n");
839 ipmi_heartbeat(); 826 ipmi_heartbeat();
840 } 827 }
841 clear_bit(0, &ipmi_wdog_open); 828 clear_bit(0, &ipmi_wdog_open);
842 } 829 }
843 830
844 ipmi_fasync (-1, filep, 0); 831 ipmi_fasync (-1, filep, 0);
845 expect_close = 0; 832 expect_close = 0;
846 833
847 return 0; 834 return 0;
848 } 835 }
849 836
850 static const struct file_operations ipmi_wdog_fops = { 837 static const struct file_operations ipmi_wdog_fops = {
851 .owner = THIS_MODULE, 838 .owner = THIS_MODULE,
852 .read = ipmi_read, 839 .read = ipmi_read,
853 .poll = ipmi_poll, 840 .poll = ipmi_poll,
854 .write = ipmi_write, 841 .write = ipmi_write,
855 .ioctl = ipmi_ioctl, 842 .ioctl = ipmi_ioctl,
856 .open = ipmi_open, 843 .open = ipmi_open,
857 .release = ipmi_close, 844 .release = ipmi_close,
858 .fasync = ipmi_fasync, 845 .fasync = ipmi_fasync,
859 }; 846 };
860 847
861 static struct miscdevice ipmi_wdog_miscdev = { 848 static struct miscdevice ipmi_wdog_miscdev = {
862 .minor = WATCHDOG_MINOR, 849 .minor = WATCHDOG_MINOR,
863 .name = "watchdog", 850 .name = "watchdog",
864 .fops = &ipmi_wdog_fops 851 .fops = &ipmi_wdog_fops
865 }; 852 };
866 853
867 static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, 854 static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
868 void *handler_data) 855 void *handler_data)
869 { 856 {
870 if (msg->msg.data[0] != 0) { 857 if (msg->msg.data[0] != 0) {
871 printk(KERN_ERR PFX "response: Error %x on cmd %x\n", 858 printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
872 msg->msg.data[0], 859 msg->msg.data[0],
873 msg->msg.cmd); 860 msg->msg.cmd);
874 } 861 }
875 862
876 ipmi_free_recv_msg(msg); 863 ipmi_free_recv_msg(msg);
877 } 864 }
878 865
879 static void ipmi_wdog_pretimeout_handler(void *handler_data) 866 static void ipmi_wdog_pretimeout_handler(void *handler_data)
880 { 867 {
881 if (preaction_val != WDOG_PRETIMEOUT_NONE) { 868 if (preaction_val != WDOG_PRETIMEOUT_NONE) {
882 if (preop_val == WDOG_PREOP_PANIC) { 869 if (preop_val == WDOG_PREOP_PANIC) {
883 if (atomic_inc_and_test(&preop_panic_excl)) 870 if (atomic_inc_and_test(&preop_panic_excl))
884 panic("Watchdog pre-timeout"); 871 panic("Watchdog pre-timeout");
885 } else if (preop_val == WDOG_PREOP_GIVE_DATA) { 872 } else if (preop_val == WDOG_PREOP_GIVE_DATA) {
886 spin_lock(&ipmi_read_lock); 873 spin_lock(&ipmi_read_lock);
887 data_to_read = 1; 874 data_to_read = 1;
888 wake_up_interruptible(&read_q); 875 wake_up_interruptible(&read_q);
889 kill_fasync(&fasync_q, SIGIO, POLL_IN); 876 kill_fasync(&fasync_q, SIGIO, POLL_IN);
890 877
891 spin_unlock(&ipmi_read_lock); 878 spin_unlock(&ipmi_read_lock);
892 } 879 }
893 } 880 }
894 881
895 /* On some machines, the heartbeat will give 882 /* On some machines, the heartbeat will give
896 an error and not work unless we re-enable 883 an error and not work unless we re-enable
897 the timer. So do so. */ 884 the timer. So do so. */
898 pretimeout_since_last_heartbeat = 1; 885 pretimeout_since_last_heartbeat = 1;
899 } 886 }
900 887
901 static struct ipmi_user_hndl ipmi_hndlrs = 888 static struct ipmi_user_hndl ipmi_hndlrs =
902 { 889 {
903 .ipmi_recv_hndl = ipmi_wdog_msg_handler, 890 .ipmi_recv_hndl = ipmi_wdog_msg_handler,
904 .ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler 891 .ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler
905 }; 892 };
906 893
907 static void ipmi_register_watchdog(int ipmi_intf) 894 static void ipmi_register_watchdog(int ipmi_intf)
908 { 895 {
909 int rv = -EBUSY; 896 int rv = -EBUSY;
910 897
911 down_write(&register_sem); 898 down_write(&register_sem);
912 if (watchdog_user) 899 if (watchdog_user)
913 goto out; 900 goto out;
914 901
915 if ((ifnum_to_use >= 0) && (ifnum_to_use != ipmi_intf)) 902 if ((ifnum_to_use >= 0) && (ifnum_to_use != ipmi_intf))
916 goto out; 903 goto out;
917 904
918 watchdog_ifnum = ipmi_intf; 905 watchdog_ifnum = ipmi_intf;
919 906
920 rv = ipmi_create_user(ipmi_intf, &ipmi_hndlrs, NULL, &watchdog_user); 907 rv = ipmi_create_user(ipmi_intf, &ipmi_hndlrs, NULL, &watchdog_user);
921 if (rv < 0) { 908 if (rv < 0) {
922 printk(KERN_CRIT PFX "Unable to register with ipmi\n"); 909 printk(KERN_CRIT PFX "Unable to register with ipmi\n");
923 goto out; 910 goto out;
924 } 911 }
925 912
926 ipmi_get_version(watchdog_user, 913 ipmi_get_version(watchdog_user,
927 &ipmi_version_major, 914 &ipmi_version_major,
928 &ipmi_version_minor); 915 &ipmi_version_minor);
929 916
930 rv = misc_register(&ipmi_wdog_miscdev); 917 rv = misc_register(&ipmi_wdog_miscdev);
931 if (rv < 0) { 918 if (rv < 0) {
932 ipmi_destroy_user(watchdog_user); 919 ipmi_destroy_user(watchdog_user);
933 watchdog_user = NULL; 920 watchdog_user = NULL;
934 printk(KERN_CRIT PFX "Unable to register misc device\n"); 921 printk(KERN_CRIT PFX "Unable to register misc device\n");
935 } 922 }
936 923
937 #ifdef HAVE_DIE_NMI_POST
938 if (nmi_handler_registered) {
939 int old_pretimeout = pretimeout;
940 int old_timeout = timeout;
941 int old_preop_val = preop_val;
942
943 /* Set the pretimeout to go off in a second and give
944 ourselves plenty of time to stop the timer. */
945 ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
946 preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */
947 pretimeout = 99;
948 timeout = 100;
949
950 testing_nmi = 1;
951
952 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
953 if (rv) {
954 printk(KERN_WARNING PFX "Error starting timer to"
955 " test NMI: 0x%x. The NMI pretimeout will"
956 " likely not work\n", rv);
957 rv = 0;
958 goto out_restore;
959 }
960
961 msleep(1500);
962
963 if (testing_nmi != 2) {
964 printk(KERN_WARNING PFX "IPMI NMI didn't seem to"
965 " occur. The NMI pretimeout will"
966 " likely not work\n");
967 }
968 out_restore:
969 testing_nmi = 0;
970 preop_val = old_preop_val;
971 pretimeout = old_pretimeout;
972 timeout = old_timeout;
973 }
974 #endif
975
976 out: 924 out:
977 up_write(&register_sem); 925 up_write(&register_sem);
978 926
979 if ((start_now) && (rv == 0)) { 927 if ((start_now) && (rv == 0)) {
980 /* Run from startup, so start the timer now. */ 928 /* Run from startup, so start the timer now. */
981 start_now = 0; /* Disable this function after first startup. */ 929 start_now = 0; /* Disable this function after first startup. */
982 ipmi_watchdog_state = action_val; 930 ipmi_watchdog_state = action_val;
983 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); 931 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
984 printk(KERN_INFO PFX "Starting now!\n"); 932 printk(KERN_INFO PFX "Starting now!\n");
985 } else {
986 /* Stop the timer now. */
987 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
988 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
989 } 933 }
990 } 934 }
991 935
992 static void ipmi_unregister_watchdog(int ipmi_intf) 936 static void ipmi_unregister_watchdog(int ipmi_intf)
993 { 937 {
994 int rv; 938 int rv;
995 939
996 down_write(&register_sem); 940 down_write(&register_sem);
997 941
998 if (!watchdog_user) 942 if (!watchdog_user)
999 goto out; 943 goto out;
1000 944
1001 if (watchdog_ifnum != ipmi_intf) 945 if (watchdog_ifnum != ipmi_intf)
1002 goto out; 946 goto out;
1003 947
1004 /* Make sure no one can call us any more. */ 948 /* Make sure no one can call us any more. */
1005 misc_deregister(&ipmi_wdog_miscdev); 949 misc_deregister(&ipmi_wdog_miscdev);
1006 950
1007 /* Wait to make sure the message makes it out. The lower layer has 951 /* Wait to make sure the message makes it out. The lower layer has
1008 pointers to our buffers, we want to make sure they are done before 952 pointers to our buffers, we want to make sure they are done before
1009 we release our memory. */ 953 we release our memory. */
1010 while (atomic_read(&set_timeout_tofree)) 954 while (atomic_read(&set_timeout_tofree))
1011 schedule_timeout_uninterruptible(1); 955 schedule_timeout_uninterruptible(1);
1012 956
1013 /* Disconnect from IPMI. */ 957 /* Disconnect from IPMI. */
1014 rv = ipmi_destroy_user(watchdog_user); 958 rv = ipmi_destroy_user(watchdog_user);
1015 if (rv) { 959 if (rv) {
1016 printk(KERN_WARNING PFX "error unlinking from IPMI: %d\n", 960 printk(KERN_WARNING PFX "error unlinking from IPMI: %d\n",
1017 rv); 961 rv);
1018 } 962 }
1019 watchdog_user = NULL; 963 watchdog_user = NULL;
1020 964
1021 out: 965 out:
1022 up_write(&register_sem); 966 up_write(&register_sem);
1023 } 967 }
1024 968
#ifdef HAVE_NMI_HANDLER
/*
 * NMI callback for the WDOG_PRETIMEOUT_NMI preaction.  If the timer
 * is armed and nobody else handled the NMI, assume it was the IPMI
 * watchdog pretimeout and panic if so configured.  Always returns
 * NOTIFY_DONE so other NMI consumers still run.
 */
static int
ipmi_nmi(void *dev_id, int cpu, int handled)
{
	/* If we are not expecting a timeout, ignore it. */
	if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
		return NOTIFY_DONE;

	/* If no one else handled the NMI, we assume it was the IPMI
	   watchdog. */
	if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) {
		/* On some machines, the heartbeat will give
		   an error and not work unless we re-enable
		   the timer.  So do so. */
		pretimeout_since_last_heartbeat = 1;
		if (atomic_inc_and_test(&preop_panic_excl))
			panic(PFX "pre-timeout");
	}

	return NOTIFY_DONE;
}

static struct nmi_handler ipmi_nmi_handler =
{
	.link     = LIST_HEAD_INIT(ipmi_nmi_handler.link),
	.dev_name = "ipmi_watchdog",
	.dev_id   = NULL,
	.handler  = ipmi_nmi,
	.priority = 0, /* Call us last. */
};
int nmi_handler_registered;
#endif
1062 1001
1063 static int wdog_reboot_handler(struct notifier_block *this, 1002 static int wdog_reboot_handler(struct notifier_block *this,
1064 unsigned long code, 1003 unsigned long code,
1065 void *unused) 1004 void *unused)
1066 { 1005 {
1067 static int reboot_event_handled = 0; 1006 static int reboot_event_handled = 0;
1068 1007
1069 if ((watchdog_user) && (!reboot_event_handled)) { 1008 if ((watchdog_user) && (!reboot_event_handled)) {
1070 /* Make sure we only do this once. */ 1009 /* Make sure we only do this once. */
1071 reboot_event_handled = 1; 1010 reboot_event_handled = 1;
1072 1011
1073 if (code == SYS_DOWN || code == SYS_HALT) { 1012 if (code == SYS_DOWN || code == SYS_HALT) {
1074 /* Disable the WDT if we are shutting down. */ 1013 /* Disable the WDT if we are shutting down. */
1075 ipmi_watchdog_state = WDOG_TIMEOUT_NONE; 1014 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
1076 panic_halt_ipmi_set_timeout(); 1015 panic_halt_ipmi_set_timeout();
1077 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { 1016 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
1078 /* Set a long timer to let the reboot happens, but 1017 /* Set a long timer to let the reboot happens, but
1079 reboot if it hangs, but only if the watchdog 1018 reboot if it hangs, but only if the watchdog
1080 timer was already running. */ 1019 timer was already running. */
1081 timeout = 120; 1020 timeout = 120;
1082 pretimeout = 0; 1021 pretimeout = 0;
1083 ipmi_watchdog_state = WDOG_TIMEOUT_RESET; 1022 ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
1084 panic_halt_ipmi_set_timeout(); 1023 panic_halt_ipmi_set_timeout();
1085 } 1024 }
1086 } 1025 }
1087 return NOTIFY_OK; 1026 return NOTIFY_OK;
1088 } 1027 }
1089 1028
1090 static struct notifier_block wdog_reboot_notifier = { 1029 static struct notifier_block wdog_reboot_notifier = {
1091 .notifier_call = wdog_reboot_handler, 1030 .notifier_call = wdog_reboot_handler,
1092 .next = NULL, 1031 .next = NULL,
1093 .priority = 0 1032 .priority = 0
1094 }; 1033 };
1095 1034
1096 static int wdog_panic_handler(struct notifier_block *this, 1035 static int wdog_panic_handler(struct notifier_block *this,
1097 unsigned long event, 1036 unsigned long event,
1098 void *unused) 1037 void *unused)
1099 { 1038 {
1100 static int panic_event_handled = 0; 1039 static int panic_event_handled = 0;
1101 1040
1102 /* On a panic, if we have a panic timeout, make sure to extend 1041 /* On a panic, if we have a panic timeout, make sure to extend
1103 the watchdog timer to a reasonable value to complete the 1042 the watchdog timer to a reasonable value to complete the
1104 panic, if the watchdog timer is running. Plus the 1043 panic, if the watchdog timer is running. Plus the
1105 pretimeout is meaningless at panic time. */ 1044 pretimeout is meaningless at panic time. */
1106 if (watchdog_user && !panic_event_handled && 1045 if (watchdog_user && !panic_event_handled &&
1107 ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { 1046 ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
1108 /* Make sure we do this only once. */ 1047 /* Make sure we do this only once. */
1109 panic_event_handled = 1; 1048 panic_event_handled = 1;
1110 1049
1111 timeout = 255; 1050 timeout = 255;
1112 pretimeout = 0; 1051 pretimeout = 0;
1113 panic_halt_ipmi_set_timeout(); 1052 panic_halt_ipmi_set_timeout();
1114 } 1053 }
1115 1054
1116 return NOTIFY_OK; 1055 return NOTIFY_OK;
1117 } 1056 }
1118 1057
1119 static struct notifier_block wdog_panic_notifier = { 1058 static struct notifier_block wdog_panic_notifier = {
1120 .notifier_call = wdog_panic_handler, 1059 .notifier_call = wdog_panic_handler,
1121 .next = NULL, 1060 .next = NULL,
1122 .priority = 150 /* priority: INT_MAX >= x >= 0 */ 1061 .priority = 150 /* priority: INT_MAX >= x >= 0 */
1123 }; 1062 };
1124 1063
1125 1064
1126 static void ipmi_new_smi(int if_num, struct device *device) 1065 static void ipmi_new_smi(int if_num, struct device *device)
1127 { 1066 {
1128 ipmi_register_watchdog(if_num); 1067 ipmi_register_watchdog(if_num);
1129 } 1068 }
1130 1069
1131 static void ipmi_smi_gone(int if_num) 1070 static void ipmi_smi_gone(int if_num)
1132 { 1071 {
1133 ipmi_unregister_watchdog(if_num); 1072 ipmi_unregister_watchdog(if_num);
1134 } 1073 }
1135 1074
1136 static struct ipmi_smi_watcher smi_watcher = 1075 static struct ipmi_smi_watcher smi_watcher =
1137 { 1076 {
1138 .owner = THIS_MODULE, 1077 .owner = THIS_MODULE,
1139 .new_smi = ipmi_new_smi, 1078 .new_smi = ipmi_new_smi,
1140 .smi_gone = ipmi_smi_gone 1079 .smi_gone = ipmi_smi_gone
1141 }; 1080 };
1142 1081
1143 static int action_op(const char *inval, char *outval) 1082 static int action_op(const char *inval, char *outval)
1144 { 1083 {
1145 if (outval) 1084 if (outval)
1146 strcpy(outval, action); 1085 strcpy(outval, action);
1147 1086
1148 if (!inval) 1087 if (!inval)
1149 return 0; 1088 return 0;
1150 1089
1151 if (strcmp(inval, "reset") == 0) 1090 if (strcmp(inval, "reset") == 0)
1152 action_val = WDOG_TIMEOUT_RESET; 1091 action_val = WDOG_TIMEOUT_RESET;
1153 else if (strcmp(inval, "none") == 0) 1092 else if (strcmp(inval, "none") == 0)
1154 action_val = WDOG_TIMEOUT_NONE; 1093 action_val = WDOG_TIMEOUT_NONE;
1155 else if (strcmp(inval, "power_cycle") == 0) 1094 else if (strcmp(inval, "power_cycle") == 0)
1156 action_val = WDOG_TIMEOUT_POWER_CYCLE; 1095 action_val = WDOG_TIMEOUT_POWER_CYCLE;
1157 else if (strcmp(inval, "power_off") == 0) 1096 else if (strcmp(inval, "power_off") == 0)
1158 action_val = WDOG_TIMEOUT_POWER_DOWN; 1097 action_val = WDOG_TIMEOUT_POWER_DOWN;
1159 else 1098 else
1160 return -EINVAL; 1099 return -EINVAL;
1161 strcpy(action, inval); 1100 strcpy(action, inval);
1162 return 0; 1101 return 0;
1163 } 1102 }
1164 1103
1165 static int preaction_op(const char *inval, char *outval) 1104 static int preaction_op(const char *inval, char *outval)
1166 { 1105 {
1167 if (outval) 1106 if (outval)
1168 strcpy(outval, preaction); 1107 strcpy(outval, preaction);
1169 1108
1170 if (!inval) 1109 if (!inval)
1171 return 0; 1110 return 0;
1172 1111
1173 if (strcmp(inval, "pre_none") == 0) 1112 if (strcmp(inval, "pre_none") == 0)
1174 preaction_val = WDOG_PRETIMEOUT_NONE; 1113 preaction_val = WDOG_PRETIMEOUT_NONE;
1175 else if (strcmp(inval, "pre_smi") == 0) 1114 else if (strcmp(inval, "pre_smi") == 0)
1176 preaction_val = WDOG_PRETIMEOUT_SMI; 1115 preaction_val = WDOG_PRETIMEOUT_SMI;
1177 #ifdef HAVE_DIE_NMI_POST 1116 #ifdef HAVE_NMI_HANDLER
1178 else if (strcmp(inval, "pre_nmi") == 0) 1117 else if (strcmp(inval, "pre_nmi") == 0)
1179 preaction_val = WDOG_PRETIMEOUT_NMI; 1118 preaction_val = WDOG_PRETIMEOUT_NMI;
1180 #endif 1119 #endif
1181 else if (strcmp(inval, "pre_int") == 0) 1120 else if (strcmp(inval, "pre_int") == 0)
1182 preaction_val = WDOG_PRETIMEOUT_MSG_INT; 1121 preaction_val = WDOG_PRETIMEOUT_MSG_INT;
1183 else 1122 else
1184 return -EINVAL; 1123 return -EINVAL;
1185 strcpy(preaction, inval); 1124 strcpy(preaction, inval);
1186 return 0; 1125 return 0;
1187 } 1126 }
1188 1127
1189 static int preop_op(const char *inval, char *outval) 1128 static int preop_op(const char *inval, char *outval)
1190 { 1129 {
1191 if (outval) 1130 if (outval)
1192 strcpy(outval, preop); 1131 strcpy(outval, preop);
1193 1132
1194 if (!inval) 1133 if (!inval)
1195 return 0; 1134 return 0;
1196 1135
1197 if (strcmp(inval, "preop_none") == 0) 1136 if (strcmp(inval, "preop_none") == 0)
1198 preop_val = WDOG_PREOP_NONE; 1137 preop_val = WDOG_PREOP_NONE;
1199 else if (strcmp(inval, "preop_panic") == 0) 1138 else if (strcmp(inval, "preop_panic") == 0)
1200 preop_val = WDOG_PREOP_PANIC; 1139 preop_val = WDOG_PREOP_PANIC;
1201 else if (strcmp(inval, "preop_give_data") == 0) 1140 else if (strcmp(inval, "preop_give_data") == 0)
1202 preop_val = WDOG_PREOP_GIVE_DATA; 1141 preop_val = WDOG_PREOP_GIVE_DATA;
1203 else 1142 else
1204 return -EINVAL; 1143 return -EINVAL;
1205 strcpy(preop, inval); 1144 strcpy(preop, inval);
1206 return 0; 1145 return 0;
1207 } 1146 }
1208 1147
1209 static void check_parms(void) 1148 static void check_parms(void)
1210 { 1149 {
1211 #ifdef HAVE_DIE_NMI_POST 1150 #ifdef HAVE_NMI_HANDLER
1212 int do_nmi = 0; 1151 int do_nmi = 0;
1213 int rv; 1152 int rv;
1214 1153
1215 if (preaction_val == WDOG_PRETIMEOUT_NMI) { 1154 if (preaction_val == WDOG_PRETIMEOUT_NMI) {
1216 do_nmi = 1; 1155 do_nmi = 1;
1217 if (preop_val == WDOG_PREOP_GIVE_DATA) { 1156 if (preop_val == WDOG_PREOP_GIVE_DATA) {
1218 printk(KERN_WARNING PFX "Pretimeout op is to give data" 1157 printk(KERN_WARNING PFX "Pretimeout op is to give data"
1219 " but NMI pretimeout is enabled, setting" 1158 " but NMI pretimeout is enabled, setting"
1220 " pretimeout op to none\n"); 1159 " pretimeout op to none\n");
1221 preop_op("preop_none", NULL); 1160 preop_op("preop_none", NULL);
1222 do_nmi = 0; 1161 do_nmi = 0;
1223 } 1162 }
1163 #ifdef CONFIG_X86_LOCAL_APIC
1164 if (nmi_watchdog == NMI_IO_APIC) {
1165 printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC"
1166 " mode (value is %d), that is incompatible"
1167 " with using NMI in the IPMI watchdog."
1168 " Disabling IPMI nmi pretimeout.\n",
1169 nmi_watchdog);
1170 preaction_val = WDOG_PRETIMEOUT_NONE;
1171 do_nmi = 0;
1172 }
1173 #endif
1224 } 1174 }
1225 if (do_nmi && !nmi_handler_registered) { 1175 if (do_nmi && !nmi_handler_registered) {
1226 rv = register_die_notifier(&ipmi_nmi_handler); 1176 rv = request_nmi(&ipmi_nmi_handler);
1227 if (rv) { 1177 if (rv) {
1228 printk(KERN_WARNING PFX 1178 printk(KERN_WARNING PFX
1229 "Can't register nmi handler\n"); 1179 "Can't register nmi handler\n");
1230 return; 1180 return;
1231 } else 1181 } else
1232 nmi_handler_registered = 1; 1182 nmi_handler_registered = 1;
1233 } else if (!do_nmi && nmi_handler_registered) { 1183 } else if (!do_nmi && nmi_handler_registered) {
1234 unregister_die_notifier(&ipmi_nmi_handler); 1184 release_nmi(&ipmi_nmi_handler);
1235 nmi_handler_registered = 0; 1185 nmi_handler_registered = 0;
1236 } 1186 }
1237 #endif 1187 #endif
1238 } 1188 }
1239 1189
1240 static int __init ipmi_wdog_init(void) 1190 static int __init ipmi_wdog_init(void)
1241 { 1191 {
1242 int rv; 1192 int rv;
1243 1193
1244 if (action_op(action, NULL)) { 1194 if (action_op(action, NULL)) {
1245 action_op("reset", NULL); 1195 action_op("reset", NULL);
1246 printk(KERN_INFO PFX "Unknown action '%s', defaulting to" 1196 printk(KERN_INFO PFX "Unknown action '%s', defaulting to"
1247 " reset\n", action); 1197 " reset\n", action);
1248 } 1198 }
1249 1199
1250 if (preaction_op(preaction, NULL)) { 1200 if (preaction_op(preaction, NULL)) {
1251 preaction_op("pre_none", NULL); 1201 preaction_op("pre_none", NULL);
1252 printk(KERN_INFO PFX "Unknown preaction '%s', defaulting to" 1202 printk(KERN_INFO PFX "Unknown preaction '%s', defaulting to"
1253 " none\n", preaction); 1203 " none\n", preaction);
1254 } 1204 }
1255 1205
1256 if (preop_op(preop, NULL)) { 1206 if (preop_op(preop, NULL)) {
1257 preop_op("preop_none", NULL); 1207 preop_op("preop_none", NULL);
1258 printk(KERN_INFO PFX "Unknown preop '%s', defaulting to" 1208 printk(KERN_INFO PFX "Unknown preop '%s', defaulting to"
1259 " none\n", preop); 1209 " none\n", preop);
1260 } 1210 }
1261 1211
1262 check_parms(); 1212 check_parms();
1263 1213
1264 register_reboot_notifier(&wdog_reboot_notifier); 1214 register_reboot_notifier(&wdog_reboot_notifier);
1265 atomic_notifier_chain_register(&panic_notifier_list, 1215 atomic_notifier_chain_register(&panic_notifier_list,
1266 &wdog_panic_notifier); 1216 &wdog_panic_notifier);
1267 1217
1268 rv = ipmi_smi_watcher_register(&smi_watcher); 1218 rv = ipmi_smi_watcher_register(&smi_watcher);
1269 if (rv) { 1219 if (rv) {
1270 #ifdef HAVE_DIE_NMI_POST 1220 #ifdef HAVE_NMI_HANDLER
1271 if (nmi_handler_registered) 1221 if (preaction_val == WDOG_PRETIMEOUT_NMI)
1272 unregister_die_notifier(&ipmi_nmi_handler); 1222 release_nmi(&ipmi_nmi_handler);
1273 #endif 1223 #endif
1274 atomic_notifier_chain_unregister(&panic_notifier_list, 1224 atomic_notifier_chain_unregister(&panic_notifier_list,
1275 &wdog_panic_notifier); 1225 &wdog_panic_notifier);
1276 unregister_reboot_notifier(&wdog_reboot_notifier); 1226 unregister_reboot_notifier(&wdog_reboot_notifier);
1277 printk(KERN_WARNING PFX "can't register smi watcher\n"); 1227 printk(KERN_WARNING PFX "can't register smi watcher\n");
1278 return rv; 1228 return rv;
1279 } 1229 }
1280 1230
1281 printk(KERN_INFO PFX "driver initialized\n"); 1231 printk(KERN_INFO PFX "driver initialized\n");
1282 1232
include/asm-i386/kdebug.h
1 #ifndef _I386_KDEBUG_H 1 #ifndef _I386_KDEBUG_H
2 #define _I386_KDEBUG_H 1 2 #define _I386_KDEBUG_H 1
3 3
4 /* 4 /*
5 * Aug-05 2004 Ported by Prasanna S Panchamukhi <prasanna@in.ibm.com> 5 * Aug-05 2004 Ported by Prasanna S Panchamukhi <prasanna@in.ibm.com>
6 * from x86_64 architecture. 6 * from x86_64 architecture.
7 */ 7 */
8 #include <linux/notifier.h> 8 #include <linux/notifier.h>
9 9
10 struct pt_regs; 10 struct pt_regs;
11 11
12 extern int register_page_fault_notifier(struct notifier_block *); 12 extern int register_page_fault_notifier(struct notifier_block *);
13 extern int unregister_page_fault_notifier(struct notifier_block *); 13 extern int unregister_page_fault_notifier(struct notifier_block *);
14 14
15 15
16 /* Grossly misnamed. */ 16 /* Grossly misnamed. */
17 enum die_val { 17 enum die_val {
18 DIE_OOPS = 1, 18 DIE_OOPS = 1,
19 DIE_INT3, 19 DIE_INT3,
20 DIE_DEBUG, 20 DIE_DEBUG,
21 DIE_PANIC, 21 DIE_PANIC,
22 DIE_NMI, 22 DIE_NMI,
23 DIE_DIE, 23 DIE_DIE,
24 DIE_NMIWATCHDOG, 24 DIE_NMIWATCHDOG,
25 DIE_KERNELDEBUG, 25 DIE_KERNELDEBUG,
26 DIE_TRAP, 26 DIE_TRAP,
27 DIE_GPF, 27 DIE_GPF,
28 DIE_CALL, 28 DIE_CALL,
29 DIE_NMI_IPI, 29 DIE_NMI_IPI,
30 DIE_NMI_POST,
31 DIE_PAGE_FAULT, 30 DIE_PAGE_FAULT,
32 }; 31 };
33 32
34 #endif 33 #endif
35 34
include/asm-x86_64/kdebug.h
1 #ifndef _X86_64_KDEBUG_H 1 #ifndef _X86_64_KDEBUG_H
2 #define _X86_64_KDEBUG_H 1 2 #define _X86_64_KDEBUG_H 1
3 3
4 #include <linux/notifier.h> 4 #include <linux/notifier.h>
5 5
6 struct pt_regs; 6 struct pt_regs;
7 7
8 extern int register_page_fault_notifier(struct notifier_block *); 8 extern int register_page_fault_notifier(struct notifier_block *);
9 extern int unregister_page_fault_notifier(struct notifier_block *); 9 extern int unregister_page_fault_notifier(struct notifier_block *);
10 10
11 /* Grossly misnamed. */ 11 /* Grossly misnamed. */
12 enum die_val { 12 enum die_val {
13 DIE_OOPS = 1, 13 DIE_OOPS = 1,
14 DIE_INT3, 14 DIE_INT3,
15 DIE_DEBUG, 15 DIE_DEBUG,
16 DIE_PANIC, 16 DIE_PANIC,
17 DIE_NMI, 17 DIE_NMI,
18 DIE_DIE, 18 DIE_DIE,
19 DIE_NMIWATCHDOG, 19 DIE_NMIWATCHDOG,
20 DIE_KERNELDEBUG, 20 DIE_KERNELDEBUG,
21 DIE_TRAP, 21 DIE_TRAP,
22 DIE_GPF, 22 DIE_GPF,
23 DIE_CALL, 23 DIE_CALL,
24 DIE_NMI_IPI, 24 DIE_NMI_IPI,
25 DIE_NMI_POST,
26 DIE_PAGE_FAULT, 25 DIE_PAGE_FAULT,
27 }; 26 };
28 27
29 extern void printk_address(unsigned long address); 28 extern void printk_address(unsigned long address);
30 extern void die(const char *,struct pt_regs *,long); 29 extern void die(const char *,struct pt_regs *,long);
31 extern void __die(const char *,struct pt_regs *,long); 30 extern void __die(const char *,struct pt_regs *,long);
32 extern void show_registers(struct pt_regs *regs); 31 extern void show_registers(struct pt_regs *regs);
33 extern void dump_pagetable(unsigned long); 32 extern void dump_pagetable(unsigned long);
34 extern unsigned long oops_begin(void); 33 extern unsigned long oops_begin(void);
35 extern void oops_end(unsigned long); 34 extern void oops_end(unsigned long);
36 35
37 #endif 36 #endif
38 37