Commit faa8b6c3c2e1454175609167a25ae525d075f045
1 parent
3ec2ab5514
Exists in
master
and in
7 other branches
Revert "ipmi: add new IPMI nmi watchdog handling"
This reverts commit f64da958dfc83335de1d2bef9d3868f30feb4e53. Andi Kleen is unhappy with the changes, and they really do not seem worth it. IPMI could use DIE_NMI_IPI instead of the new callback, even though that ends up having its own set of problems too, mainly because the IPMI code cannot really know the NMI was from IPMI or not. Manually fix up conflicts in arch/x86_64/kernel/traps.c and drivers/char/ipmi/ipmi_watchdog.c. Cc: Andi Kleen <ak@suse.de> Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> Cc: Corey Minyard <minyard@acm.org> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 5 changed files with 42 additions and 102 deletions Side-by-side Diff
arch/i386/kernel/traps.c
... | ... | @@ -733,11 +733,6 @@ |
733 | 733 | */ |
734 | 734 | if (nmi_watchdog_tick(regs, reason)) |
735 | 735 | return; |
736 | -#endif | |
737 | - if (notify_die(DIE_NMI_POST, "nmi_post", regs, reason, 2, 0) | |
738 | - == NOTIFY_STOP) | |
739 | - return; | |
740 | -#ifdef CONFIG_X86_LOCAL_APIC | |
741 | 736 | if (!do_nmi_callback(regs, smp_processor_id())) |
742 | 737 | #endif |
743 | 738 | unknown_nmi_error(reason, regs); |
arch/x86_64/kernel/traps.c
drivers/char/ipmi/ipmi_watchdog.c
... | ... | @@ -50,18 +50,10 @@ |
50 | 50 | #include <linux/poll.h> |
51 | 51 | #include <linux/string.h> |
52 | 52 | #include <linux/ctype.h> |
53 | -#include <linux/delay.h> | |
54 | 53 | #include <asm/atomic.h> |
55 | 54 | |
56 | -#ifdef CONFIG_X86 | |
57 | -/* This is ugly, but I've determined that x86 is the only architecture | |
58 | - that can reasonably support the IPMI NMI watchdog timeout at this | |
59 | - time. If another architecture adds this capability somehow, it | |
60 | - will have to be a somewhat different mechanism and I have no idea | |
61 | - how it will work. So in the unlikely event that another | |
62 | - architecture supports this, we can figure out a good generic | |
63 | - mechanism for it at that time. */ | |
64 | -#define HAVE_DIE_NMI_POST | |
55 | +#ifdef CONFIG_X86_LOCAL_APIC | |
56 | +#include <asm/apic.h> | |
65 | 57 | #endif |
66 | 58 | |
67 | 59 | #define PFX "IPMI Watchdog: " |
... | ... | @@ -327,11 +319,6 @@ |
327 | 319 | /* If a pretimeout occurs, this is used to allow only one panic to happen. */ |
328 | 320 | static atomic_t preop_panic_excl = ATOMIC_INIT(-1); |
329 | 321 | |
330 | -#ifdef HAVE_DIE_NMI_POST | |
331 | -static int testing_nmi; | |
332 | -static int nmi_handler_registered; | |
333 | -#endif | |
334 | - | |
335 | 322 | static int ipmi_heartbeat(void); |
336 | 323 | static void panic_halt_ipmi_heartbeat(void); |
337 | 324 | |
... | ... | @@ -373,10 +360,6 @@ |
373 | 360 | int hbnow = 0; |
374 | 361 | |
375 | 362 | |
376 | - /* These can be cleared as we are setting the timeout. */ | |
377 | - ipmi_start_timer_on_heartbeat = 0; | |
378 | - pretimeout_since_last_heartbeat = 0; | |
379 | - | |
380 | 363 | data[0] = 0; |
381 | 364 | WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); |
382 | 365 | |
383 | 366 | |
384 | 367 | |
... | ... | @@ -451,12 +434,13 @@ |
451 | 434 | |
452 | 435 | wait_for_completion(&set_timeout_wait); |
453 | 436 | |
454 | - mutex_unlock(&set_timeout_lock); | |
455 | - | |
456 | 437 | if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB) |
457 | 438 | || ((send_heartbeat_now) |
458 | 439 | && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY))) |
440 | + { | |
459 | 441 | rv = ipmi_heartbeat(); |
442 | + } | |
443 | + mutex_unlock(&set_timeout_lock); | |
460 | 444 | |
461 | 445 | out: |
462 | 446 | return rv; |
463 | 447 | |
464 | 448 | |
... | ... | @@ -536,10 +520,12 @@ |
536 | 520 | int rv; |
537 | 521 | struct ipmi_system_interface_addr addr; |
538 | 522 | |
539 | - if (ipmi_ignore_heartbeat) | |
523 | + if (ipmi_ignore_heartbeat) { | |
540 | 524 | return 0; |
525 | + } | |
541 | 526 | |
542 | 527 | if (ipmi_start_timer_on_heartbeat) { |
528 | + ipmi_start_timer_on_heartbeat = 0; | |
543 | 529 | ipmi_watchdog_state = action_val; |
544 | 530 | return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); |
545 | 531 | } else if (pretimeout_since_last_heartbeat) { |
... | ... | @@ -547,6 +533,7 @@ |
547 | 533 | We don't want to set the action, though, we want to |
548 | 534 | leave that alone (thus it can't be combined with the |
549 | 535 | above operation. */ |
536 | + pretimeout_since_last_heartbeat = 0; | |
550 | 537 | return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); |
551 | 538 | } |
552 | 539 | |
... | ... | @@ -934,45 +921,6 @@ |
934 | 921 | printk(KERN_CRIT PFX "Unable to register misc device\n"); |
935 | 922 | } |
936 | 923 | |
937 | -#ifdef HAVE_DIE_NMI_POST | |
938 | - if (nmi_handler_registered) { | |
939 | - int old_pretimeout = pretimeout; | |
940 | - int old_timeout = timeout; | |
941 | - int old_preop_val = preop_val; | |
942 | - | |
943 | - /* Set the pretimeout to go off in a second and give | |
944 | - ourselves plenty of time to stop the timer. */ | |
945 | - ipmi_watchdog_state = WDOG_TIMEOUT_RESET; | |
946 | - preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */ | |
947 | - pretimeout = 99; | |
948 | - timeout = 100; | |
949 | - | |
950 | - testing_nmi = 1; | |
951 | - | |
952 | - rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); | |
953 | - if (rv) { | |
954 | - printk(KERN_WARNING PFX "Error starting timer to" | |
955 | - " test NMI: 0x%x. The NMI pretimeout will" | |
956 | - " likely not work\n", rv); | |
957 | - rv = 0; | |
958 | - goto out_restore; | |
959 | - } | |
960 | - | |
961 | - msleep(1500); | |
962 | - | |
963 | - if (testing_nmi != 2) { | |
964 | - printk(KERN_WARNING PFX "IPMI NMI didn't seem to" | |
965 | - " occur. The NMI pretimeout will" | |
966 | - " likely not work\n"); | |
967 | - } | |
968 | - out_restore: | |
969 | - testing_nmi = 0; | |
970 | - preop_val = old_preop_val; | |
971 | - pretimeout = old_pretimeout; | |
972 | - timeout = old_timeout; | |
973 | - } | |
974 | -#endif | |
975 | - | |
976 | 924 | out: |
977 | 925 | up_write(®ister_sem); |
978 | 926 | |
... | ... | @@ -982,10 +930,6 @@ |
982 | 930 | ipmi_watchdog_state = action_val; |
983 | 931 | ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); |
984 | 932 | printk(KERN_INFO PFX "Starting now!\n"); |
985 | - } else { | |
986 | - /* Stop the timer now. */ | |
987 | - ipmi_watchdog_state = WDOG_TIMEOUT_NONE; | |
988 | - ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); | |
989 | 933 | } |
990 | 934 | } |
991 | 935 | |
992 | 936 | |
993 | 937 | |
994 | 938 | |
995 | 939 | |
996 | 940 | |
... | ... | @@ -1022,28 +966,17 @@ |
1022 | 966 | up_write(®ister_sem); |
1023 | 967 | } |
1024 | 968 | |
1025 | -#ifdef HAVE_DIE_NMI_POST | |
969 | +#ifdef HAVE_NMI_HANDLER | |
1026 | 970 | static int |
1027 | -ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) | |
971 | +ipmi_nmi(void *dev_id, int cpu, int handled) | |
1028 | 972 | { |
1029 | - if (val != DIE_NMI_POST) | |
1030 | - return NOTIFY_OK; | |
1031 | - | |
1032 | - if (testing_nmi) { | |
1033 | - testing_nmi = 2; | |
1034 | - return NOTIFY_STOP; | |
1035 | - } | |
1036 | - | |
1037 | 973 | /* If we are not expecting a timeout, ignore it. */ |
1038 | 974 | if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) |
1039 | - return NOTIFY_OK; | |
975 | + return NOTIFY_DONE; | |
1040 | 976 | |
1041 | - if (preaction_val != WDOG_PRETIMEOUT_NMI) | |
1042 | - return NOTIFY_OK; | |
1043 | - | |
1044 | 977 | /* If no one else handled the NMI, we assume it was the IPMI |
1045 | 978 | watchdog. */ |
1046 | - if (preop_val == WDOG_PREOP_PANIC) { | |
979 | + if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) { | |
1047 | 980 | /* On some machines, the heartbeat will give |
1048 | 981 | an error and not work unless we re-enable |
1049 | 982 | the timer. So do so. */ |
1050 | 983 | |
1051 | 984 | |
... | ... | @@ -1052,12 +985,18 @@ |
1052 | 985 | panic(PFX "pre-timeout"); |
1053 | 986 | } |
1054 | 987 | |
1055 | - return NOTIFY_STOP; | |
988 | + return NOTIFY_DONE; | |
1056 | 989 | } |
1057 | 990 | |
1058 | -static struct notifier_block ipmi_nmi_handler = { | |
1059 | - .notifier_call = ipmi_nmi | |
991 | +static struct nmi_handler ipmi_nmi_handler = | |
992 | +{ | |
993 | + .link = LIST_HEAD_INIT(ipmi_nmi_handler.link), | |
994 | + .dev_name = "ipmi_watchdog", | |
995 | + .dev_id = NULL, | |
996 | + .handler = ipmi_nmi, | |
997 | + .priority = 0, /* Call us last. */ | |
1060 | 998 | }; |
999 | +int nmi_handler_registered; | |
1061 | 1000 | #endif |
1062 | 1001 | |
1063 | 1002 | static int wdog_reboot_handler(struct notifier_block *this, |
... | ... | @@ -1174,7 +1113,7 @@ |
1174 | 1113 | preaction_val = WDOG_PRETIMEOUT_NONE; |
1175 | 1114 | else if (strcmp(inval, "pre_smi") == 0) |
1176 | 1115 | preaction_val = WDOG_PRETIMEOUT_SMI; |
1177 | -#ifdef HAVE_DIE_NMI_POST | |
1116 | +#ifdef HAVE_NMI_HANDLER | |
1178 | 1117 | else if (strcmp(inval, "pre_nmi") == 0) |
1179 | 1118 | preaction_val = WDOG_PRETIMEOUT_NMI; |
1180 | 1119 | #endif |
... | ... | @@ -1208,7 +1147,7 @@ |
1208 | 1147 | |
1209 | 1148 | static void check_parms(void) |
1210 | 1149 | { |
1211 | -#ifdef HAVE_DIE_NMI_POST | |
1150 | +#ifdef HAVE_NMI_HANDLER | |
1212 | 1151 | int do_nmi = 0; |
1213 | 1152 | int rv; |
1214 | 1153 | |
1215 | 1154 | |
... | ... | @@ -1221,9 +1160,20 @@ |
1221 | 1160 | preop_op("preop_none", NULL); |
1222 | 1161 | do_nmi = 0; |
1223 | 1162 | } |
1163 | +#ifdef CONFIG_X86_LOCAL_APIC | |
1164 | + if (nmi_watchdog == NMI_IO_APIC) { | |
1165 | + printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC" | |
1166 | + " mode (value is %d), that is incompatible" | |
1167 | + " with using NMI in the IPMI watchdog." | |
1168 | + " Disabling IPMI nmi pretimeout.\n", | |
1169 | + nmi_watchdog); | |
1170 | + preaction_val = WDOG_PRETIMEOUT_NONE; | |
1171 | + do_nmi = 0; | |
1172 | + } | |
1173 | +#endif | |
1224 | 1174 | } |
1225 | 1175 | if (do_nmi && !nmi_handler_registered) { |
1226 | - rv = register_die_notifier(&ipmi_nmi_handler); | |
1176 | + rv = request_nmi(&ipmi_nmi_handler); | |
1227 | 1177 | if (rv) { |
1228 | 1178 | printk(KERN_WARNING PFX |
1229 | 1179 | "Can't register nmi handler\n"); |
... | ... | @@ -1231,7 +1181,7 @@ |
1231 | 1181 | } else |
1232 | 1182 | nmi_handler_registered = 1; |
1233 | 1183 | } else if (!do_nmi && nmi_handler_registered) { |
1234 | - unregister_die_notifier(&ipmi_nmi_handler); | |
1184 | + release_nmi(&ipmi_nmi_handler); | |
1235 | 1185 | nmi_handler_registered = 0; |
1236 | 1186 | } |
1237 | 1187 | #endif |
... | ... | @@ -1267,9 +1217,9 @@ |
1267 | 1217 | |
1268 | 1218 | rv = ipmi_smi_watcher_register(&smi_watcher); |
1269 | 1219 | if (rv) { |
1270 | -#ifdef HAVE_DIE_NMI_POST | |
1271 | - if (nmi_handler_registered) | |
1272 | - unregister_die_notifier(&ipmi_nmi_handler); | |
1220 | +#ifdef HAVE_NMI_HANDLER | |
1221 | + if (preaction_val == WDOG_PRETIMEOUT_NMI) | |
1222 | + release_nmi(&ipmi_nmi_handler); | |
1273 | 1223 | #endif |
1274 | 1224 | atomic_notifier_chain_unregister(&panic_notifier_list, |
1275 | 1225 | &wdog_panic_notifier); |
1276 | 1226 | |
... | ... | @@ -1288,9 +1238,9 @@ |
1288 | 1238 | ipmi_smi_watcher_unregister(&smi_watcher); |
1289 | 1239 | ipmi_unregister_watchdog(watchdog_ifnum); |
1290 | 1240 | |
1291 | -#ifdef HAVE_DIE_NMI_POST | |
1241 | +#ifdef HAVE_NMI_HANDLER | |
1292 | 1242 | if (nmi_handler_registered) |
1293 | - unregister_die_notifier(&ipmi_nmi_handler); | |
1243 | + release_nmi(&ipmi_nmi_handler); | |
1294 | 1244 | #endif |
1295 | 1245 | |
1296 | 1246 | atomic_notifier_chain_unregister(&panic_notifier_list, |
include/asm-i386/kdebug.h