Commit faa8b6c3c2e1454175609167a25ae525d075f045

Authored by Linus Torvalds
1 parent 3ec2ab5514

Revert "ipmi: add new IPMI nmi watchdog handling"

This reverts commit f64da958dfc83335de1d2bef9d3868f30feb4e53.

Andi Kleen is unhappy with the changes, and they really do not seem
worth it.  IPMI could use DIE_NMI_IPI instead of the new callback, even
though that ends up having its own set of problems too, mainly because
the IPMI code cannot really know the NMI was from IPMI or not.

Manually fix up conflicts in arch/x86_64/kernel/traps.c and
drivers/char/ipmi/ipmi_watchdog.c.

Cc: Andi Kleen <ak@suse.de>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Corey Minyard <minyard@acm.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 42 additions and 102 deletions Side-by-side Diff

arch/i386/kernel/traps.c
... ... @@ -733,11 +733,6 @@
733 733 */
734 734 if (nmi_watchdog_tick(regs, reason))
735 735 return;
736   -#endif
737   - if (notify_die(DIE_NMI_POST, "nmi_post", regs, reason, 2, 0)
738   - == NOTIFY_STOP)
739   - return;
740   -#ifdef CONFIG_X86_LOCAL_APIC
741 736 if (!do_nmi_callback(regs, smp_processor_id()))
742 737 #endif
743 738 unknown_nmi_error(reason, regs);
arch/x86_64/kernel/traps.c
... ... @@ -776,9 +776,6 @@
776 776 */
777 777 if (nmi_watchdog_tick(regs,reason))
778 778 return;
779   - if (notify_die(DIE_NMI_POST, "nmi_post", regs, reason, 2, 0)
780   - == NOTIFY_STOP)
781   - return;
782 779 if (!do_nmi_callback(regs,cpu))
783 780 unknown_nmi_error(reason, regs);
784 781  
drivers/char/ipmi/ipmi_watchdog.c
... ... @@ -50,18 +50,10 @@
50 50 #include <linux/poll.h>
51 51 #include <linux/string.h>
52 52 #include <linux/ctype.h>
53   -#include <linux/delay.h>
54 53 #include <asm/atomic.h>
55 54  
56   -#ifdef CONFIG_X86
57   -/* This is ugly, but I've determined that x86 is the only architecture
58   - that can reasonably support the IPMI NMI watchdog timeout at this
59   - time. If another architecture adds this capability somehow, it
60   - will have to be a somewhat different mechanism and I have no idea
61   - how it will work. So in the unlikely event that another
62   - architecture supports this, we can figure out a good generic
63   - mechanism for it at that time. */
64   -#define HAVE_DIE_NMI_POST
  55 +#ifdef CONFIG_X86_LOCAL_APIC
  56 +#include <asm/apic.h>
65 57 #endif
66 58  
67 59 #define PFX "IPMI Watchdog: "
... ... @@ -327,11 +319,6 @@
327 319 /* If a pretimeout occurs, this is used to allow only one panic to happen. */
328 320 static atomic_t preop_panic_excl = ATOMIC_INIT(-1);
329 321  
330   -#ifdef HAVE_DIE_NMI_POST
331   -static int testing_nmi;
332   -static int nmi_handler_registered;
333   -#endif
334   -
335 322 static int ipmi_heartbeat(void);
336 323 static void panic_halt_ipmi_heartbeat(void);
337 324  
... ... @@ -373,10 +360,6 @@
373 360 int hbnow = 0;
374 361  
375 362  
376   - /* These can be cleared as we are setting the timeout. */
377   - ipmi_start_timer_on_heartbeat = 0;
378   - pretimeout_since_last_heartbeat = 0;
379   -
380 363 data[0] = 0;
381 364 WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
382 365  
383 366  
384 367  
... ... @@ -451,12 +434,13 @@
451 434  
452 435 wait_for_completion(&set_timeout_wait);
453 436  
454   - mutex_unlock(&set_timeout_lock);
455   -
456 437 if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
457 438 || ((send_heartbeat_now)
458 439 && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
  440 + {
459 441 rv = ipmi_heartbeat();
  442 + }
  443 + mutex_unlock(&set_timeout_lock);
460 444  
461 445 out:
462 446 return rv;
463 447  
464 448  
... ... @@ -536,10 +520,12 @@
536 520 int rv;
537 521 struct ipmi_system_interface_addr addr;
538 522  
539   - if (ipmi_ignore_heartbeat)
  523 + if (ipmi_ignore_heartbeat) {
540 524 return 0;
  525 + }
541 526  
542 527 if (ipmi_start_timer_on_heartbeat) {
  528 + ipmi_start_timer_on_heartbeat = 0;
543 529 ipmi_watchdog_state = action_val;
544 530 return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
545 531 } else if (pretimeout_since_last_heartbeat) {
... ... @@ -547,6 +533,7 @@
547 533 We don't want to set the action, though, we want to
548 534 leave that alone (thus it can't be combined with the
549 535 above operation. */
  536 + pretimeout_since_last_heartbeat = 0;
550 537 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
551 538 }
552 539  
... ... @@ -934,45 +921,6 @@
934 921 printk(KERN_CRIT PFX "Unable to register misc device\n");
935 922 }
936 923  
937   -#ifdef HAVE_DIE_NMI_POST
938   - if (nmi_handler_registered) {
939   - int old_pretimeout = pretimeout;
940   - int old_timeout = timeout;
941   - int old_preop_val = preop_val;
942   -
943   - /* Set the pretimeout to go off in a second and give
944   - ourselves plenty of time to stop the timer. */
945   - ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
946   - preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */
947   - pretimeout = 99;
948   - timeout = 100;
949   -
950   - testing_nmi = 1;
951   -
952   - rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
953   - if (rv) {
954   - printk(KERN_WARNING PFX "Error starting timer to"
955   - " test NMI: 0x%x. The NMI pretimeout will"
956   - " likely not work\n", rv);
957   - rv = 0;
958   - goto out_restore;
959   - }
960   -
961   - msleep(1500);
962   -
963   - if (testing_nmi != 2) {
964   - printk(KERN_WARNING PFX "IPMI NMI didn't seem to"
965   - " occur. The NMI pretimeout will"
966   - " likely not work\n");
967   - }
968   - out_restore:
969   - testing_nmi = 0;
970   - preop_val = old_preop_val;
971   - pretimeout = old_pretimeout;
972   - timeout = old_timeout;
973   - }
974   -#endif
975   -
976 924 out:
977 925 up_write(&register_sem);
978 926  
... ... @@ -982,10 +930,6 @@
982 930 ipmi_watchdog_state = action_val;
983 931 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
984 932 printk(KERN_INFO PFX "Starting now!\n");
985   - } else {
986   - /* Stop the timer now. */
987   - ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
988   - ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
989 933 }
990 934 }
991 935  
992 936  
993 937  
994 938  
995 939  
996 940  
... ... @@ -1022,28 +966,17 @@
1022 966 up_write(&register_sem);
1023 967 }
1024 968  
1025   -#ifdef HAVE_DIE_NMI_POST
  969 +#ifdef HAVE_NMI_HANDLER
1026 970 static int
1027   -ipmi_nmi(struct notifier_block *self, unsigned long val, void *data)
  971 +ipmi_nmi(void *dev_id, int cpu, int handled)
1028 972 {
1029   - if (val != DIE_NMI_POST)
1030   - return NOTIFY_OK;
1031   -
1032   - if (testing_nmi) {
1033   - testing_nmi = 2;
1034   - return NOTIFY_STOP;
1035   - }
1036   -
1037 973 /* If we are not expecting a timeout, ignore it. */
1038 974 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
1039   - return NOTIFY_OK;
  975 + return NOTIFY_DONE;
1040 976  
1041   - if (preaction_val != WDOG_PRETIMEOUT_NMI)
1042   - return NOTIFY_OK;
1043   -
1044 977 /* If no one else handled the NMI, we assume it was the IPMI
1045 978 watchdog. */
1046   - if (preop_val == WDOG_PREOP_PANIC) {
  979 + if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) {
1047 980 /* On some machines, the heartbeat will give
1048 981 an error and not work unless we re-enable
1049 982 the timer. So do so. */
1050 983  
1051 984  
... ... @@ -1052,12 +985,18 @@
1052 985 panic(PFX "pre-timeout");
1053 986 }
1054 987  
1055   - return NOTIFY_STOP;
  988 + return NOTIFY_DONE;
1056 989 }
1057 990  
1058   -static struct notifier_block ipmi_nmi_handler = {
1059   - .notifier_call = ipmi_nmi
  991 +static struct nmi_handler ipmi_nmi_handler =
  992 +{
  993 + .link = LIST_HEAD_INIT(ipmi_nmi_handler.link),
  994 + .dev_name = "ipmi_watchdog",
  995 + .dev_id = NULL,
  996 + .handler = ipmi_nmi,
  997 + .priority = 0, /* Call us last. */
1060 998 };
  999 +int nmi_handler_registered;
1061 1000 #endif
1062 1001  
1063 1002 static int wdog_reboot_handler(struct notifier_block *this,
... ... @@ -1174,7 +1113,7 @@
1174 1113 preaction_val = WDOG_PRETIMEOUT_NONE;
1175 1114 else if (strcmp(inval, "pre_smi") == 0)
1176 1115 preaction_val = WDOG_PRETIMEOUT_SMI;
1177   -#ifdef HAVE_DIE_NMI_POST
  1116 +#ifdef HAVE_NMI_HANDLER
1178 1117 else if (strcmp(inval, "pre_nmi") == 0)
1179 1118 preaction_val = WDOG_PRETIMEOUT_NMI;
1180 1119 #endif
... ... @@ -1208,7 +1147,7 @@
1208 1147  
1209 1148 static void check_parms(void)
1210 1149 {
1211   -#ifdef HAVE_DIE_NMI_POST
  1150 +#ifdef HAVE_NMI_HANDLER
1212 1151 int do_nmi = 0;
1213 1152 int rv;
1214 1153  
1215 1154  
... ... @@ -1221,9 +1160,20 @@
1221 1160 preop_op("preop_none", NULL);
1222 1161 do_nmi = 0;
1223 1162 }
  1163 +#ifdef CONFIG_X86_LOCAL_APIC
  1164 + if (nmi_watchdog == NMI_IO_APIC) {
  1165 + printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC"
  1166 + " mode (value is %d), that is incompatible"
  1167 + " with using NMI in the IPMI watchdog."
  1168 + " Disabling IPMI nmi pretimeout.\n",
  1169 + nmi_watchdog);
  1170 + preaction_val = WDOG_PRETIMEOUT_NONE;
  1171 + do_nmi = 0;
  1172 + }
  1173 +#endif
1224 1174 }
1225 1175 if (do_nmi && !nmi_handler_registered) {
1226   - rv = register_die_notifier(&ipmi_nmi_handler);
  1176 + rv = request_nmi(&ipmi_nmi_handler);
1227 1177 if (rv) {
1228 1178 printk(KERN_WARNING PFX
1229 1179 "Can't register nmi handler\n");
... ... @@ -1231,7 +1181,7 @@
1231 1181 } else
1232 1182 nmi_handler_registered = 1;
1233 1183 } else if (!do_nmi && nmi_handler_registered) {
1234   - unregister_die_notifier(&ipmi_nmi_handler);
  1184 + release_nmi(&ipmi_nmi_handler);
1235 1185 nmi_handler_registered = 0;
1236 1186 }
1237 1187 #endif
... ... @@ -1267,9 +1217,9 @@
1267 1217  
1268 1218 rv = ipmi_smi_watcher_register(&smi_watcher);
1269 1219 if (rv) {
1270   -#ifdef HAVE_DIE_NMI_POST
1271   - if (nmi_handler_registered)
1272   - unregister_die_notifier(&ipmi_nmi_handler);
  1220 +#ifdef HAVE_NMI_HANDLER
  1221 + if (preaction_val == WDOG_PRETIMEOUT_NMI)
  1222 + release_nmi(&ipmi_nmi_handler);
1273 1223 #endif
1274 1224 atomic_notifier_chain_unregister(&panic_notifier_list,
1275 1225 &wdog_panic_notifier);
1276 1226  
... ... @@ -1288,9 +1238,9 @@
1288 1238 ipmi_smi_watcher_unregister(&smi_watcher);
1289 1239 ipmi_unregister_watchdog(watchdog_ifnum);
1290 1240  
1291   -#ifdef HAVE_DIE_NMI_POST
  1241 +#ifdef HAVE_NMI_HANDLER
1292 1242 if (nmi_handler_registered)
1293   - unregister_die_notifier(&ipmi_nmi_handler);
  1243 + release_nmi(&ipmi_nmi_handler);
1294 1244 #endif
1295 1245  
1296 1246 atomic_notifier_chain_unregister(&panic_notifier_list,
include/asm-i386/kdebug.h
... ... @@ -27,7 +27,6 @@
27 27 DIE_GPF,
28 28 DIE_CALL,
29 29 DIE_NMI_IPI,
30   - DIE_NMI_POST,
31 30 DIE_PAGE_FAULT,
32 31 };
33 32  
include/asm-x86_64/kdebug.h
... ... @@ -22,7 +22,6 @@
22 22 DIE_GPF,
23 23 DIE_CALL,
24 24 DIE_NMI_IPI,
25   - DIE_NMI_POST,
26 25 DIE_PAGE_FAULT,
27 26 };
28 27