Commit 44a69f6195628f6f940566d133a72987559e102d

Authored by Tomasz Nowicki
Committed by Tony Luck
1 parent 9dae3d0d9e

acpi, apei, ghes: Make NMI error notification a GHES architecture extension.

Currently APEI depends on the x86 architecture. This is because the NMI
hardware error notification of GHES is currently supported on x86 only.
However, many other APEI features can still be used perfectly well by other
architectures.

This commit adds two symbols:
1. HAVE_ACPI_APEI for those archs which support APEI.
2. HAVE_ACPI_APEI_NMI which is used for NMI code isolation in the ghes.c
   file. NMI-related data and functions are grouped so they can be wrapped
   inside one #ifdef section. Appropriate function stubs are provided for
   the !NMI case.

Note that there are no functional changes for x86, because the
HAVE_ACPI_APEI and HAVE_ACPI_APEI_NMI symbols are hard-selected there.

Signed-off-by: Tomasz Nowicki <tomasz.nowicki@linaro.org>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>

Showing 4 changed files with 110 additions and 53 deletions Side-by-side Diff

... ... @@ -132,6 +132,8 @@
132 132 select GENERIC_CPU_AUTOPROBE
133 133 select HAVE_ARCH_AUDITSYSCALL
134 134 select ARCH_SUPPORTS_ATOMIC_RMW
  135 + select HAVE_ACPI_APEI if ACPI
  136 + select HAVE_ACPI_APEI_NMI if ACPI
135 137  
136 138 config INSTRUCTION_DECODER
137 139 def_bool y
drivers/acpi/apei/Kconfig
  1 +config HAVE_ACPI_APEI
  2 + bool
  3 +
  4 +config HAVE_ACPI_APEI_NMI
  5 + bool
  6 +
1 7 config ACPI_APEI
2 8 bool "ACPI Platform Error Interface (APEI)"
3 9 select MISC_FILESYSTEMS
4 10 select PSTORE
5 11 select UEFI_CPER
6   - depends on X86
  12 + depends on HAVE_ACPI_APEI
7 13 help
8 14 APEI allows to report errors (for example from the chipset)
9 15 to the operating system. This improves NMI handling
drivers/acpi/apei/ghes.c
... ... @@ -47,11 +47,11 @@
47 47 #include <linux/genalloc.h>
48 48 #include <linux/pci.h>
49 49 #include <linux/aer.h>
  50 +#include <linux/nmi.h>
50 51  
51 52 #include <acpi/ghes.h>
52 53 #include <acpi/apei.h>
53 54 #include <asm/tlbflush.h>
54   -#include <asm/nmi.h>
55 55  
56 56 #include "apei-internal.h"
57 57  
... ... @@ -86,8 +86,6 @@
86 86 bool ghes_disable;
87 87 module_param_named(disable, ghes_disable, bool, 0);
88 88  
89   -static int ghes_panic_timeout __read_mostly = 30;
90   -
91 89 /*
92 90 * All error sources notified with SCI shares one notifier function,
93 91 * so they need to be linked and checked one by one. This is applied
94 92  
... ... @@ -97,16 +95,9 @@
97 95 * list changing, not for traversing.
98 96 */
99 97 static LIST_HEAD(ghes_sci);
100   -static LIST_HEAD(ghes_nmi);
101 98 static DEFINE_MUTEX(ghes_list_mutex);
102 99  
103 100 /*
104   - * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
105   - * mutual exclusion.
106   - */
107   -static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
108   -
109   -/*
110 101 * Because the memory area used to transfer hardware error information
111 102 * from BIOS to Linux can be determined only in NMI, IRQ or timer
112 103 * handler, but general ioremap can not be used in atomic context, so
113 104  
... ... @@ -130,18 +121,8 @@
130 121 static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
131 122 static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
132 123  
133   -/*
134   - * printk is not safe in NMI context. So in NMI handler, we allocate
135   - * required memory from lock-less memory allocator
136   - * (ghes_estatus_pool), save estatus into it, put them into lock-less
137   - * list (ghes_estatus_llist), then delay printk into IRQ context via
138   - * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
139   - * required pool size by all NMI error source.
140   - */
141 124 static struct gen_pool *ghes_estatus_pool;
142 125 static unsigned long ghes_estatus_pool_size_request;
143   -static struct llist_head ghes_estatus_llist;
144   -static struct irq_work ghes_proc_irq_work;
145 126  
146 127 struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
147 128 static atomic_t ghes_estatus_cache_alloced;
... ... @@ -249,11 +230,6 @@
249 230 return 0;
250 231 }
251 232  
252   -static void ghes_estatus_pool_shrink(unsigned long len)
253   -{
254   - ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
255   -}
256   -
257 233 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
258 234 {
259 235 struct ghes *ghes;
... ... @@ -732,6 +708,32 @@
732 708 return ret;
733 709 }
734 710  
  711 +static struct notifier_block ghes_notifier_sci = {
  712 + .notifier_call = ghes_notify_sci,
  713 +};
  714 +
  715 +#ifdef CONFIG_HAVE_ACPI_APEI_NMI
  716 +/*
  717 + * printk is not safe in NMI context. So in NMI handler, we allocate
  718 + * required memory from lock-less memory allocator
  719 + * (ghes_estatus_pool), save estatus into it, put them into lock-less
  720 + * list (ghes_estatus_llist), then delay printk into IRQ context via
  721 + * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
  722 + * required pool size by all NMI error source.
  723 + */
  724 +static struct llist_head ghes_estatus_llist;
  725 +static struct irq_work ghes_proc_irq_work;
  726 +
  727 +/*
  728 + * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
  729 + * mutual exclusion.
  730 + */
  731 +static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
  732 +
  733 +static LIST_HEAD(ghes_nmi);
  734 +
  735 +static int ghes_panic_timeout __read_mostly = 30;
  736 +
735 737 static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
736 738 {
737 739 struct llist_node *next, *tail = NULL;
... ... @@ -875,10 +877,6 @@
875 877 return ret;
876 878 }
877 879  
878   -static struct notifier_block ghes_notifier_sci = {
879   - .notifier_call = ghes_notify_sci,
880   -};
881   -
882 880 static unsigned long ghes_esource_prealloc_size(
883 881 const struct acpi_hest_generic *generic)
884 882 {
885 883  
... ... @@ -894,11 +892,71 @@
894 892 return prealloc_size;
895 893 }
896 894  
  895 +static void ghes_estatus_pool_shrink(unsigned long len)
  896 +{
  897 + ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
  898 +}
  899 +
  900 +static void ghes_nmi_add(struct ghes *ghes)
  901 +{
  902 + unsigned long len;
  903 +
  904 + len = ghes_esource_prealloc_size(ghes->generic);
  905 + ghes_estatus_pool_expand(len);
  906 + mutex_lock(&ghes_list_mutex);
  907 + if (list_empty(&ghes_nmi))
  908 + register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
  909 + list_add_rcu(&ghes->list, &ghes_nmi);
  910 + mutex_unlock(&ghes_list_mutex);
  911 +}
  912 +
  913 +static void ghes_nmi_remove(struct ghes *ghes)
  914 +{
  915 + unsigned long len;
  916 +
  917 + mutex_lock(&ghes_list_mutex);
  918 + list_del_rcu(&ghes->list);
  919 + if (list_empty(&ghes_nmi))
  920 + unregister_nmi_handler(NMI_LOCAL, "ghes");
  921 + mutex_unlock(&ghes_list_mutex);
  922 + /*
  923 + * To synchronize with NMI handler, ghes can only be
  924 + * freed after NMI handler finishes.
  925 + */
  926 + synchronize_rcu();
  927 + len = ghes_esource_prealloc_size(ghes->generic);
  928 + ghes_estatus_pool_shrink(len);
  929 +}
  930 +
  931 +static void ghes_nmi_init_cxt(void)
  932 +{
  933 + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
  934 +}
  935 +#else /* CONFIG_HAVE_ACPI_APEI_NMI */
  936 +static inline void ghes_nmi_add(struct ghes *ghes)
  937 +{
  938 + pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
  939 + ghes->generic->header.source_id);
  940 + BUG();
  941 +}
  942 +
  943 +static inline void ghes_nmi_remove(struct ghes *ghes)
  944 +{
  945 + pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
  946 + ghes->generic->header.source_id);
  947 + BUG();
  948 +}
  949 +
  950 +static inline void ghes_nmi_init_cxt(void)
  951 +{
  952 +}
  953 +#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
  954 +
897 955 static int ghes_probe(struct platform_device *ghes_dev)
898 956 {
899 957 struct acpi_hest_generic *generic;
900 958 struct ghes *ghes = NULL;
901   - unsigned long len;
  959 +
902 960 int rc = -EINVAL;
903 961  
904 962 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
905 963  
... ... @@ -909,7 +967,13 @@
909 967 case ACPI_HEST_NOTIFY_POLLED:
910 968 case ACPI_HEST_NOTIFY_EXTERNAL:
911 969 case ACPI_HEST_NOTIFY_SCI:
  970 + break;
912 971 case ACPI_HEST_NOTIFY_NMI:
  972 + if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
  973 + pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
  974 + generic->header.source_id);
  975 + goto err;
  976 + }
913 977 break;
914 978 case ACPI_HEST_NOTIFY_LOCAL:
915 979 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
... ... @@ -970,14 +1034,7 @@
970 1034 mutex_unlock(&ghes_list_mutex);
971 1035 break;
972 1036 case ACPI_HEST_NOTIFY_NMI:
973   - len = ghes_esource_prealloc_size(generic);
974   - ghes_estatus_pool_expand(len);
975   - mutex_lock(&ghes_list_mutex);
976   - if (list_empty(&ghes_nmi))
977   - register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
978   - "ghes");
979   - list_add_rcu(&ghes->list, &ghes_nmi);
980   - mutex_unlock(&ghes_list_mutex);
  1037 + ghes_nmi_add(ghes);
981 1038 break;
982 1039 default:
983 1040 BUG();
... ... @@ -999,7 +1056,6 @@
999 1056 {
1000 1057 struct ghes *ghes;
1001 1058 struct acpi_hest_generic *generic;
1002   - unsigned long len;
1003 1059  
1004 1060 ghes = platform_get_drvdata(ghes_dev);
1005 1061 generic = ghes->generic;
... ... @@ -1020,18 +1076,7 @@
1020 1076 mutex_unlock(&ghes_list_mutex);
1021 1077 break;
1022 1078 case ACPI_HEST_NOTIFY_NMI:
1023   - mutex_lock(&ghes_list_mutex);
1024   - list_del_rcu(&ghes->list);
1025   - if (list_empty(&ghes_nmi))
1026   - unregister_nmi_handler(NMI_LOCAL, "ghes");
1027   - mutex_unlock(&ghes_list_mutex);
1028   - /*
1029   - * To synchronize with NMI handler, ghes can only be
1030   - * freed after NMI handler finishes.
1031   - */
1032   - synchronize_rcu();
1033   - len = ghes_esource_prealloc_size(generic);
1034   - ghes_estatus_pool_shrink(len);
  1079 + ghes_nmi_remove(ghes);
1035 1080 break;
1036 1081 default:
1037 1082 BUG();
... ... @@ -1075,7 +1120,7 @@
1075 1120 return -EINVAL;
1076 1121 }
1077 1122  
1078   - init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
  1123 + ghes_nmi_init_cxt();
1079 1124  
1080 1125 rc = ghes_ioremap_init();
1081 1126 if (rc)
... ... @@ -63,5 +63,9 @@
63 63 void __user *, size_t *, loff_t *);
64 64 #endif
65 65  
  66 +#ifdef CONFIG_HAVE_ACPI_APEI_NMI
  67 +#include <asm/nmi.h>
  68 +#endif
  69 +
66 70 #endif