Commit f4347553b30ec66530bfe63c84530afea3803396
1 parent
98a5ae2d99
Exists in master and in 7 other branches
amd64_edac: Remove polling mechanism
Switch to reusing the mcheck core's machine check polling mechanism instead of duplicating functionality by using the EDAC polling routine. Correct formatting while at it.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Doug Thompson <dougthompson@xmission.com>
Showing 2 changed files with 8 additions and 126 deletions Side-by-side Diff
drivers/edac/amd64_edac.c
... | ... | @@ -1979,107 +1979,6 @@ |
1979 | 1979 | } |
1980 | 1980 | |
1981 | 1981 | /* |
1982 | - * Check for valid error in the NB Status High register. If so, proceed to read | |
1983 | - * NB Status Low, NB Address Low and NB Address High registers and store data | |
1984 | - * into error structure. | |
1985 | - * | |
1986 | - * Returns: | |
1987 | - * - 1: if hardware regs contains valid error info | |
1988 | - * - 0: if no valid error is indicated | |
1989 | - */ | |
1990 | -static int amd64_get_error_info_regs(struct mem_ctl_info *mci, | |
1991 | - struct err_regs *regs) | |
1992 | -{ | |
1993 | - struct amd64_pvt *pvt; | |
1994 | - struct pci_dev *misc_f3_ctl; | |
1995 | - | |
1996 | - pvt = mci->pvt_info; | |
1997 | - misc_f3_ctl = pvt->misc_f3_ctl; | |
1998 | - | |
1999 | - if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSH, &regs->nbsh)) | |
2000 | - return 0; | |
2001 | - | |
2002 | - if (!(regs->nbsh & K8_NBSH_VALID_BIT)) | |
2003 | - return 0; | |
2004 | - | |
2005 | - /* valid error, read remaining error information registers */ | |
2006 | - if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSL, &regs->nbsl) || | |
2007 | - amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAL, &regs->nbeal) || | |
2008 | - amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAH, &regs->nbeah) || | |
2009 | - amd64_read_pci_cfg(misc_f3_ctl, K8_NBCFG, &regs->nbcfg)) | |
2010 | - return 0; | |
2011 | - | |
2012 | - return 1; | |
2013 | -} | |
2014 | - | |
2015 | -/* | |
2016 | - * This function is called to retrieve the error data from hardware and store it | |
2017 | - * in the info structure. | |
2018 | - * | |
2019 | - * Returns: | |
2020 | - * - 1: if a valid error is found | |
2021 | - * - 0: if no error is found | |
2022 | - */ | |
2023 | -static int amd64_get_error_info(struct mem_ctl_info *mci, | |
2024 | - struct err_regs *info) | |
2025 | -{ | |
2026 | - struct amd64_pvt *pvt; | |
2027 | - struct err_regs regs; | |
2028 | - | |
2029 | - pvt = mci->pvt_info; | |
2030 | - | |
2031 | - if (!amd64_get_error_info_regs(mci, info)) | |
2032 | - return 0; | |
2033 | - | |
2034 | - /* | |
2035 | - * Here's the problem with the K8's EDAC reporting: There are four | |
2036 | - * registers which report pieces of error information. They are shared | |
2037 | - * between CEs and UEs. Furthermore, contrary to what is stated in the | |
2038 | - * BKDG, the overflow bit is never used! Every error always updates the | |
2039 | - * reporting registers. | |
2040 | - * | |
2041 | - * Can you see the race condition? All four error reporting registers | |
2042 | - * must be read before a new error updates them! There is no way to read | |
2043 | - * all four registers atomically. The best than can be done is to detect | |
2044 | - * that a race has occured and then report the error without any kind of | |
2045 | - * precision. | |
2046 | - * | |
2047 | - * What is still positive is that errors are still reported and thus | |
2048 | - * problems can still be detected - just not localized because the | |
2049 | - * syndrome and address are spread out across registers. | |
2050 | - * | |
2051 | - * Grrrrr!!!!! Here's hoping that AMD fixes this in some future K8 rev. | |
2052 | - * UEs and CEs should have separate register sets with proper overflow | |
2053 | - * bits that are used! At very least the problem can be fixed by | |
2054 | - * honoring the ErrValid bit in 'nbsh' and not updating registers - just | |
2055 | - * set the overflow bit - unless the current error is CE and the new | |
2056 | - * error is UE which would be the only situation for overwriting the | |
2057 | - * current values. | |
2058 | - */ | |
2059 | - | |
2060 | - regs = *info; | |
2061 | - | |
2062 | - /* Use info from the second read - most current */ | |
2063 | - if (unlikely(!amd64_get_error_info_regs(mci, info))) | |
2064 | - return 0; | |
2065 | - | |
2066 | - /* clear the error bits in hardware */ | |
2067 | - pci_write_bits32(pvt->misc_f3_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT); | |
2068 | - | |
2069 | - /* Check for the possible race condition */ | |
2070 | - if ((regs.nbsh != info->nbsh) || | |
2071 | - (regs.nbsl != info->nbsl) || | |
2072 | - (regs.nbeah != info->nbeah) || | |
2073 | - (regs.nbeal != info->nbeal)) { | |
2074 | - amd64_mc_printk(mci, KERN_WARNING, | |
2075 | - "hardware STATUS read access race condition " | |
2076 | - "detected!\n"); | |
2077 | - return 0; | |
2078 | - } | |
2079 | - return 1; | |
2080 | -} | |
2081 | - | |
2082 | -/* | |
2083 | 1982 | * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR |
2084 | 1983 | * ADDRESS and process. |
2085 | 1984 | */ |
... | ... | @@ -2203,20 +2102,6 @@ |
2203 | 2102 | } |
2204 | 2103 | |
2205 | 2104 | /* |
2206 | - * The main polling 'check' function, called FROM the edac core to perform the | |
2207 | - * error checking and if an error is encountered, error processing. | |
2208 | - */ | |
2209 | -static void amd64_check(struct mem_ctl_info *mci) | |
2210 | -{ | |
2211 | - struct err_regs regs; | |
2212 | - | |
2213 | - if (amd64_get_error_info(mci, &regs)) { | |
2214 | - struct amd64_pvt *pvt = mci->pvt_info; | |
2215 | - amd_decode_nb_mce(pvt->mc_node_id, &regs, 1); | |
2216 | - } | |
2217 | -} | |
2218 | - | |
2219 | -/* | |
2220 | 2105 | * Input: |
2221 | 2106 | * 1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer |
2222 | 2107 | * 2) AMD Family index value |
... | ... | @@ -2755,9 +2640,6 @@ |
2755 | 2640 | mci->ctl_name = get_amd_family_name(pvt->mc_type_index); |
2756 | 2641 | mci->dev_name = pci_name(pvt->dram_f2_ctl); |
2757 | 2642 | mci->ctl_page_to_phys = NULL; |
2758 | - | |
2759 | - /* IMPORTANT: Set the polling 'check' function in this module */ | |
2760 | - mci->edac_check = amd64_check; | |
2761 | 2643 | |
2762 | 2644 | /* memory scrubber interface */ |
2763 | 2645 | mci->set_sdram_scrub_rate = amd64_set_scrub_rate; |
drivers/edac/edac_mce_amd.c
... | ... | @@ -133,7 +133,7 @@ |
133 | 133 | u32 ec = mc0_status & 0xffff; |
134 | 134 | u32 xec = (mc0_status >> 16) & 0xf; |
135 | 135 | |
136 | - pr_emerg(" Data Cache Error"); | |
136 | + pr_emerg("Data Cache Error"); | |
137 | 137 | |
138 | 138 | if (xec == 1 && TLB_ERROR(ec)) |
139 | 139 | pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); |
... | ... | @@ -176,7 +176,7 @@ |
176 | 176 | u32 ec = mc1_status & 0xffff; |
177 | 177 | u32 xec = (mc1_status >> 16) & 0xf; |
178 | 178 | |
179 | - pr_emerg(" Instruction Cache Error"); | |
179 | + pr_emerg("Instruction Cache Error"); | |
180 | 180 | |
181 | 181 | if (xec == 1 && TLB_ERROR(ec)) |
182 | 182 | pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); |
... | ... | @@ -233,7 +233,7 @@ |
233 | 233 | u32 ec = mc2_status & 0xffff; |
234 | 234 | u32 xec = (mc2_status >> 16) & 0xf; |
235 | 235 | |
236 | - pr_emerg(" Bus Unit Error"); | |
236 | + pr_emerg("Bus Unit Error"); | |
237 | 237 | |
238 | 238 | if (xec == 0x1) |
239 | 239 | pr_cont(" in the write data buffers.\n"); |
... | ... | @@ -275,7 +275,7 @@ |
275 | 275 | u32 ec = mc3_status & 0xffff; |
276 | 276 | u32 xec = (mc3_status >> 16) & 0xf; |
277 | 277 | |
278 | - pr_emerg(" Load Store Error"); | |
278 | + pr_emerg("Load Store Error"); | |
279 | 279 | |
280 | 280 | if (xec == 0x0) { |
281 | 281 | u8 rrrr = (ec >> 4) & 0xf; |
... | ... | @@ -304,7 +304,7 @@ |
304 | 304 | if (TLB_ERROR(ec) && !report_gart_errors) |
305 | 305 | return; |
306 | 306 | |
307 | - pr_emerg(" Northbridge Error, node %d", node_id); | |
307 | + pr_emerg("Northbridge Error, node %d", node_id); | |
308 | 308 | |
309 | 309 | /* |
310 | 310 | * F10h, revD can disable ErrCpu[3:0] so check that first and also the |
311 | 311 | |
312 | 312 | |
... | ... | @@ -342,13 +342,13 @@ |
342 | 342 | static inline void amd_decode_err_code(unsigned int ec) |
343 | 343 | { |
344 | 344 | if (TLB_ERROR(ec)) { |
345 | - pr_emerg(" Transaction: %s, Cache Level %s\n", | |
345 | + pr_emerg("Transaction: %s, Cache Level %s\n", | |
346 | 346 | TT_MSG(ec), LL_MSG(ec)); |
347 | 347 | } else if (MEM_ERROR(ec)) { |
348 | - pr_emerg(" Transaction: %s, Type: %s, Cache Level: %s", | |
348 | + pr_emerg("Transaction: %s, Type: %s, Cache Level: %s", | |
349 | 349 | RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); |
350 | 350 | } else if (BUS_ERROR(ec)) { |
351 | - pr_emerg(" Transaction type: %s(%s), %s, Cache Level: %s, " | |
351 | + pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, " | |
352 | 352 | "Participating Processor: %s\n", |
353 | 353 | RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), |
354 | 354 | PP_MSG(ec)); |