Commit f6edea77c8c83760d74356ce6bd45d530d32b27f

Authored by Chen, Gong
Committed by Tony Luck
1 parent fbeef85fd2

ACPI, APEI, CPER: Cleanup CPER memory error output format

Memory error reporting is much too verbose.  Most users do not care about
the DIMM internal bank/row/column information. Downgrade the fine details
to "pr_debug" status so that those few who do care can get them if they
really want to.  The detailed information will later be provided by
the perf/trace interface.
Since things are still a bit scary, and users are sometimes overly
nervous, provide a reassuring message that corrected errors do not
generally require any further action.

Suggested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

Showing 1 changed file with 31 additions and 36 deletions Side-by-side Diff

drivers/acpi/apei/cper.c
... ... @@ -33,6 +33,7 @@
33 33 #include <linux/pci.h>
34 34 #include <linux/aer.h>
35 35  
  36 +#define INDENT_SP " "
36 37 /*
37 38 * CPER record ID need to be unique even after reboot, because record
38 39 * ID is used as index for ERST storage, while CPER records from
39 40  
40 41  
41 42  
42 43  
43 44  
44 45  
45 46  
46 47  
47 48  
48 49  
49 50  
... ... @@ -206,29 +207,29 @@
206 207 printk("%s""physical_address_mask: 0x%016llx\n",
207 208 pfx, mem->physical_addr_mask);
208 209 if (mem->validation_bits & CPER_MEM_VALID_NODE)
209   - printk("%s""node: %d\n", pfx, mem->node);
  210 + pr_debug("node: %d\n", mem->node);
210 211 if (mem->validation_bits & CPER_MEM_VALID_CARD)
211   - printk("%s""card: %d\n", pfx, mem->card);
  212 + pr_debug("card: %d\n", mem->card);
212 213 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
213   - printk("%s""module: %d\n", pfx, mem->module);
  214 + pr_debug("module: %d\n", mem->module);
214 215 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
215   - printk("%s""rank: %d\n", pfx, mem->rank);
  216 + pr_debug("rank: %d\n", mem->rank);
216 217 if (mem->validation_bits & CPER_MEM_VALID_BANK)
217   - printk("%s""bank: %d\n", pfx, mem->bank);
  218 + pr_debug("bank: %d\n", mem->bank);
218 219 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
219   - printk("%s""device: %d\n", pfx, mem->device);
  220 + pr_debug("device: %d\n", mem->device);
220 221 if (mem->validation_bits & CPER_MEM_VALID_ROW)
221   - printk("%s""row: %d\n", pfx, mem->row);
  222 + pr_debug("row: %d\n", mem->row);
222 223 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
223   - printk("%s""column: %d\n", pfx, mem->column);
  224 + pr_debug("column: %d\n", mem->column);
224 225 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
225   - printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
  226 + pr_debug("bit_position: %d\n", mem->bit_pos);
226 227 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
227   - printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
  228 + pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
228 229 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
229   - printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
  230 + pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
230 231 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
231   - printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
  232 + pr_debug("target_id: 0x%016llx\n", mem->target_id);
232 233 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
233 234 u8 etype = mem->error_type;
234 235 printk("%s""error_type: %d, %s\n", pfx, etype,
235 236  
236 237  
237 238  
238 239  
239 240  
240 241  
241 242  
242 243  
243 244  
244 245  
245 246  
... ... @@ -296,55 +297,45 @@
296 297 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
297 298 }
298 299  
299   -static const char * const cper_estatus_section_flag_strs[] = {
300   - "primary",
301   - "containment warning",
302   - "reset",
303   - "error threshold exceeded",
304   - "resource not accessible",
305   - "latent error",
306   -};
307   -
308 300 static void cper_estatus_print_section(
309 301 const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
310 302 {
311 303 uuid_le *sec_type = (uuid_le *)gdata->section_type;
312 304 __u16 severity;
  305 + char newpfx[64];
313 306  
314 307 severity = gdata->error_severity;
315   - printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
  308 + printk("%s""Error %d, type: %s\n", pfx, sec_no,
316 309 cper_severity_str(severity));
317   - printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
318   - cper_print_bits(pfx, gdata->flags, cper_estatus_section_flag_strs,
319   - ARRAY_SIZE(cper_estatus_section_flag_strs));
320 310 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
321 311 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
322 312 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
323 313 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
324 314  
  315 + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
325 316 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
326 317 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
327   - printk("%s""section_type: general processor error\n", pfx);
  318 + printk("%s""section_type: general processor error\n", newpfx);
328 319 if (gdata->error_data_length >= sizeof(*proc_err))
329   - cper_print_proc_generic(pfx, proc_err);
  320 + cper_print_proc_generic(newpfx, proc_err);
330 321 else
331 322 goto err_section_too_small;
332 323 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
333 324 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
334   - printk("%s""section_type: memory error\n", pfx);
  325 + printk("%s""section_type: memory error\n", newpfx);
335 326 if (gdata->error_data_length >= sizeof(*mem_err))
336   - cper_print_mem(pfx, mem_err);
  327 + cper_print_mem(newpfx, mem_err);
337 328 else
338 329 goto err_section_too_small;
339 330 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
340 331 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
341   - printk("%s""section_type: PCIe error\n", pfx);
  332 + printk("%s""section_type: PCIe error\n", newpfx);
342 333 if (gdata->error_data_length >= sizeof(*pcie))
343   - cper_print_pcie(pfx, pcie, gdata);
  334 + cper_print_pcie(newpfx, pcie, gdata);
344 335 else
345 336 goto err_section_too_small;
346 337 } else
347   - printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
  338 + printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
348 339  
349 340 return;
350 341  
351 342  
352 343  
353 344  
354 345  
... ... @@ -358,17 +349,21 @@
358 349 struct acpi_generic_data *gdata;
359 350 unsigned int data_len, gedata_len;
360 351 int sec_no = 0;
  352 + char newpfx[64];
361 353 __u16 severity;
362 354  
363   - printk("%s""Generic Hardware Error Status\n", pfx);
364 355 severity = estatus->error_severity;
365   - printk("%s""severity: %d, %s\n", pfx, severity,
366   - cper_severity_str(severity));
  356 + if (severity == CPER_SEV_CORRECTED)
  357 + printk("%s%s\n", pfx,
  358 + "It has been corrected by h/w "
  359 + "and requires no further action");
  360 + printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
367 361 data_len = estatus->data_length;
368 362 gdata = (struct acpi_generic_data *)(estatus + 1);
  363 + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
369 364 while (data_len >= sizeof(*gdata)) {
370 365 gedata_len = gdata->error_data_length;
371   - cper_estatus_print_section(pfx, gdata, sec_no);
  366 + cper_estatus_print_section(newpfx, gdata, sec_no);
372 367 data_len -= gedata_len + sizeof(*gdata);
373 368 gdata = (void *)(gdata + 1) + gedata_len;
374 369 sec_no++;