Commit 5926ff502f6b93ca0c1654f8a5c5317ea236dbdb

Authored by Mauro Carvalho Chehab
1 parent ca0907b9e4

edac: Initialize the dimm label with the known information

Until userspace fills in the dimm labels, store the dimm location there,
as described by the memory model in use. This could eventually match what
is reported by dmidecode, making it easier for people to identify the
memory.

For example, on an Intel motherboard where the DMI table is reliable,
the first memory stick is described as:

Memory Device
	Array Handle: 0x0029
	Error Information Handle: Not Provided
	Total Width: 64 bits
	Data Width: 64 bits
	Size: 2048 MB
	Form Factor: DIMM
	Set: 1
	Locator: A1_DIMM0
	Bank Locator: A1_Node0_Channel0_Dimm0
	Type: <OUT OF SPEC>
	Type Detail: Synchronous
	Speed: 800 MHz
	Manufacturer: A1_Manufacturer0
	Serial Number: A1_SerNum0
	Asset Tag: A1_AssetTagNum0
	Part Number: A1_PartNum0

The memory named as "A1_DIMM0" is physically located at the first
memory controller (node 0), at channel 0, dimm slot 0.

After this patch, the memory label will be filled with:
	/sys/devices/system/edac/mc/csrow0/ch0_dimm_label:mc#0channel#0slot#0

And (after the new EDAC API patches) as:
	/sys/devices/system/edac/mc/mc0/dimm0/dimm_label:mc#0channel#0slot#0

So, even if the memory label is not initialized by userspace, useful
information about the error location is filled in there, especially since
several systems/motherboards provide enough info to map from
channel/slot (or branch/channel/slot) into the DIMM label. So, letting the
EDAC core fill it by default is a good thing.

It should be noted that, as the label filling happens in
edac_mc_alloc(), drivers can override it to better describe the memories
(and some actually do so).

Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

Showing 3 changed files with 27 additions and 11 deletions Side-by-side Diff

drivers/edac/edac_mc.c
... ... @@ -210,10 +210,10 @@
210 210 struct dimm_info *dimm;
211 211 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
212 212 unsigned pos[EDAC_MAX_LAYERS];
213   - void *pvt, *ptr = NULL;
214 213 unsigned size, tot_dimms = 1, count = 1;
215 214 unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
216   - int i, j, err, row, chn;
  215 + void *pvt, *p, *ptr = NULL;
  216 + int i, j, err, row, chn, n, len;
217 217 bool per_rank = false;
218 218  
219 219 BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
220 220  
... ... @@ -325,10 +325,26 @@
325 325 i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
326 326 pos[0], pos[1], pos[2], row, chn);
327 327  
328   - /* Copy DIMM location */
329   - for (j = 0; j < n_layers; j++)
  328 + /*
  329 + * Copy DIMM location and initialize it.
  330 + */
  331 + len = sizeof(dimm->label);
  332 + p = dimm->label;
  333 + n = snprintf(p, len, "mc#%u", mc_num);
  334 + p += n;
  335 + len -= n;
  336 + for (j = 0; j < n_layers; j++) {
  337 + n = snprintf(p, len, "%s#%u",
  338 + edac_layer_name[layers[j].type],
  339 + pos[j]);
  340 + p += n;
  341 + len -= n;
330 342 dimm->location[j] = pos[j];
331 343  
  344 + if (len <= 0)
  345 + break;
  346 + }
  347 +
332 348 /* Link it to the csrows old API data */
333 349 chan->dimm = dimm;
334 350 dimm->csrow = row;
... ... @@ -834,7 +850,7 @@
834 850 {
835 851 int i, index = 0;
836 852  
837   - mci->ce_count++;
  853 + mci->ce_mc++;
838 854  
839 855 if (!enable_per_layer_report) {
840 856 mci->ce_noinfo_count++;
... ... @@ -858,7 +874,7 @@
858 874 {
859 875 int i, index = 0;
860 876  
861   - mci->ue_count++;
  877 + mci->ue_mc++;
862 878  
863 879 if (!enable_per_layer_report) {
864 880 mci->ce_noinfo_count++;
drivers/edac/edac_mc_sysfs.c
... ... @@ -425,8 +425,8 @@
425 425  
426 426 mci->ue_noinfo_count = 0;
427 427 mci->ce_noinfo_count = 0;
428   - mci->ue_count = 0;
429   - mci->ce_count = 0;
  428 + mci->ue_mc = 0;
  429 + mci->ce_mc = 0;
430 430  
431 431 for (row = 0; row < mci->nr_csrows; row++) {
432 432 struct csrow_info *ri = &mci->csrows[row];
433 433  
... ... @@ -495,12 +495,12 @@
495 495 /* default attribute files for the MCI object */
496 496 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
497 497 {
498   - return sprintf(data, "%d\n", mci->ue_count);
  498 + return sprintf(data, "%d\n", mci->ue_mc);
499 499 }
500 500  
501 501 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
502 502 {
503   - return sprintf(data, "%d\n", mci->ce_count);
  503 + return sprintf(data, "%d\n", mci->ce_mc);
504 504 }
505 505  
506 506 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
include/linux/edac.h
... ... @@ -581,7 +581,7 @@
581 581 * already handles that.
582 582 */
583 583 u32 ce_noinfo_count, ue_noinfo_count;
584   - u32 ue_count, ce_count;
  584 + u32 ue_mc, ce_mc;
585 585 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
586 586  
587 587 struct completion complete;