Commit a7d7d2e1a07e3811dc49af2962c940fd8bbb6c8f

Authored by Mauro Carvalho Chehab
1 parent 76e10d158e

edac: Create a dimm struct and move the labels into it

The way a DIMM is currently represented implies that they're
linked into a per-csrow struct. However, some drivers don't see
csrows, as they're ridden behind some chip like the AMB's
on FBDIMM's, for example.

This forced drivers to fake^Wvirtualize a csrow struct, and to create
a mess under csrow/channel original's concept.

Move the DIMM labels into a per-DIMM struct, and add there
the real location of the socket, in terms of csrow/channel.
Latter patches will modify the location to properly represent the
memory architecture.

All other drivers will use a per-csrow type of location.
Some of those drivers will require a latter conversion, as
they also fake the csrows internally.

TODO: While this patch doesn't change the existing behavior, on
csrows-based memory controllers, a csrow/channel pair points to a memory
rank. There's a known bug at the EDAC core that allows having different
labels for the same DIMM, if it has more than one rank. A latter patch
is need to merge the several ranks for a DIMM into the same dimm_info
struct, in order to avoid having different labels for the same DIMM.

The edac_mc_alloc() will now contain a per-dimm initialization loop that
will be changed by latter patches in order to match other types of
memory architectures.

Reviewed-by: Aristeu Rozanski <arozansk@redhat.com>
Reviewed-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Ranganathan Desikan <ravi@jetztechnologies.com>
Cc: "Arvind R." <arvino55@gmail.com>
Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

Showing 7 changed files with 72 additions and 32 deletions Side-by-side Diff

drivers/edac/edac_mc.c
... ... @@ -44,7 +44,7 @@
44 44 debugf4("\tchannel = %p\n", chan);
45 45 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
46 46 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
47   - debugf4("\tchannel->label = '%s'\n", chan->label);
  47 + debugf4("\tchannel->label = '%s'\n", chan->dimm->label);
48 48 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
49 49 }
50 50  
... ... @@ -157,6 +157,7 @@
157 157 struct mem_ctl_info *mci;
158 158 struct csrow_info *csi, *csrow;
159 159 struct rank_info *chi, *chp, *chan;
  160 + struct dimm_info *dimm;
160 161 void *pvt;
161 162 unsigned size;
162 163 int row, chn;
... ... @@ -170,7 +171,8 @@
170 171 mci = (struct mem_ctl_info *)0;
171 172 csi = edac_align_ptr(&mci[1], sizeof(*csi));
172 173 chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
173   - pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
  174 + dimm = edac_align_ptr(&chi[nr_chans * nr_csrows], sizeof(*dimm));
  175 + pvt = edac_align_ptr(&dimm[nr_chans * nr_csrows], sz_pvt);
174 176 size = ((unsigned long)pvt) + sz_pvt;
175 177  
176 178 mci = kzalloc(size, GFP_KERNEL);
177 179  
178 180  
... ... @@ -182,14 +184,22 @@
182 184 */
183 185 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
184 186 chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
  187 + dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
185 188 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
186 189  
187 190 /* setup index and various internal pointers */
188 191 mci->mc_idx = edac_index;
189 192 mci->csrows = csi;
  193 + mci->dimms = dimm;
190 194 mci->pvt_info = pvt;
191 195 mci->nr_csrows = nr_csrows;
192 196  
  197 + /*
  198 + * For now, assumes that a per-csrow arrangement for dimms.
  199 + * This will be latter changed.
  200 + */
  201 + dimm = mci->dimms;
  202 +
193 203 for (row = 0; row < nr_csrows; row++) {
194 204 csrow = &csi[row];
195 205 csrow->csrow_idx = row;
... ... @@ -202,6 +212,12 @@
202 212 chan = &chp[chn];
203 213 chan->chan_idx = chn;
204 214 chan->csrow = csrow;
  215 +
  216 + mci->csrows[row].channels[chn].dimm = dimm;
  217 + dimm->csrow = row;
  218 + dimm->csrow_channel = chn;
  219 + dimm++;
  220 + mci->nr_dimms++;
205 221 }
206 222 }
207 223  
... ... @@ -678,6 +694,7 @@
678 694 int row, int channel, const char *msg)
679 695 {
680 696 unsigned long remapped_page;
  697 + char *label = NULL;
681 698  
682 699 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
683 700  
... ... @@ -701,6 +718,8 @@
701 718 return;
702 719 }
703 720  
  721 + label = mci->csrows[row].channels[channel].dimm->label;
  722 +
704 723 if (edac_mc_get_log_ce())
705 724 /* FIXME - put in DIMM location */
706 725 edac_mc_printk(mci, KERN_WARNING,
... ... @@ -708,7 +727,7 @@
708 727 "0x%lx, row %d, channel %d, label \"%s\": %s\n",
709 728 page_frame_number, offset_in_page,
710 729 mci->csrows[row].grain, syndrome, row, channel,
711   - mci->csrows[row].channels[channel].label, msg);
  730 + label, msg);
712 731  
713 732 mci->ce_count++;
714 733 mci->csrows[row].ce_count++;
... ... @@ -754,6 +773,7 @@
754 773 char *pos = labels;
755 774 int chan;
756 775 int chars;
  776 + char *label = NULL;
757 777  
758 778 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
759 779  
760 780  
... ... @@ -767,15 +787,15 @@
767 787 return;
768 788 }
769 789  
770   - chars = snprintf(pos, len + 1, "%s",
771   - mci->csrows[row].channels[0].label);
  790 + label = mci->csrows[row].channels[0].dimm->label;
  791 + chars = snprintf(pos, len + 1, "%s", label);
772 792 len -= chars;
773 793 pos += chars;
774 794  
775 795 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
776 796 chan++) {
777   - chars = snprintf(pos, len + 1, ":%s",
778   - mci->csrows[row].channels[chan].label);
  797 + label = mci->csrows[row].channels[chan].dimm->label;
  798 + chars = snprintf(pos, len + 1, ":%s", label);
779 799 len -= chars;
780 800 pos += chars;
781 801 }
... ... @@ -824,6 +844,7 @@
824 844 char labels[len + 1];
825 845 char *pos = labels;
826 846 int chars;
  847 + char *label;
827 848  
828 849 if (csrow >= mci->nr_csrows) {
829 850 /* something is wrong */
830 851  
... ... @@ -858,12 +879,12 @@
858 879 mci->csrows[csrow].ue_count++;
859 880  
860 881 /* Generate the DIMM labels from the specified channels */
861   - chars = snprintf(pos, len + 1, "%s",
862   - mci->csrows[csrow].channels[channela].label);
  882 + label = mci->csrows[csrow].channels[channela].dimm->label;
  883 + chars = snprintf(pos, len + 1, "%s", label);
863 884 len -= chars;
864 885 pos += chars;
865 886 chars = snprintf(pos, len + 1, "-%s",
866   - mci->csrows[csrow].channels[channelb].label);
  887 + mci->csrows[csrow].channels[channelb].dimm->label);
867 888  
868 889 if (edac_mc_get_log_ue())
869 890 edac_mc_printk(mci, KERN_EMERG,
... ... @@ -885,6 +906,7 @@
885 906 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
886 907 unsigned int csrow, unsigned int channel, char *msg)
887 908 {
  909 + char *label = NULL;
888 910  
889 911 /* Ensure boundary values */
890 912 if (csrow >= mci->nr_csrows) {
891 913  
... ... @@ -904,12 +926,13 @@
904 926 return;
905 927 }
906 928  
  929 + label = mci->csrows[csrow].channels[channel].dimm->label;
  930 +
907 931 if (edac_mc_get_log_ce())
908 932 /* FIXME - put in DIMM location */
909 933 edac_mc_printk(mci, KERN_WARNING,
910 934 "CE row %d, channel %d, label \"%s\": %s\n",
911   - csrow, channel,
912   - mci->csrows[csrow].channels[channel].label, msg);
  935 + csrow, channel, label, msg);
913 936  
914 937 mci->ce_count++;
915 938 mci->csrows[csrow].ce_count++;
drivers/edac/edac_mc_sysfs.c
... ... @@ -170,11 +170,11 @@
170 170 char *data, int channel)
171 171 {
172 172 /* if field has not been initialized, there is nothing to send */
173   - if (!csrow->channels[channel].label[0])
  173 + if (!csrow->channels[channel].dimm->label[0])
174 174 return 0;
175 175  
176 176 return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
177   - csrow->channels[channel].label);
  177 + csrow->channels[channel].dimm->label);
178 178 }
179 179  
180 180 static ssize_t channel_dimm_label_store(struct csrow_info *csrow,
... ... @@ -184,8 +184,8 @@
184 184 ssize_t max_size = 0;
185 185  
186 186 max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1);
187   - strncpy(csrow->channels[channel].label, data, max_size);
188   - csrow->channels[channel].label[max_size] = '\0';
  187 + strncpy(csrow->channels[channel].dimm->label, data, max_size);
  188 + csrow->channels[channel].dimm->label[max_size] = '\0';
189 189  
190 190 return max_size;
191 191 }
192 192  
... ... @@ -952,9 +952,8 @@
952 952 /* CSROW error: backout what has already been registered, */
953 953 fail1:
954 954 for (i--; i >= 0; i--) {
955   - if (csrow->nr_pages > 0) {
  955 + if (mci->csrows[i].nr_pages > 0)
956 956 kobject_put(&mci->csrows[i].kobj);
957   - }
958 957 }
959 958  
960 959 /* remove the mci instance's attributes, if any */
drivers/edac/i5100_edac.c
... ... @@ -433,7 +433,7 @@
433 433 "CE chan %d, bank %u, rank %u, syndrome 0x%lx, "
434 434 "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
435 435 chan, bank, rank, syndrome, cas, ras,
436   - csrow, mci->csrows[csrow].channels[0].label, msg);
  436 + csrow, mci->csrows[csrow].channels[0].dimm->label, msg);
437 437  
438 438 mci->ce_count++;
439 439 mci->csrows[csrow].ce_count++;
... ... @@ -455,7 +455,7 @@
455 455 "UE chan %d, bank %u, rank %u, syndrome 0x%lx, "
456 456 "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
457 457 chan, bank, rank, syndrome, cas, ras,
458   - csrow, mci->csrows[csrow].channels[0].label, msg);
  458 + csrow, mci->csrows[csrow].channels[0].dimm->label, msg);
459 459  
460 460 mci->ue_count++;
461 461 mci->csrows[csrow].ue_count++;
... ... @@ -868,8 +868,8 @@
868 868 mci->csrows[i].channels[0].chan_idx = 0;
869 869 mci->csrows[i].channels[0].ce_count = 0;
870 870 mci->csrows[i].channels[0].csrow = mci->csrows + i;
871   - snprintf(mci->csrows[i].channels[0].label,
872   - sizeof(mci->csrows[i].channels[0].label),
  871 + snprintf(mci->csrows[i].channels[0].dimm->label,
  872 + sizeof(mci->csrows[i].channels[0].dimm->label),
873 873 "DIMM%u", i5100_rank_to_slot(mci, chan, rank));
874 874  
875 875 total_pages += npages;
drivers/edac/i7core_edac.c
... ... @@ -746,8 +746,8 @@
746 746  
747 747 csr->edac_mode = mode;
748 748 csr->mtype = mtype;
749   - snprintf(csr->channels[0].label,
750   - sizeof(csr->channels[0].label),
  749 + snprintf(csr->channels[0].dimm->label,
  750 + sizeof(csr->channels[0].dimm->label),
751 751 "CPU#%uChannel#%u_DIMM#%u",
752 752 pvt->i7core_dev->socket, i, j);
753 753  
drivers/edac/i82975x_edac.c
... ... @@ -407,7 +407,7 @@
407 407 * [0-3] for dual-channel; i.e. csrow->nr_channels = 2
408 408 */
409 409 for (chan = 0; chan < csrow->nr_channels; chan++)
410   - strncpy(csrow->channels[chan].label,
  410 + strncpy(csrow->channels[chan].dimm->label,
411 411 labels[(index >> 1) + (chan * 2)],
412 412 EDAC_MC_LABEL_LEN);
413 413  
drivers/edac/sb_edac.c
... ... @@ -651,8 +651,8 @@
651 651 csr->channels[0].chan_idx = i;
652 652 csr->channels[0].ce_count = 0;
653 653 pvt->csrow_map[i][j] = csrow;
654   - snprintf(csr->channels[0].label,
655   - sizeof(csr->channels[0].label),
  654 + snprintf(csr->channels[0].dimm->label,
  655 + sizeof(csr->channels[0].dimm->label),
656 656 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
657 657 pvt->sbridge_dev->source_id, i, j);
658 658 last_page += npages;
include/linux/edac.h
... ... @@ -312,23 +312,34 @@
312 312 * PS - I enjoyed writing all that about as much as you enjoyed reading it.
313 313 */
314 314  
  315 +/* FIXME: add a per-dimm ce error count */
  316 +struct dimm_info {
  317 + char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
  318 + unsigned memory_controller;
  319 + unsigned csrow;
  320 + unsigned csrow_channel;
  321 +};
  322 +
315 323 /**
316 324 * struct rank_info - contains the information for one DIMM rank
317 325 *
318 326 * @chan_idx: channel number where the rank is (typically, 0 or 1)
319 327 * @ce_count: number of correctable errors for this rank
320   - * @label: DIMM label. Different ranks for the same DIMM should be
321   - * filled, on userspace, with the same label.
322   - * FIXME: The core currently won't enforce it.
323 328 * @csrow: A pointer to the chip select row structure (the parent
324 329 * structure). The location of the rank is given by
325 330 * the (csrow->csrow_idx, chan_idx) vector.
  331 + * @dimm: A pointer to the DIMM structure, where the DIMM label
  332 + * information is stored.
  333 + *
  334 + * FIXME: Currently, the EDAC core model will assume one DIMM per rank.
  335 + * This is a bad assumption, but it makes this patch easier. Later
  336 + * patches in this series will fix this issue.
326 337 */
327 338 struct rank_info {
328 339 int chan_idx;
329 340 u32 ce_count;
330   - char label[EDAC_MC_LABEL_LEN + 1];
331   - struct csrow_info *csrow; /* the parent */
  341 + struct csrow_info *csrow;
  342 + struct dimm_info *dimm;
332 343 };
333 344  
334 345 struct csrow_info {
... ... @@ -428,6 +439,13 @@
428 439 int mc_idx;
429 440 int nr_csrows;
430 441 struct csrow_info *csrows;
  442 +
  443 + /*
  444 + * DIMM info. Will eventually remove the entire csrows_info some day
  445 + */
  446 + unsigned nr_dimms;
  447 + struct dimm_info *dimms;
  448 +
431 449 /*
432 450 * FIXME - what about controllers on other busses? - IDs must be
433 451 * unique. dev pointer should be sufficiently unique, but