Commit 156edd4aaa819ec5867ced83c7b8dba9193789ea

Authored by Jeff Roberson
Committed by Linus Torvalds
1 parent 8c85dd8730

edac: i5400 fix csrow mapping

The i5400 EDAC driver has several bugs with chip-select row computation
which most likely lead to bugs in detailed error reporting.  Attempts to
contact the authors have gone mostly unanswered so I am presenting my diff
here.  I do not subscribe to lkml and would appreciate being kept in the
cc.

The most egregious problem was miscalculating the addresses of MTR
registers after register 0 by assuming they are 32bit rather than 16.
This caused the driver to miss half of the memories.  Most motherboards
tend to have only 8 dimm slots and not 16, so this may not have been
noticed before.

Further, the row calculations multiplied the number of dimms several
times, ultimately ending up with a maximum row of 32.  The chipset only
supports 4 dimms in each of 4 channels, so csrow could not be higher than
4 unless you use a row per-rank with dual-rank dimms.  I opted to
eliminate this behavior as it is confusing to the user and the error
reporting works by slot and not rank.  This gives a much clearer view of
memory by slot and channel in /sys.

Signed-off-by: Jeff Roberson <jroberson@jroberson.net>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 28 additions and 61 deletions Side-by-side Diff

drivers/edac/i5400_edac.c
... ... @@ -46,9 +46,10 @@
46 46 /* Limits for i5400 */
47 47 #define NUM_MTRS_PER_BRANCH 4
48 48 #define CHANNELS_PER_BRANCH 2
  49 +#define MAX_DIMMS_PER_CHANNEL NUM_MTRS_PER_BRANCH
49 50 #define MAX_CHANNELS 4
50   -#define MAX_DIMMS (MAX_CHANNELS * 4) /* Up to 4 DIMM's per channel */
51   -#define MAX_CSROWS (MAX_DIMMS * 2) /* max possible csrows per channel */
  51 +/* max possible csrows per channel */
  52 +#define MAX_CSROWS (MAX_DIMMS_PER_CHANNEL)
52 53  
53 54 /* Device 16,
54 55 * Function 0: System Address
... ... @@ -331,7 +332,6 @@
331 332  
332 333 struct i5400_dimm_info {
333 334 int megabytes; /* size, 0 means not present */
334   - int dual_rank;
335 335 };
336 336  
337 337 /* driver private data structure */
338 338  
339 339  
... ... @@ -849,11 +849,9 @@
849 849 int n;
850 850  
851 851 /* There is one MTR for each slot pair of FB-DIMMs,
852   - Each slot may have one or two ranks (2 csrows),
853 852 Each slot pair may be at branch 0 or branch 1.
854   - So, csrow should be divided by eight
855 853 */
856   - n = csrow >> 3;
  854 + n = csrow;
857 855  
858 856 if (n >= NUM_MTRS_PER_BRANCH) {
859 857 debugf0("ERROR: trying to access an invalid csrow: %d\n",
860 858  
861 859  
... ... @@ -905,25 +903,22 @@
905 903 amb_present_reg = determine_amb_present_reg(pvt, channel);
906 904  
907 905 /* Determine if there is a DIMM present in this DIMM slot */
908   - if (amb_present_reg & (1 << (csrow >> 1))) {
909   - dinfo->dual_rank = MTR_DIMM_RANK(mtr);
  906 + if (amb_present_reg & (1 << csrow)) {
  907 + /* Start with the number of bits for a Bank
  908 + * on the DRAM */
  909 + addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
  910 + /* Add thenumber of ROW bits */
  911 + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
  912 + /* add the number of COLUMN bits */
  913 + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
  914 + /* add the number of RANK bits */
  915 + addrBits += MTR_DIMM_RANK(mtr);
910 916  
911   - if (!((dinfo->dual_rank == 0) &&
912   - ((csrow & 0x1) == 0x1))) {
913   - /* Start with the number of bits for a Bank
914   - * on the DRAM */
915   - addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
916   - /* Add thenumber of ROW bits */
917   - addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
918   - /* add the number of COLUMN bits */
919   - addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
  917 + addrBits += 6; /* add 64 bits per DIMM */
  918 + addrBits -= 20; /* divide by 2^^20 */
  919 + addrBits -= 3; /* 8 bits per bytes */
920 920  
921   - addrBits += 6; /* add 64 bits per DIMM */
922   - addrBits -= 20; /* divide by 2^^20 */
923   - addrBits -= 3; /* 8 bits per bytes */
924   -
925   - dinfo->megabytes = 1 << addrBits;
926   - }
  921 + dinfo->megabytes = 1 << addrBits;
927 922 }
928 923 }
929 924 }
930 925  
... ... @@ -951,12 +946,12 @@
951 946 return;
952 947 }
953 948  
954   - /* Scan all the actual CSROWS (which is # of DIMMS * 2)
  949 + /* Scan all the actual CSROWS
955 950 * and calculate the information for each DIMM
956 951 * Start with the highest csrow first, to display it first
957 952 * and work toward the 0th csrow
958 953 */
959   - max_csrows = pvt->maxdimmperch * 2;
  954 + max_csrows = pvt->maxdimmperch;
960 955 for (csrow = max_csrows - 1; csrow >= 0; csrow--) {
961 956  
962 957 /* on an odd csrow, first output a 'boundary' marker,
... ... @@ -1064,7 +1059,7 @@
1064 1059  
1065 1060 /* Get the set of MTR[0-3] regs by each branch */
1066 1061 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) {
1067   - int where = MTR0 + (slot_row * sizeof(u32));
  1062 + int where = MTR0 + (slot_row * sizeof(u16));
1068 1063  
1069 1064 /* Branch 0 set of MTR registers */
1070 1065 pci_read_config_word(pvt->branch_0, where,
... ... @@ -1146,7 +1141,7 @@
1146 1141 pvt = mci->pvt_info;
1147 1142  
1148 1143 channel_count = pvt->maxch;
1149   - max_csrows = pvt->maxdimmperch * 2;
  1144 + max_csrows = pvt->maxdimmperch;
1150 1145  
1151 1146 empty = 1; /* Assume NO memory */
1152 1147  
... ... @@ -1215,28 +1210,6 @@
1215 1210 }
1216 1211  
1217 1212 /*
1218   - * i5400_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels)
1219   - *
1220   - * ask the device how many channels are present and how many CSROWS
1221   - * as well
1222   - */
1223   -static void i5400_get_dimm_and_channel_counts(struct pci_dev *pdev,
1224   - int *num_dimms_per_channel,
1225   - int *num_channels)
1226   -{
1227   - u8 value;
1228   -
1229   - /* Need to retrieve just how many channels and dimms per channel are
1230   - * supported on this memory controller
1231   - */
1232   - pci_read_config_byte(pdev, MAXDIMMPERCH, &value);
1233   - *num_dimms_per_channel = (int)value * 2;
1234   -
1235   - pci_read_config_byte(pdev, MAXCH, &value);
1236   - *num_channels = (int)value;
1237   -}
1238   -
1239   -/*
1240 1213 * i5400_probe1 Probe for ONE instance of device to see if it is
1241 1214 * present.
1242 1215 * return:
1243 1216  
... ... @@ -1263,22 +1236,16 @@
1263 1236 if (PCI_FUNC(pdev->devfn) != 0)
1264 1237 return -ENODEV;
1265 1238  
1266   - /* Ask the devices for the number of CSROWS and CHANNELS so
1267   - * that we can calculate the memory resources, etc
1268   - *
1269   - * The Chipset will report what it can handle which will be greater
1270   - * or equal to what the motherboard manufacturer will implement.
1271   - *
1272   - * As we don't have a motherboard identification routine to determine
  1239 + /* As we don't have a motherboard identification routine to determine
1273 1240 * actual number of slots/dimms per channel, we thus utilize the
1274 1241 * resource as specified by the chipset. Thus, we might have
1275 1242 * have more DIMMs per channel than actually on the mobo, but this
1276 1243 * allows the driver to support upto the chipset max, without
1277 1244 * some fancy mobo determination.
1278 1245 */
1279   - i5400_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel,
1280   - &num_channels);
1281   - num_csrows = num_dimms_per_channel * 2;
  1246 + num_dimms_per_channel = MAX_DIMMS_PER_CHANNEL;
  1247 + num_channels = MAX_CHANNELS;
  1248 + num_csrows = num_dimms_per_channel;
1282 1249  
1283 1250 debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
1284 1251 __func__, num_channels, num_dimms_per_channel, num_csrows);