Commit 982216a4290543fe73ae4f0a156f3d7906bd9b73
1 parent
93e4fe64ec
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
edac.h: Add generic layers for describing a memory location
The edac core was written with the idea that memory controllers are able to directly access csrows, and that the channels are used inside a csrow select. This is not true for FB-DIMM and RAMBUS memory controllers. Also, some recent advanced memory controllers don't present a per-csrow view. Instead, they view memories as DIMMs rather than ranks accessed via csrow/channel. So, changes are needed in order to allow the EDAC core to work with all types of architectures. In preparation for handling non-csrow-based memory controllers, add some memory structs and a macro: enum hw_event_mc_err_type: describes the type of error (corrected, uncorrected, fatal). To be used by the new edac_mc_handle_error function; enum edac_mc_layer_type: describes the type of a given memory architecture layer (branch, channel, slot, csrow); struct edac_mc_layer: describes the properties of a memory layer (type, size, and whether the layer will be used on a virtual csrow); EDAC_DIMM_PTR() - as the number of layers can vary from 1 to 3, this macro converts from an address with up to 3 layers into a linear address. Reviewed-by: Borislav Petkov <bp@amd64.org> Cc: Doug Thompson <norsk5@yahoo.com> Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Showing 1 changed file with 102 additions and 1 deletions Side-by-side Diff
include/linux/edac.h
... | ... | @@ -71,6 +71,25 @@ |
71 | 71 | #define DEV_FLAG_X64 BIT(DEV_X64) |
72 | 72 | |
73 | 73 | /** |
74 | + * enum hw_event_mc_err_type - type of the detected error | |
75 | + * | |
76 | + * @HW_EVENT_ERR_CORRECTED: Corrected Error - Indicates that an ECC | |
77 | + * corrected error was detected | |
78 | + * @HW_EVENT_ERR_UNCORRECTED: Uncorrected Error - Indicates an error that | |
79 | + * can't be corrected by ECC, but it is not | |
80 | + * fatal (maybe it is on an unused memory area, | |
81 | + * or the memory controller could recover from | |
82 | + * it for example, by re-trying the operation). | |
83 | + * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not | |
84 | + * be recovered. | |
85 | + */ | |
86 | +enum hw_event_mc_err_type { | |
87 | + HW_EVENT_ERR_CORRECTED, | |
88 | + HW_EVENT_ERR_UNCORRECTED, | |
89 | + HW_EVENT_ERR_FATAL, | |
90 | +}; | |
91 | + | |
92 | +/** | |
74 | 93 | * enum mem_type - memory types. For a more detailed reference, please see |
75 | 94 | * http://en.wikipedia.org/wiki/DRAM |
76 | 95 | * |
... | ... | @@ -312,7 +331,89 @@ |
312 | 331 | * PS - I enjoyed writing all that about as much as you enjoyed reading it. |
313 | 332 | */ |
314 | 333 | |
315 | -/* FIXME: add a per-dimm ce error count */ | |
334 | +/** | |
335 | + * enum edac_mc_layer_type - memory controller hierarchy layer | |
336 | + * | |
337 | + * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch" | |
338 | + * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" | |
339 | + * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" | |
340 | + * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" | |
341 | + * | |
342 | + * This enum is used by the drivers to tell edac_mc_sysfs what name should | |
343 | + * be used when describing a memory stick location. | |
344 | + */ | |
345 | +enum edac_mc_layer_type { | |
346 | + EDAC_MC_LAYER_BRANCH, | |
347 | + EDAC_MC_LAYER_CHANNEL, | |
348 | + EDAC_MC_LAYER_SLOT, | |
349 | + EDAC_MC_LAYER_CHIP_SELECT, | |
350 | +}; | |
351 | + | |
352 | +/** | |
353 | + * struct edac_mc_layer - describes the memory controller hierarchy | |
354 | + * @type: layer type | |
355 | + * @size: number of components per layer. For example, | |
356 | + * if the channel layer has two channels, size = 2 | |
357 | + * @is_virt_csrow: This layer is part of the "csrow" when old API | |
358 | + * compatibility mode is enabled. Otherwise, it is | |
359 | + * a channel | |
360 | + */ | |
361 | +struct edac_mc_layer { | |
362 | + enum edac_mc_layer_type type; | |
363 | + unsigned size; | |
364 | + bool is_virt_csrow; | |
365 | +}; | |
366 | + | |
367 | +/* | |
368 | + * Maximum number of layers used by the memory controller to uniquely | |
369 | + * identify a single memory stick. | |
370 | + * NOTE: Changing this constant requires changing not only the constant | |
371 | + * below, but also the existing code at the core, as some of the code | |
372 | + * there is optimized for 3 layers. | |
373 | + */ | |
374 | +#define EDAC_MAX_LAYERS 3 | |
375 | + | |
376 | +/** | |
377 | + * EDAC_DIMM_PTR - Macro responsible for finding a pointer inside a pointer array | |
378 | + * for the element given by [layer0,layer1,layer2] position | |
379 | + * | |
380 | + * @layers: a struct edac_mc_layer array, describing how many elements | |
381 | + * were allocated for each layer | |
382 | + * @var: name of the var where we want to get the pointer | |
383 | + * (like mci->dimms) | |
384 | + * @nlayers: Number of layers at the @layers array | |
385 | + * @layer0: layer0 position | |
386 | + * @layer1: layer1 position. Unused if n_layers < 2 | |
387 | + * @layer2: layer2 position. Unused if n_layers < 3 | |
388 | + * | |
389 | + * For 1 layer, this macro returns &var[layer0] | |
390 | + * For 2 layers, this macro is similar to allocating a bi-dimensional array | |
391 | + * and returning "&var[layer0][layer1]" | |
392 | + * For 3 layers, this macro is similar to allocating a tri-dimensional array | |
393 | + * and returning "&var[layer0][layer1][layer2]" | |
394 | + * | |
395 | + * A loop could be used here to make it more generic, but, as we only have | |
396 | + * 3 layers, this is a little faster. | |
397 | + * By design, layers can never be 0 or more than 3. If that ever happens, | |
398 | + * a NULL is returned, causing an OOPS during the memory allocation routine, | |
399 | + * which would show the developer that something is being done wrong. | |
400 | + */ | |
401 | +#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \ | |
402 | + typeof(var) __p; \ | |
403 | + if ((nlayers) == 1) \ | |
404 | + __p = &var[layer0]; \ | |
405 | + else if ((nlayers) == 2) \ | |
406 | + __p = &var[(layer1) + ((layers[1]).size * (layer0))]; \ | |
407 | + else if ((nlayers) == 3) \ | |
408 | + __p = &var[(layer2) + ((layers[2]).size * ((layer1) + \ | |
409 | + ((layers[1]).size * (layer0))))]; \ | |
410 | + else \ | |
411 | + __p = NULL; \ | |
412 | + __p; \ | |
413 | +}) | |
414 | + | |
415 | + | |
416 | +/* FIXME: add the proper per-location error counts */ | |
316 | 417 | struct dimm_info { |
317 | 418 | char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ |
318 | 419 | unsigned memory_controller; |