Commit 8dd93d450bff251575c56b8f058393124e1f00fb

Authored by Mauro Carvalho Chehab
1 parent e7e248304c

edac: add support for error type "Info"

The CPER spec defines a fourth type of error: informational
logs. Add support for it at the edac API and at the
trace event interface.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

Showing 2 changed files with 17 additions and 3 deletions Inline Diff

include/linux/edac.h
1 /* 1 /*
2 * Generic EDAC defs 2 * Generic EDAC defs
3 * 3 *
4 * Author: Dave Jiang <djiang@mvista.com> 4 * Author: Dave Jiang <djiang@mvista.com>
5 * 5 *
6 * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under 6 * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under
7 * the terms of the GNU General Public License version 2. This program 7 * the terms of the GNU General Public License version 2. This program
8 * is licensed "as is" without any warranty of any kind, whether express 8 * is licensed "as is" without any warranty of any kind, whether express
9 * or implied. 9 * or implied.
10 * 10 *
11 */ 11 */
12 #ifndef _LINUX_EDAC_H_ 12 #ifndef _LINUX_EDAC_H_
13 #define _LINUX_EDAC_H_ 13 #define _LINUX_EDAC_H_
14 14
15 #include <linux/atomic.h> 15 #include <linux/atomic.h>
16 #include <linux/device.h> 16 #include <linux/device.h>
17 #include <linux/completion.h> 17 #include <linux/completion.h>
18 #include <linux/workqueue.h> 18 #include <linux/workqueue.h>
19 #include <linux/debugfs.h> 19 #include <linux/debugfs.h>
20 20
21 struct device; 21 struct device;
22 22
23 #define EDAC_OPSTATE_INVAL -1 23 #define EDAC_OPSTATE_INVAL -1
24 #define EDAC_OPSTATE_POLL 0 24 #define EDAC_OPSTATE_POLL 0
25 #define EDAC_OPSTATE_NMI 1 25 #define EDAC_OPSTATE_NMI 1
26 #define EDAC_OPSTATE_INT 2 26 #define EDAC_OPSTATE_INT 2
27 27
28 extern int edac_op_state; 28 extern int edac_op_state;
29 extern int edac_err_assert; 29 extern int edac_err_assert;
30 extern atomic_t edac_handlers; 30 extern atomic_t edac_handlers;
31 extern struct bus_type edac_subsys; 31 extern struct bus_type edac_subsys;
32 32
33 extern int edac_handler_set(void); 33 extern int edac_handler_set(void);
34 extern void edac_atomic_assert_error(void); 34 extern void edac_atomic_assert_error(void);
35 extern struct bus_type *edac_get_sysfs_subsys(void); 35 extern struct bus_type *edac_get_sysfs_subsys(void);
36 extern void edac_put_sysfs_subsys(void); 36 extern void edac_put_sysfs_subsys(void);
37 37
38 static inline void opstate_init(void) 38 static inline void opstate_init(void)
39 { 39 {
40 switch (edac_op_state) { 40 switch (edac_op_state) {
41 case EDAC_OPSTATE_POLL: 41 case EDAC_OPSTATE_POLL:
42 case EDAC_OPSTATE_NMI: 42 case EDAC_OPSTATE_NMI:
43 break; 43 break;
44 default: 44 default:
45 edac_op_state = EDAC_OPSTATE_POLL; 45 edac_op_state = EDAC_OPSTATE_POLL;
46 } 46 }
47 return; 47 return;
48 } 48 }
49 49
50 /* Max length of a DIMM label*/ 50 /* Max length of a DIMM label*/
51 #define EDAC_MC_LABEL_LEN 31 51 #define EDAC_MC_LABEL_LEN 31
52 52
53 /* Maximum size of the location string */ 53 /* Maximum size of the location string */
54 #define LOCATION_SIZE 80 54 #define LOCATION_SIZE 80
55 55
56 /* Defines the maximum number of labels that can be reported */ 56 /* Defines the maximum number of labels that can be reported */
57 #define EDAC_MAX_LABELS 8 57 #define EDAC_MAX_LABELS 8
58 58
59 /* String used to join two or more labels */ 59 /* String used to join two or more labels */
60 #define OTHER_LABEL " or " 60 #define OTHER_LABEL " or "
61 61
62 /** 62 /**
63 * enum dev_type - describe the type of memory DRAM chips used at the stick 63 * enum dev_type - describe the type of memory DRAM chips used at the stick
64 * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detecting it 64 * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detecting it
65 * @DEV_X1: 1 bit for data 65 * @DEV_X1: 1 bit for data
66 * @DEV_X2: 2 bits for data 66 * @DEV_X2: 2 bits for data
67 * @DEV_X4: 4 bits for data 67 * @DEV_X4: 4 bits for data
68 * @DEV_X8: 8 bits for data 68 * @DEV_X8: 8 bits for data
69 * @DEV_X16: 16 bits for data 69 * @DEV_X16: 16 bits for data
70 * @DEV_X32: 32 bits for data 70 * @DEV_X32: 32 bits for data
71 * @DEV_X64: 64 bits for data 71 * @DEV_X64: 64 bits for data
72 * 72 *
73 * Typical values are x4 and x8. 73 * Typical values are x4 and x8.
74 */ 74 */
/*
 * enum dev_type - data width of the DRAM chips used on a memory stick.
 * Typical parts are x4 and x8. The ordering is significant: the
 * DEV_FLAG_* masks are built with BIT() on these values.
 */
enum dev_type {
	DEV_UNKNOWN = 0,	/* can't be determined / not reported by the MC */
	DEV_X1,			/* 1 bit for data */
	DEV_X2,			/* 2 bits for data */
	DEV_X4,			/* 4 bits for data */
	DEV_X8,			/* 8 bits for data */
	DEV_X16,		/* 16 bits for data */
	DEV_X32,		/* do these parts exist? */
	DEV_X64			/* do these parts exist? */
};
85 85
86 #define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN) 86 #define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
87 #define DEV_FLAG_X1 BIT(DEV_X1) 87 #define DEV_FLAG_X1 BIT(DEV_X1)
88 #define DEV_FLAG_X2 BIT(DEV_X2) 88 #define DEV_FLAG_X2 BIT(DEV_X2)
89 #define DEV_FLAG_X4 BIT(DEV_X4) 89 #define DEV_FLAG_X4 BIT(DEV_X4)
90 #define DEV_FLAG_X8 BIT(DEV_X8) 90 #define DEV_FLAG_X8 BIT(DEV_X8)
91 #define DEV_FLAG_X16 BIT(DEV_X16) 91 #define DEV_FLAG_X16 BIT(DEV_X16)
92 #define DEV_FLAG_X32 BIT(DEV_X32) 92 #define DEV_FLAG_X32 BIT(DEV_X32)
93 #define DEV_FLAG_X64 BIT(DEV_X64) 93 #define DEV_FLAG_X64 BIT(DEV_X64)
94 94
95 /** 95 /**
96 * enum hw_event_mc_err_type - type of the detected error 96 * enum hw_event_mc_err_type - type of the detected error
97 * 97 *
98 * @HW_EVENT_ERR_CORRECTED: Corrected Error - Indicates that an ECC 98 * @HW_EVENT_ERR_CORRECTED: Corrected Error - Indicates that an ECC
99 * corrected error was detected 99 * corrected error was detected
100 * @HW_EVENT_ERR_UNCORRECTED: Uncorrected Error - Indicates an error that 100 * @HW_EVENT_ERR_UNCORRECTED: Uncorrected Error - Indicates an error that
101 * can't be corrected by ECC, but it is not 101 * can't be corrected by ECC, but it is not
102 * fatal (maybe it is on an unused memory area, 102 * fatal (maybe it is on an unused memory area,
103 * or the memory controller could recover from 103 * or the memory controller could recover from
104 * it for example, by re-trying the operation). 104 * it for example, by re-trying the operation).
105 * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not 105 * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not
106 * be recovered. 106 * be recovered.
107 */ 107 */
/*
 * enum hw_event_mc_err_type - severity of a detected memory error.
 * HW_EVENT_ERR_INFO carries the CPER "informational" log severity,
 * added on top of the classic corrected/uncorrected/fatal triple.
 */
enum hw_event_mc_err_type {
	HW_EVENT_ERR_CORRECTED,		/* ECC detected and corrected the error */
	HW_EVENT_ERR_UNCORRECTED,	/* uncorrectable, but not fatal */
	HW_EVENT_ERR_FATAL,		/* uncorrected and unrecoverable */
	HW_EVENT_ERR_INFO,		/* informational log entry */
};
114
115 static inline char *mc_event_error_type(const unsigned int err_type)
116 {
117 switch (err_type) {
118 case HW_EVENT_ERR_CORRECTED:
119 return "Corrected";
120 case HW_EVENT_ERR_UNCORRECTED:
121 return "Uncorrected";
122 case HW_EVENT_ERR_FATAL:
123 return "Fatal";
124 default:
125 case HW_EVENT_ERR_INFO:
126 return "Info";
127 }
128 }
113 129
114 /** 130 /**
115 * enum mem_type - memory types. For a more detailed reference, please see 131 * enum mem_type - memory types. For a more detailed reference, please see
116 * http://en.wikipedia.org/wiki/DRAM 132 * http://en.wikipedia.org/wiki/DRAM
117 * 133 *
118 * @MEM_EMPTY Empty csrow 134 * @MEM_EMPTY Empty csrow
119 * @MEM_RESERVED: Reserved csrow type 135 * @MEM_RESERVED: Reserved csrow type
120 * @MEM_UNKNOWN: Unknown csrow type 136 * @MEM_UNKNOWN: Unknown csrow type
121 * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995. 137 * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995.
122 * @MEM_EDO: EDO - Extended data out, used on systems up to 1998. 138 * @MEM_EDO: EDO - Extended data out, used on systems up to 1998.
123 * @MEM_BEDO: BEDO - Burst Extended data out, an EDO variant. 139 * @MEM_BEDO: BEDO - Burst Extended data out, an EDO variant.
124 * @MEM_SDR: SDR - Single data rate SDRAM 140 * @MEM_SDR: SDR - Single data rate SDRAM
125 * http://en.wikipedia.org/wiki/Synchronous_dynamic_random-access_memory 141 * http://en.wikipedia.org/wiki/Synchronous_dynamic_random-access_memory
126 * They use 3 pins for chip select: Pins 0 and 2 are 142 * They use 3 pins for chip select: Pins 0 and 2 are
127 * for rank 0; pins 1 and 3 are for rank 1, if the memory 143 * for rank 0; pins 1 and 3 are for rank 1, if the memory
128 * is dual-rank. 144 * is dual-rank.
129 * @MEM_RDR: Registered SDR SDRAM 145 * @MEM_RDR: Registered SDR SDRAM
130 * @MEM_DDR: Double data rate SDRAM 146 * @MEM_DDR: Double data rate SDRAM
131 * http://en.wikipedia.org/wiki/DDR_SDRAM 147 * http://en.wikipedia.org/wiki/DDR_SDRAM
132 * @MEM_RDDR: Registered Double data rate SDRAM 148 * @MEM_RDDR: Registered Double data rate SDRAM
133 * This is a variant of the DDR memories. 149 * This is a variant of the DDR memories.
134 * A registered memory has a buffer inside it, hiding 150 * A registered memory has a buffer inside it, hiding
135 * part of the memory details to the memory controller. 151 * part of the memory details to the memory controller.
136 * @MEM_RMBS: Rambus DRAM, used on a few Pentium III/IV controllers. 152 * @MEM_RMBS: Rambus DRAM, used on a few Pentium III/IV controllers.
137 * @MEM_DDR2: DDR2 RAM, as described at JEDEC JESD79-2F. 153 * @MEM_DDR2: DDR2 RAM, as described at JEDEC JESD79-2F.
138 * Those memories are labeled as "PC2-" instead of "PC" to 154 * Those memories are labeled as "PC2-" instead of "PC" to
139 * differentiate from DDR. 155 * differentiate from DDR.
140 * @MEM_FB_DDR2: Fully-Buffered DDR2, as described at JEDEC Std No. 205 156 * @MEM_FB_DDR2: Fully-Buffered DDR2, as described at JEDEC Std No. 205
141 * and JESD206. 157 * and JESD206.
142 * Those memories are accessed per DIMM slot, and not by 158 * Those memories are accessed per DIMM slot, and not by
143 * a chip select signal. 159 * a chip select signal.
144 * @MEM_RDDR2: Registered DDR2 RAM 160 * @MEM_RDDR2: Registered DDR2 RAM
145 * This is a variant of the DDR2 memories. 161 * This is a variant of the DDR2 memories.
146 * @MEM_XDR: Rambus XDR 162 * @MEM_XDR: Rambus XDR
147 * It is an evolution of the original RAMBUS memories, 163 * It is an evolution of the original RAMBUS memories,
148 * created to compete with DDR2. Weren't used on any 164 * created to compete with DDR2. Weren't used on any
149 * x86 arch, but cell_edac PPC memory controller uses it. 165 * x86 arch, but cell_edac PPC memory controller uses it.
150 * @MEM_DDR3: DDR3 RAM 166 * @MEM_DDR3: DDR3 RAM
151 * @MEM_RDDR3: Registered DDR3 RAM 167 * @MEM_RDDR3: Registered DDR3 RAM
152 * This is a variant of the DDR3 memories. 168 * This is a variant of the DDR3 memories.
153 */ 169 */
/*
 * enum mem_type - DRAM technology of a csrow/DIMM.
 * See the detailed kernel-doc comment above for per-type references.
 * Ordering is significant: the MEM_FLAG_* masks use BIT() on these values.
 */
enum mem_type {
	MEM_EMPTY = 0,	/* empty csrow */
	MEM_RESERVED,	/* reserved csrow type */
	MEM_UNKNOWN,	/* unknown csrow type */
	MEM_FPM,	/* Fast Page Mode */
	MEM_EDO,	/* Extended Data Out */
	MEM_BEDO,	/* Burst Extended Data Out */
	MEM_SDR,	/* single data rate SDRAM */
	MEM_RDR,	/* registered SDR SDRAM */
	MEM_DDR,	/* double data rate SDRAM */
	MEM_RDDR,	/* registered DDR SDRAM */
	MEM_RMBS,	/* Rambus DRAM */
	MEM_DDR2,	/* DDR2 RAM */
	MEM_FB_DDR2,	/* fully-buffered DDR2 */
	MEM_RDDR2,	/* registered DDR2 RAM */
	MEM_XDR,	/* Rambus XDR */
	MEM_DDR3,	/* DDR3 RAM */
	MEM_RDDR3,	/* registered DDR3 RAM */
};
173 189
174 #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) 190 #define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
175 #define MEM_FLAG_RESERVED BIT(MEM_RESERVED) 191 #define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
176 #define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN) 192 #define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
177 #define MEM_FLAG_FPM BIT(MEM_FPM) 193 #define MEM_FLAG_FPM BIT(MEM_FPM)
178 #define MEM_FLAG_EDO BIT(MEM_EDO) 194 #define MEM_FLAG_EDO BIT(MEM_EDO)
179 #define MEM_FLAG_BEDO BIT(MEM_BEDO) 195 #define MEM_FLAG_BEDO BIT(MEM_BEDO)
180 #define MEM_FLAG_SDR BIT(MEM_SDR) 196 #define MEM_FLAG_SDR BIT(MEM_SDR)
181 #define MEM_FLAG_RDR BIT(MEM_RDR) 197 #define MEM_FLAG_RDR BIT(MEM_RDR)
182 #define MEM_FLAG_DDR BIT(MEM_DDR) 198 #define MEM_FLAG_DDR BIT(MEM_DDR)
183 #define MEM_FLAG_RDDR BIT(MEM_RDDR) 199 #define MEM_FLAG_RDDR BIT(MEM_RDDR)
184 #define MEM_FLAG_RMBS BIT(MEM_RMBS) 200 #define MEM_FLAG_RMBS BIT(MEM_RMBS)
185 #define MEM_FLAG_DDR2 BIT(MEM_DDR2) 201 #define MEM_FLAG_DDR2 BIT(MEM_DDR2)
186 #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) 202 #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
187 #define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) 203 #define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
188 #define MEM_FLAG_XDR BIT(MEM_XDR) 204 #define MEM_FLAG_XDR BIT(MEM_XDR)
189 #define MEM_FLAG_DDR3 BIT(MEM_DDR3) 205 #define MEM_FLAG_DDR3 BIT(MEM_DDR3)
190 #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) 206 #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
191 207
192 /** 208 /**
193 * enum edac-type - Error Detection and Correction capabilities and mode 209 * enum edac-type - Error Detection and Correction capabilities and mode
194 * @EDAC_UNKNOWN: Unknown if ECC is available 210 * @EDAC_UNKNOWN: Unknown if ECC is available
195 * @EDAC_NONE: Doesn't support ECC 211 * @EDAC_NONE: Doesn't support ECC
196 * @EDAC_RESERVED: Reserved ECC type 212 * @EDAC_RESERVED: Reserved ECC type
197 * @EDAC_PARITY: Detects parity errors 213 * @EDAC_PARITY: Detects parity errors
198 * @EDAC_EC: Error Checking - no correction 214 * @EDAC_EC: Error Checking - no correction
199 * @EDAC_SECDED: Single bit error correction, Double detection 215 * @EDAC_SECDED: Single bit error correction, Double detection
200 * @EDAC_S2ECD2ED: Chipkill x2 devices - do these exist? 216 * @EDAC_S2ECD2ED: Chipkill x2 devices - do these exist?
201 * @EDAC_S4ECD4ED: Chipkill x4 devices 217 * @EDAC_S4ECD4ED: Chipkill x4 devices
202 * @EDAC_S8ECD8ED: Chipkill x8 devices 218 * @EDAC_S8ECD8ED: Chipkill x8 devices
203 * @EDAC_S16ECD16ED: Chipkill x16 devices 219 * @EDAC_S16ECD16ED: Chipkill x16 devices
204 */ 220 */
/*
 * enum edac_type - error detection and correction capability/mode.
 * Ordering is significant: the EDAC_FLAG_* masks use BIT() on these values.
 */
enum edac_type {
	EDAC_UNKNOWN = 0,	/* unknown if ECC is available */
	EDAC_NONE,		/* doesn't support ECC */
	EDAC_RESERVED,		/* reserved ECC type */
	EDAC_PARITY,		/* detects parity errors */
	EDAC_EC,		/* error checking - no correction */
	EDAC_SECDED,		/* single-bit correction, double-bit detection */
	EDAC_S2ECD2ED,		/* chipkill x2 devices - do these exist? */
	EDAC_S4ECD4ED,		/* chipkill x4 devices */
	EDAC_S8ECD8ED,		/* chipkill x8 devices */
	EDAC_S16ECD16ED,	/* chipkill x16 devices */
};
217 233
218 #define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN) 234 #define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
219 #define EDAC_FLAG_NONE BIT(EDAC_NONE) 235 #define EDAC_FLAG_NONE BIT(EDAC_NONE)
220 #define EDAC_FLAG_PARITY BIT(EDAC_PARITY) 236 #define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
221 #define EDAC_FLAG_EC BIT(EDAC_EC) 237 #define EDAC_FLAG_EC BIT(EDAC_EC)
222 #define EDAC_FLAG_SECDED BIT(EDAC_SECDED) 238 #define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
223 #define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED) 239 #define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
224 #define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED) 240 #define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
225 #define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED) 241 #define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
226 #define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED) 242 #define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
227 243
228 /** 244 /**
229 * enum scrub_type - scrubbing capabilities 245 * enum scrub_type - scrubbing capabilities
230 * @SCRUB_UNKNOWN Unknown if scrubber is available 246 * @SCRUB_UNKNOWN Unknown if scrubber is available
231 * @SCRUB_NONE: No scrubber 247 * @SCRUB_NONE: No scrubber
232 * @SCRUB_SW_PROG: SW progressive (sequential) scrubbing 248 * @SCRUB_SW_PROG: SW progressive (sequential) scrubbing
233 * @SCRUB_SW_SRC: Software scrub only errors 249 * @SCRUB_SW_SRC: Software scrub only errors
234 * @SCRUB_SW_PROG_SRC: Progressive software scrub from an error 250 * @SCRUB_SW_PROG_SRC: Progressive software scrub from an error
235 * @SCRUB_SW_TUNABLE: Software scrub frequency is tunable 251 * @SCRUB_SW_TUNABLE: Software scrub frequency is tunable
236 * @SCRUB_HW_PROG: HW progressive (sequential) scrubbing 252 * @SCRUB_HW_PROG: HW progressive (sequential) scrubbing
237 * @SCRUB_HW_SRC: Hardware scrub only errors 253 * @SCRUB_HW_SRC: Hardware scrub only errors
238 * @SCRUB_HW_PROG_SRC: Progressive hardware scrub from an error 254 * @SCRUB_HW_PROG_SRC: Progressive hardware scrub from an error
239 * SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable 255 * SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable
240 */ 256 */
/*
 * enum scrub_type - memory scrubbing capabilities.
 * Ordering is significant: the SCRUB_FLAG_* masks use BIT() on these values.
 */
enum scrub_type {
	SCRUB_UNKNOWN = 0,	/* unknown if scrubber is available */
	SCRUB_NONE,		/* no scrubber */
	SCRUB_SW_PROG,		/* SW progressive (sequential) scrubbing */
	SCRUB_SW_SRC,		/* software scrubs only on errors */
	SCRUB_SW_PROG_SRC,	/* progressive software scrub from an error */
	SCRUB_SW_TUNABLE,	/* software scrub frequency is tunable */
	SCRUB_HW_PROG,		/* HW progressive (sequential) scrubbing */
	SCRUB_HW_SRC,		/* hardware scrubs only on errors */
	SCRUB_HW_PROG_SRC,	/* progressive hardware scrub from an error */
	SCRUB_HW_TUNABLE	/* hardware scrub frequency is tunable */
};
253 269
254 #define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) 270 #define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
255 #define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC) 271 #define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC)
256 #define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC) 272 #define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
257 #define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) 273 #define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE)
258 #define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) 274 #define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG)
259 #define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC) 275 #define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC)
260 #define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC) 276 #define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
261 #define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) 277 #define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE)
262 278
263 /* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ 279 /* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
264 280
265 /* EDAC internal operation states */ 281 /* EDAC internal operation states */
266 #define OP_ALLOC 0x100 282 #define OP_ALLOC 0x100
267 #define OP_RUNNING_POLL 0x201 283 #define OP_RUNNING_POLL 0x201
268 #define OP_RUNNING_INTERRUPT 0x202 284 #define OP_RUNNING_INTERRUPT 0x202
269 #define OP_RUNNING_POLL_INTR 0x203 285 #define OP_RUNNING_POLL_INTR 0x203
270 #define OP_OFFLINE 0x300 286 #define OP_OFFLINE 0x300
271 287
272 /* 288 /*
273 * Concepts used at the EDAC subsystem 289 * Concepts used at the EDAC subsystem
274 * 290 *
275 * There are several things to be aware of that aren't at all obvious: 291 * There are several things to be aware of that aren't at all obvious:
276 * 292 *
277 * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. 293 * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
278 * 294 *
279 * These are some of the many terms that are thrown about that don't always 295 * These are some of the many terms that are thrown about that don't always
280 * mean what people think they mean (Inconceivable!). In the interest of 296 * mean what people think they mean (Inconceivable!). In the interest of
281 * creating a common ground for discussion, terms and their definitions 297 * creating a common ground for discussion, terms and their definitions
282 * will be established. 298 * will be established.
283 * 299 *
284 * Memory devices: The individual DRAM chips on a memory stick. These 300 * Memory devices: The individual DRAM chips on a memory stick. These
285 * devices commonly output 4 and 8 bits each (x4, x8). 301 * devices commonly output 4 and 8 bits each (x4, x8).
286 * Grouping several of these in parallel provides the 302 * Grouping several of these in parallel provides the
287 * number of bits that the memory controller expects: 303 * number of bits that the memory controller expects:
288 * typically 72 bits, in order to provide 64 bits + 304 * typically 72 bits, in order to provide 64 bits +
289 * 8 bits of ECC data. 305 * 8 bits of ECC data.
290 * 306 *
291 * Memory Stick: A printed circuit board that aggregates multiple 307 * Memory Stick: A printed circuit board that aggregates multiple
292 * memory devices in parallel. In general, this is the 308 * memory devices in parallel. In general, this is the
293 * Field Replaceable Unit (FRU) which gets replaced, in 309 * Field Replaceable Unit (FRU) which gets replaced, in
294 * the case of excessive errors. Most often it is also 310 * the case of excessive errors. Most often it is also
295 * called DIMM (Dual Inline Memory Module). 311 * called DIMM (Dual Inline Memory Module).
296 * 312 *
297 * Memory Socket: A physical connector on the motherboard that accepts 313 * Memory Socket: A physical connector on the motherboard that accepts
298 * a single memory stick. Also called as "slot" on several 314 * a single memory stick. Also called as "slot" on several
299 * datasheets. 315 * datasheets.
300 * 316 *
301 * Channel: A memory controller channel, responsible to communicate 317 * Channel: A memory controller channel, responsible to communicate
302 * with a group of DIMMs. Each channel has its own 318 * with a group of DIMMs. Each channel has its own
303 * independent control (command) and data bus, and can 319 * independent control (command) and data bus, and can
304 * be used independently or grouped with other channels. 320 * be used independently or grouped with other channels.
305 * 321 *
306 * Branch: It is typically the highest hierarchy on a 322 * Branch: It is typically the highest hierarchy on a
307 * Fully-Buffered DIMM memory controller. 323 * Fully-Buffered DIMM memory controller.
308 * Typically, it contains two channels. 324 * Typically, it contains two channels.
309 * Two channels at the same branch can be used in single 325 * Two channels at the same branch can be used in single
310 * mode or in lockstep mode. 326 * mode or in lockstep mode.
311 * When lockstep is enabled, the cacheline is doubled, 327 * When lockstep is enabled, the cacheline is doubled,
312 * but it generally brings some performance penalty. 328 * but it generally brings some performance penalty.
313 * Also, it is generally not possible to point to just one 329 * Also, it is generally not possible to point to just one
314 * memory stick when an error occurs, as the error 330 * memory stick when an error occurs, as the error
315 * correction code is calculated using two DIMMs instead 331 * correction code is calculated using two DIMMs instead
316 * of one. Due to that, it is capable of correcting more 332 * of one. Due to that, it is capable of correcting more
317 * errors than on single mode. 333 * errors than on single mode.
318 * 334 *
319 * Single-channel: The data accessed by the memory controller is contained 335 * Single-channel: The data accessed by the memory controller is contained
320 * into one dimm only. E. g. if the data is 64 bits-wide, 336 * into one dimm only. E. g. if the data is 64 bits-wide,
321 * the data flows to the CPU using one 64 bits parallel 337 * the data flows to the CPU using one 64 bits parallel
322 * access. 338 * access.
323 * Typically used with SDR, DDR, DDR2 and DDR3 memories. 339 * Typically used with SDR, DDR, DDR2 and DDR3 memories.
324 * FB-DIMM and RAMBUS use a different concept for channel, 340 * FB-DIMM and RAMBUS use a different concept for channel,
325 * so this concept doesn't apply there. 341 * so this concept doesn't apply there.
326 * 342 *
327 * Double-channel: The data size accessed by the memory controller is 343 * Double-channel: The data size accessed by the memory controller is
328 * interlaced into two dimms, accessed at the same time. 344 * interlaced into two dimms, accessed at the same time.
329 * E. g. if the DIMM is 64 bits-wide (72 bits with ECC), 345 * E. g. if the DIMM is 64 bits-wide (72 bits with ECC),
330 * the data flows to the CPU using a 128 bits parallel 346 * the data flows to the CPU using a 128 bits parallel
331 * access. 347 * access.
332 * 348 *
333 * Chip-select row: This is the name of the DRAM signal used to select the 349 * Chip-select row: This is the name of the DRAM signal used to select the
334 * DRAM ranks to be accessed. Common chip-select rows for 350 * DRAM ranks to be accessed. Common chip-select rows for
335 * single channel are 64 bits, for dual channel 128 bits. 351 * single channel are 64 bits, for dual channel 128 bits.
336 * It may not be visible by the memory controller, as some 352 * It may not be visible by the memory controller, as some
337 * DIMM types have a memory buffer that can hide direct 353 * DIMM types have a memory buffer that can hide direct
338 * access to it from the Memory Controller. 354 * access to it from the Memory Controller.
339 * 355 *
340 * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. 356 * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
341 * Motherboards commonly drive two chip-select pins to 357 * Motherboards commonly drive two chip-select pins to
342 * a memory stick. A single-ranked stick, will occupy 358 * a memory stick. A single-ranked stick, will occupy
343 * only one of those rows. The other will be unused. 359 * only one of those rows. The other will be unused.
344 * 360 *
345 * Double-Ranked stick: A double-ranked stick has two chip-select rows which 361 * Double-Ranked stick: A double-ranked stick has two chip-select rows which
346 * access different sets of memory devices. The two 362 * access different sets of memory devices. The two
347 * rows cannot be accessed concurrently. 363 * rows cannot be accessed concurrently.
348 * 364 *
349 * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. 365 * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
350 * A double-sided stick has two chip-select rows which 366 * A double-sided stick has two chip-select rows which
351 * access different sets of memory devices. The two 367 * access different sets of memory devices. The two
352 * rows cannot be accessed concurrently. "Double-sided" 368 * rows cannot be accessed concurrently. "Double-sided"
353 * is irrespective of the memory devices being mounted 369 * is irrespective of the memory devices being mounted
354 * on both sides of the memory stick. 370 * on both sides of the memory stick.
355 * 371 *
356 * Socket set: All of the memory sticks that are required for 372 * Socket set: All of the memory sticks that are required for
357 * a single memory access or all of the memory sticks 373 * a single memory access or all of the memory sticks
358 * spanned by a chip-select row. A single socket set 374 * spanned by a chip-select row. A single socket set
359 * has two chip-select rows and if double-sided sticks 375 * has two chip-select rows and if double-sided sticks
360 * are used these will occupy those chip-select rows. 376 * are used these will occupy those chip-select rows.
361 * 377 *
362 * Bank: This term is avoided because it is unclear when 378 * Bank: This term is avoided because it is unclear when
363 * needing to distinguish between chip-select rows and 379 * needing to distinguish between chip-select rows and
364 * socket sets. 380 * socket sets.
365 * 381 *
366 * Controller pages: 382 * Controller pages:
367 * 383 *
368 * Physical pages: 384 * Physical pages:
369 * 385 *
370 * Virtual pages: 386 * Virtual pages:
371 * 387 *
372 * 388 *
373 * STRUCTURE ORGANIZATION AND CHOICES 389 * STRUCTURE ORGANIZATION AND CHOICES
374 * 390 *
375 * 391 *
376 * 392 *
377 * PS - I enjoyed writing all that about as much as you enjoyed reading it. 393 * PS - I enjoyed writing all that about as much as you enjoyed reading it.
378 */ 394 */
379 395
380 /** 396 /**
381 * enum edac_mc_layer - memory controller hierarchy layer 397 * enum edac_mc_layer - memory controller hierarchy layer
382 * 398 *
383 * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch" 399 * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch"
384 * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" 400 * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel"
385 * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" 401 * @EDAC_MC_LAYER_SLOT: memory layer is named "slot"
386 * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" 402 * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select"
387 * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped 403 * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped
388 * as a single memory area. This is used when 404 * as a single memory area. This is used when
389 * retrieving errors from a firmware driven driver. 405 * retrieving errors from a firmware driven driver.
390 * 406 *
391 * This enum is used by the drivers to tell edac_mc_sysfs what name should 407 * This enum is used by the drivers to tell edac_mc_sysfs what name should
392 * be used when describing a memory stick location. 408 * be used when describing a memory stick location.
393 */ 409 */
/*
 * enum edac_mc_layer_type - name drivers give each memory-controller
 * hierarchy layer, used by edac_mc_sysfs when describing a stick location.
 */
enum edac_mc_layer_type {
	EDAC_MC_LAYER_BRANCH,		/* layer is named "branch" */
	EDAC_MC_LAYER_CHANNEL,		/* layer is named "channel" */
	EDAC_MC_LAYER_SLOT,		/* layer is named "slot" */
	EDAC_MC_LAYER_CHIP_SELECT,	/* layer is named "chip select" */
	EDAC_MC_LAYER_ALL_MEM,		/* unknown layout: one flat memory area,
					 * e.g. firmware-driven drivers */
};
401 417
402 /** 418 /**
403 * struct edac_mc_layer - describes the memory controller hierarchy 419 * struct edac_mc_layer - describes the memory controller hierarchy
404 * @layer: layer type 420 * @layer: layer type
405 * @size: number of components per layer. For example, 421 * @size: number of components per layer. For example,
406 * if the channel layer has two channels, size = 2 422 * if the channel layer has two channels, size = 2
407 * @is_virt_csrow: This layer is part of the "csrow" when old API 423 * @is_virt_csrow: This layer is part of the "csrow" when old API
408 * compatibility mode is enabled. Otherwise, it is 424 * compatibility mode is enabled. Otherwise, it is
409 * a channel 425 * a channel
410 */ 426 */
411 struct edac_mc_layer { 427 struct edac_mc_layer {
412 enum edac_mc_layer_type type; 428 enum edac_mc_layer_type type;
413 unsigned size; 429 unsigned size;
414 bool is_virt_csrow; 430 bool is_virt_csrow;
415 }; 431 };
416 432
417 /* 433 /*
418 * Maximum number of layers used by the memory controller to uniquely 434 * Maximum number of layers used by the memory controller to uniquely
419 * identify a single memory stick. 435 * identify a single memory stick.
420 * NOTE: Changing this constant requires not only to change the constant 436 * NOTE: Changing this constant requires not only to change the constant
421 * below, but also to change the existing code at the core, as there are 437 * below, but also to change the existing code at the core, as there are
422 * some code there that are optimized for 3 layers. 438 * some code there that are optimized for 3 layers.
423 */ 439 */
424 #define EDAC_MAX_LAYERS 3 440 #define EDAC_MAX_LAYERS 3
425 441
426 /** 442 /**
427 * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer array 443 * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer array
428 * for the element given by [layer0,layer1,layer2] position 444 * for the element given by [layer0,layer1,layer2] position
429 * 445 *
430 * @layers: a struct edac_mc_layer array, describing how many elements 446 * @layers: a struct edac_mc_layer array, describing how many elements
431 * were allocated for each layer 447 * were allocated for each layer
432 * @n_layers: Number of layers at the @layers array 448 * @n_layers: Number of layers at the @layers array
433 * @layer0: layer0 position 449 * @layer0: layer0 position
434 * @layer1: layer1 position. Unused if n_layers < 2 450 * @layer1: layer1 position. Unused if n_layers < 2
435 * @layer2: layer2 position. Unused if n_layers < 3 451 * @layer2: layer2 position. Unused if n_layers < 3
436 * 452 *
437 * For 1 layer, this macro returns &var[layer0] - &var 453 * For 1 layer, this macro returns &var[layer0] - &var
438 * For 2 layers, this macro is similar to allocate a bi-dimensional array 454 * For 2 layers, this macro is similar to allocate a bi-dimensional array
439 * and to return "&var[layer0][layer1] - &var" 455 * and to return "&var[layer0][layer1] - &var"
440 * For 3 layers, this macro is similar to allocate a tri-dimensional array 456 * For 3 layers, this macro is similar to allocate a tri-dimensional array
441 * and to return "&var[layer0][layer1][layer2] - &var" 457 * and to return "&var[layer0][layer1][layer2] - &var"
442 * 458 *
443 * A loop could be used here to make it more generic, but, as we only have 459 * A loop could be used here to make it more generic, but, as we only have
444 * 3 layers, this is a little faster. 460 * 3 layers, this is a little faster.
445 * By design, layers can never be 0 or more than 3. If that ever happens, 461 * By design, layers can never be 0 or more than 3. If that ever happens,
446 * a NULL is returned, causing an OOPS during the memory allocation routine, 462 * a NULL is returned, causing an OOPS during the memory allocation routine,
447 * with would point to the developer that he's doing something wrong. 463 * with would point to the developer that he's doing something wrong.
448 */ 464 */
449 #define EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2) ({ \ 465 #define EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2) ({ \
450 int __i; \ 466 int __i; \
451 if ((nlayers) == 1) \ 467 if ((nlayers) == 1) \
452 __i = layer0; \ 468 __i = layer0; \
453 else if ((nlayers) == 2) \ 469 else if ((nlayers) == 2) \
454 __i = (layer1) + ((layers[1]).size * (layer0)); \ 470 __i = (layer1) + ((layers[1]).size * (layer0)); \
455 else if ((nlayers) == 3) \ 471 else if ((nlayers) == 3) \
456 __i = (layer2) + ((layers[2]).size * ((layer1) + \ 472 __i = (layer2) + ((layers[2]).size * ((layer1) + \
457 ((layers[1]).size * (layer0)))); \ 473 ((layers[1]).size * (layer0)))); \
458 else \ 474 else \
459 __i = -EINVAL; \ 475 __i = -EINVAL; \
460 __i; \ 476 __i; \
461 }) 477 })
462 478
463 /** 479 /**
464 * EDAC_DIMM_PTR - Macro responsible to get a pointer inside a pointer array 480 * EDAC_DIMM_PTR - Macro responsible to get a pointer inside a pointer array
465 * for the element given by [layer0,layer1,layer2] position 481 * for the element given by [layer0,layer1,layer2] position
466 * 482 *
467 * @layers: a struct edac_mc_layer array, describing how many elements 483 * @layers: a struct edac_mc_layer array, describing how many elements
468 * were allocated for each layer 484 * were allocated for each layer
469 * @var: name of the var where we want to get the pointer 485 * @var: name of the var where we want to get the pointer
470 * (like mci->dimms) 486 * (like mci->dimms)
471 * @n_layers: Number of layers at the @layers array 487 * @n_layers: Number of layers at the @layers array
472 * @layer0: layer0 position 488 * @layer0: layer0 position
473 * @layer1: layer1 position. Unused if n_layers < 2 489 * @layer1: layer1 position. Unused if n_layers < 2
474 * @layer2: layer2 position. Unused if n_layers < 3 490 * @layer2: layer2 position. Unused if n_layers < 3
475 * 491 *
476 * For 1 layer, this macro returns &var[layer0] 492 * For 1 layer, this macro returns &var[layer0]
477 * For 2 layers, this macro is similar to allocate a bi-dimensional array 493 * For 2 layers, this macro is similar to allocate a bi-dimensional array
478 * and to return "&var[layer0][layer1]" 494 * and to return "&var[layer0][layer1]"
479 * For 3 layers, this macro is similar to allocate a tri-dimensional array 495 * For 3 layers, this macro is similar to allocate a tri-dimensional array
480 * and to return "&var[layer0][layer1][layer2]" 496 * and to return "&var[layer0][layer1][layer2]"
481 */ 497 */
482 #define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \ 498 #define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \
483 typeof(*var) __p; \ 499 typeof(*var) __p; \
484 int ___i = EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2); \ 500 int ___i = EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2); \
485 if (___i < 0) \ 501 if (___i < 0) \
486 __p = NULL; \ 502 __p = NULL; \
487 else \ 503 else \
488 __p = (var)[___i]; \ 504 __p = (var)[___i]; \
489 __p; \ 505 __p; \
490 }) 506 })
491 507
492 struct dimm_info { 508 struct dimm_info {
493 struct device dev; 509 struct device dev;
494 510
495 char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ 511 char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
496 512
497 /* Memory location data */ 513 /* Memory location data */
498 unsigned location[EDAC_MAX_LAYERS]; 514 unsigned location[EDAC_MAX_LAYERS];
499 515
500 struct mem_ctl_info *mci; /* the parent */ 516 struct mem_ctl_info *mci; /* the parent */
501 517
502 u32 grain; /* granularity of reported error in bytes */ 518 u32 grain; /* granularity of reported error in bytes */
503 enum dev_type dtype; /* memory device type */ 519 enum dev_type dtype; /* memory device type */
504 enum mem_type mtype; /* memory dimm type */ 520 enum mem_type mtype; /* memory dimm type */
505 enum edac_type edac_mode; /* EDAC mode for this dimm */ 521 enum edac_type edac_mode; /* EDAC mode for this dimm */
506 522
507 u32 nr_pages; /* number of pages on this dimm */ 523 u32 nr_pages; /* number of pages on this dimm */
508 524
509 unsigned csrow, cschannel; /* Points to the old API data */ 525 unsigned csrow, cschannel; /* Points to the old API data */
510 }; 526 };
511 527
512 /** 528 /**
513 * struct rank_info - contains the information for one DIMM rank 529 * struct rank_info - contains the information for one DIMM rank
514 * 530 *
515 * @chan_idx: channel number where the rank is (typically, 0 or 1) 531 * @chan_idx: channel number where the rank is (typically, 0 or 1)
516 * @ce_count: number of correctable errors for this rank 532 * @ce_count: number of correctable errors for this rank
517 * @csrow: A pointer to the chip select row structure (the parent 533 * @csrow: A pointer to the chip select row structure (the parent
518 * structure). The location of the rank is given by 534 * structure). The location of the rank is given by
519 * the (csrow->csrow_idx, chan_idx) vector. 535 * the (csrow->csrow_idx, chan_idx) vector.
520 * @dimm: A pointer to the DIMM structure, where the DIMM label 536 * @dimm: A pointer to the DIMM structure, where the DIMM label
521 * information is stored. 537 * information is stored.
522 * 538 *
523 * FIXME: Currently, the EDAC core model will assume one DIMM per rank. 539 * FIXME: Currently, the EDAC core model will assume one DIMM per rank.
524 * This is a bad assumption, but it makes this patch easier. Later 540 * This is a bad assumption, but it makes this patch easier. Later
525 * patches in this series will fix this issue. 541 * patches in this series will fix this issue.
526 */ 542 */
527 struct rank_info { 543 struct rank_info {
528 int chan_idx; 544 int chan_idx;
529 struct csrow_info *csrow; 545 struct csrow_info *csrow;
530 struct dimm_info *dimm; 546 struct dimm_info *dimm;
531 547
532 u32 ce_count; /* Correctable Errors for this csrow */ 548 u32 ce_count; /* Correctable Errors for this csrow */
533 }; 549 };
534 550
535 struct csrow_info { 551 struct csrow_info {
536 struct device dev; 552 struct device dev;
537 553
538 /* Used only by edac_mc_find_csrow_by_page() */ 554 /* Used only by edac_mc_find_csrow_by_page() */
539 unsigned long first_page; /* first page number in csrow */ 555 unsigned long first_page; /* first page number in csrow */
540 unsigned long last_page; /* last page number in csrow */ 556 unsigned long last_page; /* last page number in csrow */
541 unsigned long page_mask; /* used for interleaving - 557 unsigned long page_mask; /* used for interleaving -
542 * 0UL for non intlv */ 558 * 0UL for non intlv */
543 559
544 int csrow_idx; /* the chip-select row */ 560 int csrow_idx; /* the chip-select row */
545 561
546 u32 ue_count; /* Uncorrectable Errors for this csrow */ 562 u32 ue_count; /* Uncorrectable Errors for this csrow */
547 u32 ce_count; /* Correctable Errors for this csrow */ 563 u32 ce_count; /* Correctable Errors for this csrow */
548 u32 nr_pages; /* combined pages count of all channels */ 564 u32 nr_pages; /* combined pages count of all channels */
549 565
550 struct mem_ctl_info *mci; /* the parent */ 566 struct mem_ctl_info *mci; /* the parent */
551 567
552 /* channel information for this csrow */ 568 /* channel information for this csrow */
553 u32 nr_channels; 569 u32 nr_channels;
554 struct rank_info **channels; 570 struct rank_info **channels;
555 }; 571 };
556 572
557 /* 573 /*
558 * struct errcount_attribute - used to store the several error counts 574 * struct errcount_attribute - used to store the several error counts
559 */ 575 */
560 struct errcount_attribute_data { 576 struct errcount_attribute_data {
561 int n_layers; 577 int n_layers;
562 int pos[EDAC_MAX_LAYERS]; 578 int pos[EDAC_MAX_LAYERS];
563 int layer0, layer1, layer2; 579 int layer0, layer1, layer2;
564 }; 580 };
565 581
566 /** 582 /**
567 * edac_raw_error_desc - Raw error report structure 583 * edac_raw_error_desc - Raw error report structure
568 * @grain: minimum granularity for an error report, in bytes 584 * @grain: minimum granularity for an error report, in bytes
569 * @error_count: number of errors of the same type 585 * @error_count: number of errors of the same type
570 * @top_layer: top layer of the error (layer[0]) 586 * @top_layer: top layer of the error (layer[0])
571 * @mid_layer: middle layer of the error (layer[1]) 587 * @mid_layer: middle layer of the error (layer[1])
572 * @low_layer: low layer of the error (layer[2]) 588 * @low_layer: low layer of the error (layer[2])
573 * @page_frame_number: page where the error happened 589 * @page_frame_number: page where the error happened
574 * @offset_in_page: page offset 590 * @offset_in_page: page offset
575 * @syndrome: syndrome of the error (or 0 if unknown or if 591 * @syndrome: syndrome of the error (or 0 if unknown or if
576 * the syndrome is not applicable) 592 * the syndrome is not applicable)
577 * @msg: error message 593 * @msg: error message
578 * @location: location of the error 594 * @location: location of the error
579 * @label: label of the affected DIMM(s) 595 * @label: label of the affected DIMM(s)
580 * @other_detail: other driver-specific detail about the error 596 * @other_detail: other driver-specific detail about the error
581 * @enable_per_layer_report: if false, the error affects all layers 597 * @enable_per_layer_report: if false, the error affects all layers
582 * (typically, a memory controller error) 598 * (typically, a memory controller error)
583 */ 599 */
584 struct edac_raw_error_desc { 600 struct edac_raw_error_desc {
585 /* 601 /*
586 * NOTE: everything before grain won't be cleaned by 602 * NOTE: everything before grain won't be cleaned by
587 * edac_raw_error_desc_clean() 603 * edac_raw_error_desc_clean()
588 */ 604 */
589 char location[LOCATION_SIZE]; 605 char location[LOCATION_SIZE];
590 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; 606 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
591 long grain; 607 long grain;
592 608
593 /* the vars below and grain will be cleaned on every new error report */ 609 /* the vars below and grain will be cleaned on every new error report */
594 u16 error_count; 610 u16 error_count;
595 int top_layer; 611 int top_layer;
596 int mid_layer; 612 int mid_layer;
597 int low_layer; 613 int low_layer;
598 unsigned long page_frame_number; 614 unsigned long page_frame_number;
599 unsigned long offset_in_page; 615 unsigned long offset_in_page;
600 unsigned long syndrome; 616 unsigned long syndrome;
601 const char *msg; 617 const char *msg;
602 const char *other_detail; 618 const char *other_detail;
603 bool enable_per_layer_report; 619 bool enable_per_layer_report;
604 }; 620 };
605 621
606 /* MEMORY controller information structure 622 /* MEMORY controller information structure
607 */ 623 */
608 struct mem_ctl_info { 624 struct mem_ctl_info {
609 struct device dev; 625 struct device dev;
610 struct bus_type bus; 626 struct bus_type bus;
611 627
612 struct list_head link; /* for global list of mem_ctl_info structs */ 628 struct list_head link; /* for global list of mem_ctl_info structs */
613 629
614 struct module *owner; /* Module owner of this control struct */ 630 struct module *owner; /* Module owner of this control struct */
615 631
616 unsigned long mtype_cap; /* memory types supported by mc */ 632 unsigned long mtype_cap; /* memory types supported by mc */
617 unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */ 633 unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
618 unsigned long edac_cap; /* configuration capabilities - this is 634 unsigned long edac_cap; /* configuration capabilities - this is
619 * closely related to edac_ctl_cap. The 635 * closely related to edac_ctl_cap. The
620 * difference is that the controller may be 636 * difference is that the controller may be
621 * capable of s4ecd4ed which would be listed 637 * capable of s4ecd4ed which would be listed
622 * in edac_ctl_cap, but if channels aren't 638 * in edac_ctl_cap, but if channels aren't
623 * capable of s4ecd4ed then the edac_cap would 639 * capable of s4ecd4ed then the edac_cap would
624 * not have that capability. 640 * not have that capability.
625 */ 641 */
626 unsigned long scrub_cap; /* chipset scrub capabilities */ 642 unsigned long scrub_cap; /* chipset scrub capabilities */
627 enum scrub_type scrub_mode; /* current scrub mode */ 643 enum scrub_type scrub_mode; /* current scrub mode */
628 644
629 /* Translates sdram memory scrub rate given in bytes/sec to the 645 /* Translates sdram memory scrub rate given in bytes/sec to the
630 internal representation and configures whatever else needs 646 internal representation and configures whatever else needs
631 to be configured. 647 to be configured.
632 */ 648 */
633 int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw); 649 int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
634 650
635 /* Get the current sdram memory scrub rate from the internal 651 /* Get the current sdram memory scrub rate from the internal
636 representation and converts it to the closest matching 652 representation and converts it to the closest matching
637 bandwidth in bytes/sec. 653 bandwidth in bytes/sec.
638 */ 654 */
639 int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci); 655 int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
640 656
641 657
642 /* pointer to edac checking routine */ 658 /* pointer to edac checking routine */
643 void (*edac_check) (struct mem_ctl_info * mci); 659 void (*edac_check) (struct mem_ctl_info * mci);
644 660
645 /* 661 /*
646 * Remaps memory pages: controller pages to physical pages. 662 * Remaps memory pages: controller pages to physical pages.
647 * For most MC's, this will be NULL. 663 * For most MC's, this will be NULL.
648 */ 664 */
649 /* FIXME - why not send the phys page to begin with? */ 665 /* FIXME - why not send the phys page to begin with? */
650 unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, 666 unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
651 unsigned long page); 667 unsigned long page);
652 int mc_idx; 668 int mc_idx;
653 struct csrow_info **csrows; 669 struct csrow_info **csrows;
654 unsigned nr_csrows, num_cschannel; 670 unsigned nr_csrows, num_cschannel;
655 671
656 /* 672 /*
657 * Memory Controller hierarchy 673 * Memory Controller hierarchy
658 * 674 *
659 * There are basically two types of memory controller: the ones that 675 * There are basically two types of memory controller: the ones that
660 * sees memory sticks ("dimms"), and the ones that sees memory ranks. 676 * sees memory sticks ("dimms"), and the ones that sees memory ranks.
661 * All old memory controllers enumerate memories per rank, but most 677 * All old memory controllers enumerate memories per rank, but most
662 * of the recent drivers enumerate memories per DIMM, instead. 678 * of the recent drivers enumerate memories per DIMM, instead.
663 * When the memory controller is per rank, mem_is_per_rank is true. 679 * When the memory controller is per rank, mem_is_per_rank is true.
664 */ 680 */
665 unsigned n_layers; 681 unsigned n_layers;
666 struct edac_mc_layer *layers; 682 struct edac_mc_layer *layers;
667 bool mem_is_per_rank; 683 bool mem_is_per_rank;
668 684
669 /* 685 /*
670 * DIMM info. Will eventually remove the entire csrows_info some day 686 * DIMM info. Will eventually remove the entire csrows_info some day
671 */ 687 */
672 unsigned tot_dimms; 688 unsigned tot_dimms;
673 struct dimm_info **dimms; 689 struct dimm_info **dimms;
674 690
675 /* 691 /*
676 * FIXME - what about controllers on other busses? - IDs must be 692 * FIXME - what about controllers on other busses? - IDs must be
677 * unique. dev pointer should be sufficiently unique, but 693 * unique. dev pointer should be sufficiently unique, but
678 * BUS:SLOT.FUNC numbers may not be unique. 694 * BUS:SLOT.FUNC numbers may not be unique.
679 */ 695 */
680 struct device *pdev; 696 struct device *pdev;
681 const char *mod_name; 697 const char *mod_name;
682 const char *mod_ver; 698 const char *mod_ver;
683 const char *ctl_name; 699 const char *ctl_name;
684 const char *dev_name; 700 const char *dev_name;
685 void *pvt_info; 701 void *pvt_info;
686 unsigned long start_time; /* mci load start time (in jiffies) */ 702 unsigned long start_time; /* mci load start time (in jiffies) */
687 703
688 /* 704 /*
689 * drivers shouldn't access those fields directly, as the core 705 * drivers shouldn't access those fields directly, as the core
690 * already handles that. 706 * already handles that.
691 */ 707 */
692 u32 ce_noinfo_count, ue_noinfo_count; 708 u32 ce_noinfo_count, ue_noinfo_count;
693 u32 ue_mc, ce_mc; 709 u32 ue_mc, ce_mc;
694 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; 710 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
695 711
696 struct completion complete; 712 struct completion complete;
697 713
698 /* Additional top controller level attributes, but specified 714 /* Additional top controller level attributes, but specified
699 * by the low level driver. 715 * by the low level driver.
700 * 716 *
701 * Set by the low level driver to provide attributes at the 717 * Set by the low level driver to provide attributes at the
702 * controller level. 718 * controller level.
703 * An array of structures, NULL terminated 719 * An array of structures, NULL terminated
704 * 720 *
705 * If attributes are desired, then set to array of attributes 721 * If attributes are desired, then set to array of attributes
706 * If no attributes are desired, leave NULL 722 * If no attributes are desired, leave NULL
707 */ 723 */
708 const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes; 724 const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
709 725
710 /* work struct for this MC */ 726 /* work struct for this MC */
711 struct delayed_work work; 727 struct delayed_work work;
712 728
713 /* 729 /*
714 * Used to report an error - by being at the global struct 730 * Used to report an error - by being at the global struct
715 * makes the memory allocated by the EDAC core 731 * makes the memory allocated by the EDAC core
716 */ 732 */
717 struct edac_raw_error_desc error_desc; 733 struct edac_raw_error_desc error_desc;
718 734
719 /* the internal state of this controller instance */ 735 /* the internal state of this controller instance */
720 int op_state; 736 int op_state;
721 737
722 #ifdef CONFIG_EDAC_DEBUG 738 #ifdef CONFIG_EDAC_DEBUG
723 struct dentry *debugfs; 739 struct dentry *debugfs;
724 u8 fake_inject_layer[EDAC_MAX_LAYERS]; 740 u8 fake_inject_layer[EDAC_MAX_LAYERS];
725 u32 fake_inject_ue; 741 u32 fake_inject_ue;
726 u16 fake_inject_count; 742 u16 fake_inject_count;
727 #endif 743 #endif
728 __u8 csbased : 1, /* csrow-based memory controller */ 744 __u8 csbased : 1, /* csrow-based memory controller */
729 __resv : 7; 745 __resv : 7;
730 }; 746 };
731 747
732 #endif 748 #endif
733 749
include/ras/ras_event.h
1 #undef TRACE_SYSTEM 1 #undef TRACE_SYSTEM
2 #define TRACE_SYSTEM ras 2 #define TRACE_SYSTEM ras
3 #define TRACE_INCLUDE_FILE ras_event 3 #define TRACE_INCLUDE_FILE ras_event
4 4
5 #if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ) 5 #if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ)
6 #define _TRACE_HW_EVENT_MC_H 6 #define _TRACE_HW_EVENT_MC_H
7 7
8 #include <linux/tracepoint.h> 8 #include <linux/tracepoint.h>
9 #include <linux/edac.h> 9 #include <linux/edac.h>
10 #include <linux/ktime.h> 10 #include <linux/ktime.h>
11 11
12 /* 12 /*
13 * Hardware Events Report 13 * Hardware Events Report
14 * 14 *
15 * Those events are generated when hardware detected a corrected or 15 * Those events are generated when hardware detected a corrected or
16 * uncorrected event, and are meant to replace the current API to report 16 * uncorrected event, and are meant to replace the current API to report
17 * errors defined on both EDAC and MCE subsystems. 17 * errors defined on both EDAC and MCE subsystems.
18 * 18 *
19 * FIXME: Add events for handling memory errors originated from the 19 * FIXME: Add events for handling memory errors originated from the
20 * MCE subsystem. 20 * MCE subsystem.
21 */ 21 */
22 22
23 /* 23 /*
24 * Hardware-independent Memory Controller specific events 24 * Hardware-independent Memory Controller specific events
25 */ 25 */
26 26
27 /* 27 /*
28 * Default error mechanisms for Memory Controller errors (CE and UE) 28 * Default error mechanisms for Memory Controller errors (CE and UE)
29 */ 29 */
30 TRACE_EVENT(mc_event, 30 TRACE_EVENT(mc_event,
31 31
32 TP_PROTO(const unsigned int err_type, 32 TP_PROTO(const unsigned int err_type,
33 const char *error_msg, 33 const char *error_msg,
34 const char *label, 34 const char *label,
35 const int error_count, 35 const int error_count,
36 const u8 mc_index, 36 const u8 mc_index,
37 const s8 top_layer, 37 const s8 top_layer,
38 const s8 mid_layer, 38 const s8 mid_layer,
39 const s8 low_layer, 39 const s8 low_layer,
40 unsigned long address, 40 unsigned long address,
41 const u8 grain_bits, 41 const u8 grain_bits,
42 unsigned long syndrome, 42 unsigned long syndrome,
43 const char *driver_detail), 43 const char *driver_detail),
44 44
45 TP_ARGS(err_type, error_msg, label, error_count, mc_index, 45 TP_ARGS(err_type, error_msg, label, error_count, mc_index,
46 top_layer, mid_layer, low_layer, address, grain_bits, 46 top_layer, mid_layer, low_layer, address, grain_bits,
47 syndrome, driver_detail), 47 syndrome, driver_detail),
48 48
49 TP_STRUCT__entry( 49 TP_STRUCT__entry(
50 __field( unsigned int, error_type ) 50 __field( unsigned int, error_type )
51 __string( msg, error_msg ) 51 __string( msg, error_msg )
52 __string( label, label ) 52 __string( label, label )
53 __field( u16, error_count ) 53 __field( u16, error_count )
54 __field( u8, mc_index ) 54 __field( u8, mc_index )
55 __field( s8, top_layer ) 55 __field( s8, top_layer )
56 __field( s8, middle_layer ) 56 __field( s8, middle_layer )
57 __field( s8, lower_layer ) 57 __field( s8, lower_layer )
58 __field( long, address ) 58 __field( long, address )
59 __field( u8, grain_bits ) 59 __field( u8, grain_bits )
60 __field( long, syndrome ) 60 __field( long, syndrome )
61 __string( driver_detail, driver_detail ) 61 __string( driver_detail, driver_detail )
62 ), 62 ),
63 63
64 TP_fast_assign( 64 TP_fast_assign(
65 __entry->error_type = err_type; 65 __entry->error_type = err_type;
66 __assign_str(msg, error_msg); 66 __assign_str(msg, error_msg);
67 __assign_str(label, label); 67 __assign_str(label, label);
68 __entry->error_count = error_count; 68 __entry->error_count = error_count;
69 __entry->mc_index = mc_index; 69 __entry->mc_index = mc_index;
70 __entry->top_layer = top_layer; 70 __entry->top_layer = top_layer;
71 __entry->middle_layer = mid_layer; 71 __entry->middle_layer = mid_layer;
72 __entry->lower_layer = low_layer; 72 __entry->lower_layer = low_layer;
73 __entry->address = address; 73 __entry->address = address;
74 __entry->grain_bits = grain_bits; 74 __entry->grain_bits = grain_bits;
75 __entry->syndrome = syndrome; 75 __entry->syndrome = syndrome;
76 __assign_str(driver_detail, driver_detail); 76 __assign_str(driver_detail, driver_detail);
77 ), 77 ),
78 78
79 TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)", 79 TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)",
80 __entry->error_count, 80 __entry->error_count,
81 (__entry->error_type == HW_EVENT_ERR_CORRECTED) ? "Corrected" : 81 mc_event_error_type(__entry->error_type),
82 ((__entry->error_type == HW_EVENT_ERR_FATAL) ?
83 "Fatal" : "Uncorrected"),
84 __entry->error_count > 1 ? "s" : "", 82 __entry->error_count > 1 ? "s" : "",
85 ((char *)__get_str(msg))[0] ? " " : "", 83 ((char *)__get_str(msg))[0] ? " " : "",
86 __get_str(msg), 84 __get_str(msg),
87 __get_str(label), 85 __get_str(label),
88 __entry->mc_index, 86 __entry->mc_index,
89 __entry->top_layer, 87 __entry->top_layer,
90 __entry->middle_layer, 88 __entry->middle_layer,
91 __entry->lower_layer, 89 __entry->lower_layer,
92 __entry->address, 90 __entry->address,
93 1 << __entry->grain_bits, 91 1 << __entry->grain_bits,
94 __entry->syndrome, 92 __entry->syndrome,
95 ((char *)__get_str(driver_detail))[0] ? " " : "", 93 ((char *)__get_str(driver_detail))[0] ? " " : "",
96 __get_str(driver_detail)) 94 __get_str(driver_detail))
97 ); 95 );
98 96
99 #endif /* _TRACE_HW_EVENT_MC_H */ 97 #endif /* _TRACE_HW_EVENT_MC_H */
100 98
101 /* This part must be outside protection */ 99 /* This part must be outside protection */
102 #include <trace/define_trace.h> 100 #include <trace/define_trace.h>
103 101