Commit c7ef7645544131b0750478d1cf94cdfa945c809d
1 parent
80cc7d87d5
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
edac: reduce stack pressure by using a pre-allocated buffer
The number of variables on the stack is too big. Reduce the stack usage by using a pre-allocated error buffer. Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Showing 2 changed files with 104 additions and 33 deletions Side-by-side Diff
drivers/edac/edac_mc.c
... | ... | @@ -1065,7 +1065,6 @@ |
1065 | 1065 | edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); |
1066 | 1066 | } |
1067 | 1067 | |
1068 | -#define OTHER_LABEL " or " | |
1069 | 1068 | |
1070 | 1069 | /** |
1071 | 1070 | * edac_mc_handle_error - reports a memory event to userspace |
1072 | 1071 | |
1073 | 1072 | |
1074 | 1073 | |
... | ... | @@ -1097,19 +1096,28 @@ |
1097 | 1096 | const char *msg, |
1098 | 1097 | const char *other_detail) |
1099 | 1098 | { |
1100 | - /* FIXME: too much for stack: move it to some pre-alocated area */ | |
1101 | - char detail[80], location[80]; | |
1102 | - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; | |
1099 | + char detail[80]; | |
1103 | 1100 | char *p; |
1104 | 1101 | int row = -1, chan = -1; |
1105 | 1102 | int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; |
1106 | - int i; | |
1107 | - long grain; | |
1108 | - bool enable_per_layer_report = false; | |
1103 | + int i, n_labels = 0; | |
1109 | 1104 | u8 grain_bits; |
1105 | + struct edac_raw_error_desc *e = &mci->error_desc; | |
1110 | 1106 | |
1111 | 1107 | edac_dbg(3, "MC%d\n", mci->mc_idx); |
1112 | 1108 | |
1109 | + /* Fills the error report buffer */ | |
1110 | + memset(e, 0, sizeof (*e)); | |
1111 | + e->error_count = error_count; | |
1112 | + e->top_layer = top_layer; | |
1113 | + e->mid_layer = mid_layer; | |
1114 | + e->low_layer = low_layer; | |
1115 | + e->page_frame_number = page_frame_number; | |
1116 | + e->offset_in_page = offset_in_page; | |
1117 | + e->syndrome = syndrome; | |
1118 | + e->msg = msg; | |
1119 | + e->other_detail = other_detail; | |
1120 | + | |
1113 | 1121 | /* |
1114 | 1122 | * Check if the event report is consistent and if the memory |
1115 | 1123 | * location is known. If it is known, enable_per_layer_report will be |
... | ... | @@ -1132,7 +1140,7 @@ |
1132 | 1140 | pos[i] = -1; |
1133 | 1141 | } |
1134 | 1142 | if (pos[i] >= 0) |
1135 | - enable_per_layer_report = true; | |
1143 | + e->enable_per_layer_report = true; | |
1136 | 1144 | } |
1137 | 1145 | |
1138 | 1146 | /* |
... | ... | @@ -1146,8 +1154,7 @@ |
1146 | 1154 | * where each memory belongs to a separate channel within the same |
1147 | 1155 | * branch. |
1148 | 1156 | */ |
1149 | - grain = 0; | |
1150 | - p = label; | |
1157 | + p = e->label; | |
1151 | 1158 | *p = '\0'; |
1152 | 1159 | |
1153 | 1160 | for (i = 0; i < mci->tot_dimms; i++) { |
... | ... | @@ -1161,8 +1168,8 @@ |
1161 | 1168 | continue; |
1162 | 1169 | |
1163 | 1170 | /* get the max grain, over the error match range */ |
1164 | - if (dimm->grain > grain) | |
1165 | - grain = dimm->grain; | |
1171 | + if (dimm->grain > e->grain) | |
1172 | + e->grain = dimm->grain; | |
1166 | 1173 | |
1167 | 1174 | /* |
1168 | 1175 | * If the error is memory-controller wide, there's no need to |
... | ... | @@ -1170,8 +1177,13 @@ |
1170 | 1177 | * channel/memory controller/... may be affected. |
1171 | 1178 | * Also, don't show errors for empty DIMM slots. |
1172 | 1179 | */ |
1173 | - if (enable_per_layer_report && dimm->nr_pages) { | |
1174 | - if (p != label) { | |
1180 | + if (e->enable_per_layer_report && dimm->nr_pages) { | |
1181 | + if (n_labels >= EDAC_MAX_LABELS) { | |
1182 | + e->enable_per_layer_report = false; | |
1183 | + break; | |
1184 | + } | |
1185 | + n_labels++; | |
1186 | + if (p != e->label) { | |
1175 | 1187 | strcpy(p, OTHER_LABEL); |
1176 | 1188 | p += strlen(OTHER_LABEL); |
1177 | 1189 | } |
1178 | 1190 | |
... | ... | @@ -1198,12 +1210,12 @@ |
1198 | 1210 | } |
1199 | 1211 | } |
1200 | 1212 | |
1201 | - if (!enable_per_layer_report) { | |
1202 | - strcpy(label, "any memory"); | |
1213 | + if (!e->enable_per_layer_report) { | |
1214 | + strcpy(e->label, "any memory"); | |
1203 | 1215 | } else { |
1204 | 1216 | edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); |
1205 | - if (p == label) | |
1206 | - strcpy(label, "unknown memory"); | |
1217 | + if (p == e->label) | |
1218 | + strcpy(e->label, "unknown memory"); | |
1207 | 1219 | if (type == HW_EVENT_ERR_CORRECTED) { |
1208 | 1220 | if (row >= 0) { |
1209 | 1221 | mci->csrows[row]->ce_count += error_count; |
... | ... | @@ -1216,7 +1228,7 @@ |
1216 | 1228 | } |
1217 | 1229 | |
1218 | 1230 | /* Fill the RAM location data */ |
1219 | - p = location; | |
1231 | + p = e->location; | |
1220 | 1232 | |
1221 | 1233 | for (i = 0; i < mci->n_layers; i++) { |
1222 | 1234 | if (pos[i] < 0) |
1223 | 1235 | |
1224 | 1236 | |
1225 | 1237 | |
1226 | 1238 | |
... | ... | @@ -1226,32 +1238,35 @@ |
1226 | 1238 | edac_layer_name[mci->layers[i].type], |
1227 | 1239 | pos[i]); |
1228 | 1240 | } |
1229 | - if (p > location) | |
1241 | + if (p > e->location) | |
1230 | 1242 | *(p - 1) = '\0'; |
1231 | 1243 | |
1232 | 1244 | /* Report the error via the trace interface */ |
1233 | - grain_bits = fls_long(grain) + 1; | |
1234 | - trace_mc_event(type, msg, label, error_count, | |
1235 | - mci->mc_idx, top_layer, mid_layer, low_layer, | |
1236 | - PAGES_TO_MiB(page_frame_number) | offset_in_page, | |
1237 | - grain_bits, syndrome, other_detail); | |
1245 | + grain_bits = fls_long(e->grain) + 1; | |
1246 | + trace_mc_event(type, e->msg, e->label, e->error_count, | |
1247 | + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, | |
1248 | + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, | |
1249 | + grain_bits, e->syndrome, other_detail); | |
1238 | 1250 | |
1239 | 1251 | /* Memory type dependent details about the error */ |
1240 | 1252 | if (type == HW_EVENT_ERR_CORRECTED) { |
1241 | 1253 | snprintf(detail, sizeof(detail), |
1242 | 1254 | "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", |
1243 | - page_frame_number, offset_in_page, | |
1244 | - grain, syndrome); | |
1245 | - edac_ce_error(mci, error_count, pos, msg, location, label, | |
1246 | - detail, other_detail, enable_per_layer_report, | |
1247 | - page_frame_number, offset_in_page, grain); | |
1255 | + e->page_frame_number, e->offset_in_page, | |
1256 | + e->grain, e->syndrome); | |
1257 | + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, | |
1258 | + e->label, detail, other_detail, | |
1259 | + e->enable_per_layer_report, | |
1260 | + e->page_frame_number, e->offset_in_page, | |
1261 | + e->grain); | |
1248 | 1262 | } else { |
1249 | 1263 | snprintf(detail, sizeof(detail), |
1250 | 1264 | "page:0x%lx offset:0x%lx grain:%ld", |
1251 | - page_frame_number, offset_in_page, grain); | |
1265 | + page_frame_number, offset_in_page, e->grain); | |
1252 | 1266 | |
1253 | - edac_ue_error(mci, error_count, pos, msg, location, label, | |
1254 | - detail, other_detail, enable_per_layer_report); | |
1267 | + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, | |
1268 | + e->label, detail, other_detail, | |
1269 | + e->enable_per_layer_report); | |
1255 | 1270 | } |
1256 | 1271 | } |
1257 | 1272 | EXPORT_SYMBOL_GPL(edac_mc_handle_error); |
include/linux/edac.h
... | ... | @@ -47,8 +47,18 @@ |
47 | 47 | return; |
48 | 48 | } |
49 | 49 | |
50 | +/* Max length of a DIMM label */ | |
50 | 51 | #define EDAC_MC_LABEL_LEN 31 |
51 | 52 | |
53 | +/* Maximum size of the location string */ | |
54 | +#define LOCATION_SIZE 80 | |
55 | + | |
56 | +/* Defines the maximum number of labels that can be reported */ | |
57 | +#define EDAC_MAX_LABELS 8 | |
58 | + | |
59 | +/* String used to join two or more labels */ | |
60 | +#define OTHER_LABEL " or " | |
61 | + | |
52 | 62 | /** |
53 | 63 | * enum dev_type - describe the type of memory DRAM chips used at the stick |
54 | 64 | * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detecting it |
... | ... | @@ -553,6 +563,46 @@ |
553 | 563 | int layer0, layer1, layer2; |
554 | 564 | }; |
555 | 565 | |
566 | +/** | |
567 | + * edac_raw_error_desc - Raw error report structure | |
568 | + * @grain: minimum granularity for an error report, in bytes | |
569 | + * @error_count: number of errors of the same type | |
570 | + * @top_layer: top layer of the error (layer[0]) | |
571 | + * @mid_layer: middle layer of the error (layer[1]) | |
572 | + * @low_layer: low layer of the error (layer[2]) | |
573 | + * @page_frame_number: page where the error happened | |
574 | + * @offset_in_page: page offset | |
575 | + * @syndrome: syndrome of the error (or 0 if unknown or if | |
576 | + * the syndrome is not applicable) | |
577 | + * @msg: error message | |
578 | + * @location: location of the error | |
579 | + * @label: label of the affected DIMM(s) | |
580 | + * @other_detail: other driver-specific detail about the error | |
581 | + * @enable_per_layer_report: if false, the error affects all layers | |
582 | + * (typically, a memory controller error) | |
583 | + */ | |
584 | +struct edac_raw_error_desc { | |
585 | + /* | |
586 | + * NOTE: everything before grain won't be cleaned by | |
587 | + * edac_raw_error_desc_clean() | |
588 | + */ | |
589 | + char location[LOCATION_SIZE]; | |
590 | + char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; | |
591 | + long grain; | |
592 | + | |
593 | + /* the vars below and grain will be cleaned on every new error report */ | |
594 | + u16 error_count; | |
595 | + int top_layer; | |
596 | + int mid_layer; | |
597 | + int low_layer; | |
598 | + unsigned long page_frame_number; | |
599 | + unsigned long offset_in_page; | |
600 | + unsigned long syndrome; | |
601 | + const char *msg; | |
602 | + const char *other_detail; | |
603 | + bool enable_per_layer_report; | |
604 | +}; | |
605 | + | |
556 | 606 | /* MEMORY controller information structure |
557 | 607 | */ |
558 | 608 | struct mem_ctl_info { |
... | ... | @@ -659,6 +709,12 @@ |
659 | 709 | |
660 | 710 | /* work struct for this MC */ |
661 | 711 | struct delayed_work work; |
712 | + | |
713 | + /* | |
714 | + * Used to report an error. Because it is embedded in this global | |
715 | + * struct, its memory is allocated by the EDAC core | |
716 | + */ | |
717 | + struct edac_raw_error_desc error_desc; | |
662 | 718 | |
663 | 719 | /* the internal state of this controller instance */ |
664 | 720 | int op_state; |