Commit c7ef7645544131b0750478d1cf94cdfa945c809d

Authored by Mauro Carvalho Chehab
1 parent 80cc7d87d5

edac: reduce stack pressure by using a pre-allocated buffer

Too many variables are kept on the stack.
Reduce the stack usage by using a pre-allocated error
buffer.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

Showing 2 changed files with 104 additions and 33 deletions Side-by-side Diff

drivers/edac/edac_mc.c
... ... @@ -1065,7 +1065,6 @@
1065 1065 edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
1066 1066 }
1067 1067  
1068   -#define OTHER_LABEL " or "
1069 1068  
1070 1069 /**
1071 1070 * edac_mc_handle_error - reports a memory event to userspace
1072 1071  
1073 1072  
1074 1073  
... ... @@ -1097,19 +1096,28 @@
1097 1096 const char *msg,
1098 1097 const char *other_detail)
1099 1098 {
1100   - /* FIXME: too much for stack: move it to some pre-alocated area */
1101   - char detail[80], location[80];
1102   - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
  1099 + char detail[80];
1103 1100 char *p;
1104 1101 int row = -1, chan = -1;
1105 1102 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1106   - int i;
1107   - long grain;
1108   - bool enable_per_layer_report = false;
  1103 + int i, n_labels = 0;
1109 1104 u8 grain_bits;
  1105 + struct edac_raw_error_desc *e = &mci->error_desc;
1110 1106  
1111 1107 edac_dbg(3, "MC%d\n", mci->mc_idx);
1112 1108  
  1109 + /* Fills the error report buffer */
  1110 + memset(e, 0, sizeof (*e));
  1111 + e->error_count = error_count;
  1112 + e->top_layer = top_layer;
  1113 + e->mid_layer = mid_layer;
  1114 + e->low_layer = low_layer;
  1115 + e->page_frame_number = page_frame_number;
  1116 + e->offset_in_page = offset_in_page;
  1117 + e->syndrome = syndrome;
  1118 + e->msg = msg;
  1119 + e->other_detail = other_detail;
  1120 +
1113 1121 /*
1114 1122 * Check if the event report is consistent and if the memory
1115 1123 * location is known. If it is known, enable_per_layer_report will be
... ... @@ -1132,7 +1140,7 @@
1132 1140 pos[i] = -1;
1133 1141 }
1134 1142 if (pos[i] >= 0)
1135   - enable_per_layer_report = true;
  1143 + e->enable_per_layer_report = true;
1136 1144 }
1137 1145  
1138 1146 /*
... ... @@ -1146,8 +1154,7 @@
1146 1154 * where each memory belongs to a separate channel within the same
1147 1155 * branch.
1148 1156 */
1149   - grain = 0;
1150   - p = label;
  1157 + p = e->label;
1151 1158 *p = '\0';
1152 1159  
1153 1160 for (i = 0; i < mci->tot_dimms; i++) {
... ... @@ -1161,8 +1168,8 @@
1161 1168 continue;
1162 1169  
1163 1170 /* get the max grain, over the error match range */
1164   - if (dimm->grain > grain)
1165   - grain = dimm->grain;
  1171 + if (dimm->grain > e->grain)
  1172 + e->grain = dimm->grain;
1166 1173  
1167 1174 /*
1168 1175 * If the error is memory-controller wide, there's no need to
... ... @@ -1170,8 +1177,13 @@
1170 1177 * channel/memory controller/... may be affected.
1171 1178 * Also, don't show errors for empty DIMM slots.
1172 1179 */
1173   - if (enable_per_layer_report && dimm->nr_pages) {
1174   - if (p != label) {
  1180 + if (e->enable_per_layer_report && dimm->nr_pages) {
  1181 + if (n_labels >= EDAC_MAX_LABELS) {
  1182 + e->enable_per_layer_report = false;
  1183 + break;
  1184 + }
  1185 + n_labels++;
  1186 + if (p != e->label) {
1175 1187 strcpy(p, OTHER_LABEL);
1176 1188 p += strlen(OTHER_LABEL);
1177 1189 }
1178 1190  
... ... @@ -1198,12 +1210,12 @@
1198 1210 }
1199 1211 }
1200 1212  
1201   - if (!enable_per_layer_report) {
1202   - strcpy(label, "any memory");
  1213 + if (!e->enable_per_layer_report) {
  1214 + strcpy(e->label, "any memory");
1203 1215 } else {
1204 1216 edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1205   - if (p == label)
1206   - strcpy(label, "unknown memory");
  1217 + if (p == e->label)
  1218 + strcpy(e->label, "unknown memory");
1207 1219 if (type == HW_EVENT_ERR_CORRECTED) {
1208 1220 if (row >= 0) {
1209 1221 mci->csrows[row]->ce_count += error_count;
... ... @@ -1216,7 +1228,7 @@
1216 1228 }
1217 1229  
1218 1230 /* Fill the RAM location data */
1219   - p = location;
  1231 + p = e->location;
1220 1232  
1221 1233 for (i = 0; i < mci->n_layers; i++) {
1222 1234 if (pos[i] < 0)
1223 1235  
1224 1236  
1225 1237  
1226 1238  
... ... @@ -1226,32 +1238,35 @@
1226 1238 edac_layer_name[mci->layers[i].type],
1227 1239 pos[i]);
1228 1240 }
1229   - if (p > location)
  1241 + if (p > e->location)
1230 1242 *(p - 1) = '\0';
1231 1243  
1232 1244 /* Report the error via the trace interface */
1233   - grain_bits = fls_long(grain) + 1;
1234   - trace_mc_event(type, msg, label, error_count,
1235   - mci->mc_idx, top_layer, mid_layer, low_layer,
1236   - PAGES_TO_MiB(page_frame_number) | offset_in_page,
1237   - grain_bits, syndrome, other_detail);
  1245 + grain_bits = fls_long(e->grain) + 1;
  1246 + trace_mc_event(type, e->msg, e->label, e->error_count,
  1247 + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
  1248 + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
  1249 + grain_bits, e->syndrome, other_detail);
1238 1250  
1239 1251 /* Memory type dependent details about the error */
1240 1252 if (type == HW_EVENT_ERR_CORRECTED) {
1241 1253 snprintf(detail, sizeof(detail),
1242 1254 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1243   - page_frame_number, offset_in_page,
1244   - grain, syndrome);
1245   - edac_ce_error(mci, error_count, pos, msg, location, label,
1246   - detail, other_detail, enable_per_layer_report,
1247   - page_frame_number, offset_in_page, grain);
  1255 + e->page_frame_number, e->offset_in_page,
  1256 + e->grain, e->syndrome);
  1257 + edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
  1258 + e->label, detail, other_detail,
  1259 + e->enable_per_layer_report,
  1260 + e->page_frame_number, e->offset_in_page,
  1261 + e->grain);
1248 1262 } else {
1249 1263 snprintf(detail, sizeof(detail),
1250 1264 "page:0x%lx offset:0x%lx grain:%ld",
1251   - page_frame_number, offset_in_page, grain);
  1265 + page_frame_number, offset_in_page, e->grain);
1252 1266  
1253   - edac_ue_error(mci, error_count, pos, msg, location, label,
1254   - detail, other_detail, enable_per_layer_report);
  1267 + edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
  1268 + e->label, detail, other_detail,
  1269 + e->enable_per_layer_report);
1255 1270 }
1256 1271 }
1257 1272 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
include/linux/edac.h
... ... @@ -47,8 +47,18 @@
47 47 return;
48 48 }
49 49  
  50 +/* Max length of a DIMM label */
50 51 #define EDAC_MC_LABEL_LEN 31
51 52  
  53 +/* Maximum size of the location string */
  54 +#define LOCATION_SIZE 80
  55 +
  56 +/* Defines the maximum number of labels that can be reported */
  57 +#define EDAC_MAX_LABELS 8
  58 +
  59 +/* String used to join two or more labels */
  60 +#define OTHER_LABEL " or "
  61 +
52 62 /**
53 63 * enum dev_type - describe the type of memory DRAM chips used at the stick
54 64 * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it
... ... @@ -553,6 +563,46 @@
553 563 int layer0, layer1, layer2;
554 564 };
555 565  
  566 +/**
  567 + * struct edac_raw_error_desc - Raw error report structure
  568 + * @grain: minimum granularity for an error report, in bytes
  569 + * @error_count: number of errors of the same type
  570 + * @top_layer: top layer of the error (layer[0])
  571 + * @mid_layer: middle layer of the error (layer[1])
  572 + * @low_layer: low layer of the error (layer[2])
  573 + * @page_frame_number: page where the error happened
  574 + * @offset_in_page: offset of the error inside the page
  575 + * @syndrome: syndrome of the error (or 0 if unknown or if
  576 + * the syndrome is not applicable)
  577 + * @msg: error message
  578 + * @location: location of the error (string buffer of LOCATION_SIZE bytes)
  579 + * @label: label of the affected DIMM(s); several labels are joined with OTHER_LABEL
  580 + * @other_detail: other driver-specific detail about the error
  581 + * @enable_per_layer_report: if false, the error affects all layers
  582 + * (typically, a memory controller error)
  583 + */
  584 +struct edac_raw_error_desc {
  585 + /*
  586 + * NOTE: everything before grain won't be cleaned by
  587 + * edac_raw_error_desc_clean(). NOTE(review): that helper is not visible here, and edac_mc_handle_error() memset()s the whole struct - confirm.
  588 + */
  589 + char location[LOCATION_SIZE];
  590 + char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
  591 + long grain;
  592 +
  593 + /* grain and the fields below are cleared on every new error report */
  594 + u16 error_count;
  595 + int top_layer;
  596 + int mid_layer;
  597 + int low_layer;
  598 + unsigned long page_frame_number;
  599 + unsigned long offset_in_page;
  600 + unsigned long syndrome;
  601 + const char *msg;
  602 + const char *other_detail;
  603 + bool enable_per_layer_report;
  604 +};
  605 +
556 606 /* MEMORY controller information structure
557 607 */
558 608 struct mem_ctl_info {
... ... @@ -659,6 +709,12 @@
659 709  
660 710 /* work struct for this MC */
661 711 struct delayed_work work;
  712 +
  713 + /*
  714 + * Used to report an error - keeping it inside this per-controller
  715 + * struct means the memory is allocated by the EDAC core
  716 + */
  717 + struct edac_raw_error_desc error_desc;
662 718  
663 719 /* the internal state of this controller instance */
664 720 int op_state;