Commit 48bb52c80be0e462328f58ca3a34ecfef3584320

Authored by Eran Ben Elisha
Committed by David S. Miller
1 parent ba7d16c779

devlink: Add auto dump flag to health reporter

On low-memory systems, run-time dumps can consume too much memory. Add the
ability for an administrator to disable auto dumps per reporter as part of
the error flow handling routine.

This attribute is not relevant while executing
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET.

By default, auto dump is activated for any reporter that has a dump method,
as part of the reporter registration to devlink.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 24 additions and 4 deletions Side-by-side Diff

include/uapi/linux/devlink.h
... ... @@ -429,6 +429,8 @@
429 429 DEVLINK_ATTR_NETNS_FD, /* u32 */
430 430 DEVLINK_ATTR_NETNS_PID, /* u32 */
431 431 DEVLINK_ATTR_NETNS_ID, /* u32 */
  432 +
  433 + DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, /* u8 */
432 434 /* add new attributes above here, update the policy in devlink.c */
433 435  
434 436 __DEVLINK_ATTR_MAX,
... ... @@ -5089,6 +5089,7 @@
5089 5089 struct mutex dump_lock; /* lock parallel read/write from dump buffers */
5090 5090 u64 graceful_period;
5091 5091 bool auto_recover;
  5092 + bool auto_dump;
5092 5093 u8 health_state;
5093 5094 u64 dump_ts;
5094 5095 u64 dump_real_ts;
... ... @@ -5155,6 +5156,7 @@
5155 5156 reporter->devlink = devlink;
5156 5157 reporter->graceful_period = graceful_period;
5157 5158 reporter->auto_recover = !!ops->recover;
  5159 + reporter->auto_dump = !!ops->dump;
5158 5160 mutex_init(&reporter->dump_lock);
5159 5161 refcount_set(&reporter->refcount, 1);
5160 5162 list_add_tail(&reporter->list, &devlink->reporter_list);
... ... @@ -5235,6 +5237,10 @@
5235 5237 nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
5236 5238 reporter->dump_real_ts, DEVLINK_ATTR_PAD))
5237 5239 goto reporter_nest_cancel;
  5240 + if (reporter->ops->dump &&
  5241 + nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
  5242 + reporter->auto_dump))
  5243 + goto reporter_nest_cancel;
5238 5244  
5239 5245 nla_nest_end(msg, reporter_attr);
5240 5246 genlmsg_end(msg, hdr);
... ... @@ -5381,10 +5387,12 @@
5381 5387  
5382 5388 reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
5383 5389  
5384   - mutex_lock(&reporter->dump_lock);
5385   - /* store current dump of current error, for later analysis */
5386   - devlink_health_do_dump(reporter, priv_ctx, NULL);
5387   - mutex_unlock(&reporter->dump_lock);
  5390 + if (reporter->auto_dump) {
  5391 + mutex_lock(&reporter->dump_lock);
  5392 + /* store current dump of current error, for later analysis */
  5393 + devlink_health_do_dump(reporter, priv_ctx, NULL);
  5394 + mutex_unlock(&reporter->dump_lock);
  5395 + }
5388 5396  
5389 5397 if (reporter->auto_recover)
5390 5398 return devlink_health_reporter_recover(reporter,
... ... @@ -5558,6 +5566,11 @@
5558 5566 err = -EOPNOTSUPP;
5559 5567 goto out;
5560 5568 }
  5569 + if (!reporter->ops->dump &&
  5570 + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) {
  5571 + err = -EOPNOTSUPP;
  5572 + goto out;
  5573 + }
5561 5574  
5562 5575 if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
5563 5576 reporter->graceful_period =
... ... @@ -5567,6 +5580,10 @@
5567 5580 reporter->auto_recover =
5568 5581 nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
5569 5582  
  5583 + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
  5584 + reporter->auto_dump =
  5585 + nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
  5586 +
5570 5587 devlink_health_reporter_put(reporter);
5571 5588 return 0;
5572 5589 out:
... ... @@ -6313,6 +6330,7 @@
6313 6330 [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
6314 6331 [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
6315 6332 [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
  6333 + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
6316 6334 };
6317 6335  
6318 6336 static const struct genl_ops devlink_nl_ops[] = {