mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-13 05:36:32 +07:00
net/mlx5e: Support dump callback in TX reporter
Add support for SQ's FW dump on TX reporter's events. Use Resource dump API to retrieve the relevant data: SX slice, SQ dump and SQ buffer. Wrap it in formatted messages and store the binary output in devlink core. Example: $ devlink health dump show pci/0000:00:0b.0 reporter tx SX Slice: data: 00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00 00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff 00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00 ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff SQs: SQ: index: 1511 data: 00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00 00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff 00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00 ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff SQ: index: 1516 data: 00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00 00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff 00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00 ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff $ devlink health dump show pci/0000:00:0b.0 reporter tx -jp { "SX Slice": { "data": [ 
0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255], }, "SQs": [ { "SQ": { "index": 1511, "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255] } },{ "SQ": { "index": 1516, "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255] } } ] } Signed-off-by: Aya Levin <ayal@mellanox.com> Reviewed-by: Moshe Shemesh <moshe@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
parent
0a56be3c88
commit
5f29458b77
@ -3,6 +3,7 @@
|
||||
|
||||
#include "health.h"
|
||||
#include "lib/eq.h"
|
||||
#include "lib/mlx5.h"
|
||||
|
||||
int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
|
||||
{
|
||||
@ -204,3 +205,107 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
|
||||
|
||||
return devlink_health_report(reporter, err_str, err_ctx);
|
||||
}
|
||||
|
||||
#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
|
||||
/*
 * Append @value_len bytes starting at @value to @fmsg as binary data.
 *
 * The buffer is handed to devlink_fmsg_binary_put() in pieces of at most
 * MLX5_HEALTH_DEVLINK_MAX_SIZE bytes each.
 *
 * Returns 0 on success, including the case where @value_len is 0 and
 * nothing is emitted, or the first non-zero value returned by
 * devlink_fmsg_binary_put().
 */
static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
					const void *value, u32 value_len)
{
	u32 data_size;
	u32 offset;
	/* Must start at 0: with an empty buffer the loop body never runs. */
	int err = 0;

	for (offset = 0; offset < value_len; offset += data_size) {
		/* Remaining bytes, clamped to the per-call maximum. */
		data_size = value_len - offset;
		if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
			data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
		err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
		if (err)
			break;
	}
	return err;
}
|
||||
|
||||
int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
|
||||
struct devlink_fmsg *fmsg)
|
||||
{
|
||||
struct mlx5_core_dev *mdev = priv->mdev;
|
||||
struct mlx5_rsc_dump_cmd *cmd;
|
||||
struct page *page;
|
||||
int cmd_err, err;
|
||||
int end_err;
|
||||
int size;
|
||||
|
||||
if (IS_ERR_OR_NULL(mdev->rsc_dump))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
cmd = mlx5_rsc_dump_cmd_create(mdev, key);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto free_page;
|
||||
}
|
||||
|
||||
do {
|
||||
cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
|
||||
if (cmd_err < 0) {
|
||||
err = cmd_err;
|
||||
goto destroy_cmd;
|
||||
}
|
||||
|
||||
err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
|
||||
if (err)
|
||||
goto destroy_cmd;
|
||||
|
||||
} while (cmd_err > 0);
|
||||
|
||||
destroy_cmd:
|
||||
mlx5_rsc_dump_cmd_destroy(cmd);
|
||||
end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
|
||||
if (end_err)
|
||||
err = end_err;
|
||||
free_page:
|
||||
__free_page(page);
|
||||
return err;
|
||||
}
|
||||
|
||||
int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
|
||||
int queue_idx, char *lbl)
|
||||
{
|
||||
struct mlx5_rsc_key key = {};
|
||||
int err;
|
||||
|
||||
key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
|
||||
key.index1 = queue_idx;
|
||||
key.size = PAGE_SIZE;
|
||||
key.num_of_obj1 = 1;
|
||||
|
||||
err = devlink_fmsg_obj_nest_start(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_end(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return devlink_fmsg_obj_nest_end(fmsg);
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
#define __MLX5E_EN_HEALTH_H
|
||||
|
||||
#include "en.h"
|
||||
#include "diag/rsc_dump.h"
|
||||
|
||||
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
|
||||
|
||||
@ -36,6 +37,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
|
||||
|
||||
/*
 * Per-event error context: an opaque pointer plus the callbacks that
 * operate on it.
 */
struct mlx5e_err_ctx {
|
||||
/*
 * Recovery callback; takes the opaque context pointer.
 * NOTE(review): presumably called with the ctx member below - the call
 * site is not visible here, confirm.
 */
int (*recover)(void *ctx);
|
||||
/*
 * Dump callback; the TX reporter calls it with the driver private data,
 * the devlink message being built and the ctx member below.
 */
int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
|
||||
/* Opaque argument for the callbacks; the TX reporter stores the SQ here. */
void *ctx;
|
||||
};
|
||||
|
||||
@ -48,6 +50,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
|
||||
int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
|
||||
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
|
||||
void mlx5e_health_channels_update(struct mlx5e_priv *priv);
|
||||
|
||||
|
||||
int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
|
||||
struct devlink_fmsg *fmsg);
|
||||
int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
|
||||
int queue_idx, char *lbl);
|
||||
#endif
|
||||
|
@ -246,6 +246,126 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
|
||||
void *ctx)
|
||||
{
|
||||
struct mlx5_rsc_key key = {};
|
||||
struct mlx5e_txqsq *sq = ctx;
|
||||
int err;
|
||||
|
||||
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
|
||||
return 0;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key.size = PAGE_SIZE;
|
||||
key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
|
||||
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_end(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
|
||||
key.index1 = sq->sqn;
|
||||
key.num_of_obj1 = 1;
|
||||
|
||||
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_end(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
|
||||
key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
|
||||
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_end(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return mlx5e_reporter_named_obj_nest_end(fmsg);
|
||||
}
|
||||
|
||||
static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
|
||||
struct devlink_fmsg *fmsg)
|
||||
{
|
||||
struct mlx5_rsc_key key = {};
|
||||
int i, tc, err;
|
||||
|
||||
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
|
||||
return 0;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key.size = PAGE_SIZE;
|
||||
key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
|
||||
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5e_reporter_named_obj_nest_end(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < priv->channels.num; i++) {
|
||||
struct mlx5e_channel *c = priv->channels.c[i];
|
||||
|
||||
for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
|
||||
struct mlx5e_txqsq *sq = &c->sq[tc];
|
||||
|
||||
err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
return devlink_fmsg_arr_pair_nest_end(fmsg);
|
||||
}
|
||||
|
||||
static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
|
||||
struct mlx5e_err_ctx *err_ctx,
|
||||
struct devlink_fmsg *fmsg)
|
||||
{
|
||||
return err_ctx->dump(priv, fmsg, err_ctx->ctx);
|
||||
}
|
||||
|
||||
/*
 * devlink health .dump handler for the TX reporter.
 *
 * With a non-NULL @context (treated as a struct mlx5e_err_ctx) the dump is
 * delegated to that context's callback; with a NULL @context all SQs are
 * dumped. @extack is not used.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);

	return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}
|
||||
|
||||
void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
|
||||
{
|
||||
struct mlx5e_priv *priv = sq->channel->priv;
|
||||
@ -254,6 +374,7 @@ void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
|
||||
|
||||
err_ctx.ctx = sq;
|
||||
err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
|
||||
err_ctx.dump = mlx5e_tx_reporter_dump_sq;
|
||||
sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
|
||||
|
||||
mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
|
||||
@ -267,6 +388,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
|
||||
|
||||
err_ctx.ctx = sq;
|
||||
err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
|
||||
err_ctx.dump = mlx5e_tx_reporter_dump_sq;
|
||||
sprintf(err_str,
|
||||
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
|
||||
sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
|
||||
@ -279,6 +401,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
|
||||
.name = "tx",
|
||||
.recover = mlx5e_tx_reporter_recover,
|
||||
.diagnose = mlx5e_tx_reporter_diagnose,
|
||||
.dump = mlx5e_tx_reporter_dump,
|
||||
};
|
||||
|
||||
#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
|
||||
|
Loading…
Reference in New Issue
Block a user