net/mlx5e: Support dump callback in TX reporter

Add support for SQ's FW dump on TX reporter's events. Use Resource dump
API to retrieve the relevant data: SX slice, SQ dump and SQ buffer. Wrap
it in formatted messages and store the binary output in devlink core.

Example:
$ devlink health dump show pci/0000:00:0b.0 reporter tx
SX Slice:
   data:
     00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
     00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
  SQs:
    SQ:
      index: 1511
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
    SQ:
      index: 1516
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff

$ devlink health dump show pci/0000:00:0b.0 reporter tx -jp
{
    "SX Slice": {
    	"data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255],
    	},
    "SQs": [ {
            "SQ": {
                "index": 1511,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        },{
            "SQ": {
                "index": 1516,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        } ]
}

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
Aya Levin 2020-02-11 14:32:45 -08:00 committed by Saeed Mahameed
parent 0a56be3c88
commit 5f29458b77
3 changed files with 234 additions and 2 deletions

View File

@ -3,6 +3,7 @@
#include "health.h"
#include "lib/eq.h"
#include "lib/mlx5.h"
int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
@ -204,3 +205,107 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
return devlink_health_report(reporter, err_str, err_ctx);
}
#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
const void *value, u32 value_len)
{
u32 data_size;
u32 offset;
int err;
for (offset = 0; offset < value_len; offset += data_size) {
data_size = value_len - offset;
if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
if (err)
break;
}
return err;
}
int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
struct devlink_fmsg *fmsg)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_rsc_dump_cmd *cmd;
struct page *page;
int cmd_err, err;
int end_err;
int size;
if (IS_ERR_OR_NULL(mdev->rsc_dump))
return -EOPNOTSUPP;
page = alloc_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
if (err)
return err;
cmd = mlx5_rsc_dump_cmd_create(mdev, key);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
goto free_page;
}
do {
cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
if (cmd_err < 0) {
err = cmd_err;
goto destroy_cmd;
}
err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
if (err)
goto destroy_cmd;
} while (cmd_err > 0);
destroy_cmd:
mlx5_rsc_dump_cmd_destroy(cmd);
end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
if (end_err)
err = end_err;
free_page:
__free_page(page);
return err;
}
int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
int queue_idx, char *lbl)
{
struct mlx5_rsc_key key = {};
int err;
key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
key.index1 = queue_idx;
key.size = PAGE_SIZE;
key.num_of_obj1 = 1;
err = devlink_fmsg_obj_nest_start(fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl);
if (err)
return err;
err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
if (err)
return err;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;
return devlink_fmsg_obj_nest_end(fmsg);
}

View File

@ -5,6 +5,7 @@
#define __MLX5E_EN_HEALTH_H
#include "en.h"
#include "diag/rsc_dump.h"
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
@ -36,6 +37,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
struct mlx5e_err_ctx {
int (*recover)(void *ctx);
int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
void *ctx;
};
@ -48,6 +50,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
void mlx5e_health_channels_update(struct mlx5e_priv *priv);
int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
struct devlink_fmsg *fmsg);
int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
int queue_idx, char *lbl);
#endif

View File

@ -246,6 +246,126 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
return err;
}
static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
void *ctx)
{
struct mlx5_rsc_key key = {};
struct mlx5e_txqsq *sq = ctx;
int err;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;
err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
if (err)
return err;
key.size = PAGE_SIZE;
key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC");
if (err)
return err;
key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
key.index1 = sq->sqn;
key.num_of_obj1 = 1;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff");
if (err)
return err;
key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;
return mlx5e_reporter_named_obj_nest_end(fmsg);
}
static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
struct devlink_fmsg *fmsg)
{
struct mlx5_rsc_key key = {};
int i, tc, err;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;
err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
if (err)
return err;
key.size = PAGE_SIZE;
key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;
err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;
err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
if (err)
return err;
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
struct mlx5e_txqsq *sq = &c->sq[tc];
err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
if (err)
return err;
}
}
return devlink_fmsg_arr_pair_nest_end(fmsg);
}
static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
struct mlx5e_err_ctx *err_ctx,
struct devlink_fmsg *fmsg)
{
return err_ctx->dump(priv, fmsg, err_ctx->ctx);
}
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
struct devlink_fmsg *fmsg, void *context,
struct netlink_ext_ack *extack)
{
struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
struct mlx5e_err_ctx *err_ctx = context;
return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
}
void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
{
struct mlx5e_priv *priv = sq->channel->priv;
@ -254,6 +374,7 @@ void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
err_ctx.ctx = sq;
err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
err_ctx.dump = mlx5e_tx_reporter_dump_sq;
sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
@ -267,6 +388,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
err_ctx.ctx = sq;
err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
err_ctx.dump = mlx5e_tx_reporter_dump_sq;
sprintf(err_str,
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
@ -279,6 +401,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
.name = "tx",
.recover = mlx5e_tx_reporter_recover,
.diagnose = mlx5e_tx_reporter_diagnose,
.dump = mlx5e_tx_reporter_dump,
};
#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500