mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
net/mlx5: Add Crdump support
Crdump allows the driver to retrieve a dump of the FW PCI crspace. This is useful in case of catastrophic issues which may require FW reset. The crspace dump can be used for later debug. Signed-off-by: Alex Vesker <valex@mellanox.com> Signed-off-by: Moshe Shemesh <moshe@mellanox.com> Reviewed-by: Feras Daoud <ferasda@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
parent
b25bbc2f24
commit
8b9d8baae1
@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
|
||||
transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
|
||||
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
|
||||
lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
|
||||
diag/fw_tracer.o devlink.o
|
||||
diag/fw_tracer.o diag/crdump.o devlink.o
|
||||
|
||||
#
|
||||
# Netdev basic
|
||||
|
106
drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
Normal file
106
drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
Normal file
@ -0,0 +1,106 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
/* Copyright (c) 2019 Mellanox Technologies */
|
||||
|
||||
#include <linux/mlx5/driver.h>
|
||||
#include "mlx5_core.h"
|
||||
#include "lib/pci_vsc.h"
|
||||
#include "lib/mlx5.h"
|
||||
|
||||
#define BAD_ACCESS 0xBADACCE5
|
||||
#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7
|
||||
|
||||
static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev)
|
||||
{
|
||||
return !!dev->priv.health.crdump_size;
|
||||
}
|
||||
|
||||
/*
 * Read the crspace dump of @dev into @cr_data.
 *
 * @cr_data must have room for crdump_size bytes (crdump_size / 4 u32 words).
 *
 * Returns 0 when the read reports exactly crdump_size, -EIO when the read
 * reports 0, the negative value returned by the read on failure, and
 * -EINVAL when a positive amount other than crdump_size was read.
 */
static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data)
{
	u32 crdump_size = dev->priv.health.crdump_size;
	int nread;
	int idx = 0;

	/*
	 * Seed every word with the BAD_ACCESS marker before reading;
	 * presumably so words the read does not overwrite stay recognizable.
	 */
	while (idx < (crdump_size / 4)) {
		cr_data[idx] = BAD_ACCESS;
		idx++;
	}

	nread = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size);
	if (nread == 0)
		return -EIO;
	if (nread < 0)
		return nread;

	/* A positive result must match the requested size exactly */
	if (crdump_size == nread)
		return 0;

	mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
		       nread, crdump_size);
	return -EINVAL;
}
|
||||
|
||||
/*
 * Collect a crspace dump from @dev into @cr_data.
 *
 * Returns -ENODEV when crdump is not enabled, otherwise the error from
 * taking the VSC gateway lock, from selecting the scan-crspace space, or
 * from filling the buffer; 0 on success. Once the gateway lock has been
 * taken it is released on every path before returning.
 */
int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data)
{
	int err;

	if (!mlx5_crdump_enabled(dev))
		return -ENODEV;

	err = mlx5_vsc_gw_lock(dev);
	if (err) {
		mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n",
			       err);
		return err;
	}

	/* Only read the dump if the space was selected successfully */
	err = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
	if (!err)
		err = mlx5_crdump_fill(dev, cr_data);

	mlx5_vsc_gw_unlock(dev);
	return err;
}
|
||||
|
||||
int mlx5_crdump_enable(struct mlx5_core_dev *dev)
|
||||
{
|
||||
struct mlx5_priv *priv = &dev->priv;
|
||||
u32 space_size;
|
||||
int ret;
|
||||
|
||||
if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) ||
|
||||
mlx5_crdump_enabled(dev))
|
||||
return 0;
|
||||
|
||||
ret = mlx5_vsc_gw_lock(dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Check if space is supported and get space size */
|
||||
ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
|
||||
&space_size);
|
||||
if (ret) {
|
||||
/* Unlock and mask error since space is not supported */
|
||||
mlx5_vsc_gw_unlock(dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!space_size) {
|
||||
mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
|
||||
mlx5_vsc_gw_unlock(dev);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = mlx5_vsc_gw_unlock(dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
priv->health.crdump_size = space_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mlx5_crdump_disable(struct mlx5_core_dev *dev)
|
||||
{
|
||||
dev->priv.health.crdump_size = 0;
|
||||
}
|
@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
|
||||
void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
|
||||
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
|
||||
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
|
||||
int mlx5_crdump_enable(struct mlx5_core_dev *dev);
|
||||
void mlx5_crdump_disable(struct mlx5_core_dev *dev);
|
||||
int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
|
||||
|
||||
/* TODO move to lib/events.h */
|
||||
|
||||
|
@ -1313,6 +1313,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
if (err)
|
||||
goto clean_load;
|
||||
|
||||
err = mlx5_crdump_enable(dev);
|
||||
if (err)
|
||||
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
|
||||
|
||||
pci_save_state(pdev);
|
||||
return 0;
|
||||
|
||||
@ -1334,6 +1338,7 @@ static void remove_one(struct pci_dev *pdev)
|
||||
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
|
||||
struct devlink *devlink = priv_to_devlink(dev);
|
||||
|
||||
mlx5_crdump_disable(dev);
|
||||
mlx5_devlink_unregister(devlink);
|
||||
mlx5_unregister_device(dev);
|
||||
|
||||
|
@ -435,6 +435,7 @@ struct mlx5_core_health {
|
||||
u32 prev;
|
||||
int miss_counter;
|
||||
bool sick;
|
||||
u32 crdump_size;
|
||||
/* wq spinlock to synchronize draining */
|
||||
spinlock_t wq_lock;
|
||||
struct workqueue_struct *wq;
|
||||
|
Loading…
Reference in New Issue
Block a user