mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-16 18:26:43 +07:00
net: qed: introduce critical hardware error handler
The MCP may signal the driver about a generic critical failure. The driver has to collect the mdump information (get_retain), push it to the logs, and trigger a generic notification on the "hardware attention" event. Signed-off-by: Ariel Elior <ariel.elior@marvell.com> Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com> Signed-off-by: Igor Russkikh <irusskikh@marvell.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
3e99c21110
commit
ebf64bf4df
@ -12400,6 +12400,13 @@ struct load_rsp_stc {
|
||||
#define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0)
|
||||
};
|
||||
|
||||
/* Retained mdump data, used as the destination buffer of the
 * DRV_MSG_CODE_MDUMP_GET_RETAIN sub-command (see qed_mcp_mdump_get_retain()).
 * NOTE(review): the exact meaning of each field is defined by the MFW and is
 * not visible here - confirm against the MFW interface specification.
 */
struct mdump_retain_data_stc {
|
||||
u32 valid; /* non-zero: the critical-error handler logs the fields below */
|
||||
u32 epoch; /* logged as "epoch" by the critical-error handler */
|
||||
u32 pf; /* logged as "pf" by the critical-error handler */
|
||||
u32 status; /* logged as "status" by the critical-error handler */
|
||||
};
|
||||
|
||||
union drv_union_data {
|
||||
u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
|
||||
struct mcp_mac wol_mac;
|
||||
@ -12488,6 +12495,8 @@ struct public_drv_mb {
|
||||
#define DRV_MSG_CODE_BIST_TEST 0x001e0000
|
||||
#define DRV_MSG_CODE_SET_LED_MODE 0x00200000
|
||||
#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
|
||||
/* Send crash dump commands with param[3:0] - opcode */
|
||||
#define DRV_MSG_CODE_MDUMP_CMD 0x00250000
|
||||
#define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000
|
||||
#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000
|
||||
#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000
|
||||
@ -12519,6 +12528,21 @@ struct public_drv_mb {
|
||||
|
||||
#define RESOURCE_DUMP 0
|
||||
|
||||
/* DRV_MSG_CODE_MDUMP_CMD parameters */
|
||||
#define MDUMP_DRV_PARAM_OPCODE_MASK 0x0000000f
|
||||
#define DRV_MSG_CODE_MDUMP_ACK 0x01
|
||||
#define DRV_MSG_CODE_MDUMP_SET_VALUES 0x02
|
||||
#define DRV_MSG_CODE_MDUMP_TRIGGER 0x03
|
||||
#define DRV_MSG_CODE_MDUMP_GET_CONFIG 0x04
|
||||
#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05
|
||||
#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06
|
||||
#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07
|
||||
#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08
|
||||
|
||||
#define DRV_MSG_CODE_HW_DUMP_TRIGGER 0x0a
|
||||
#define DRV_MSG_CODE_MDUMP_GEN_MDUMP2 0x0b
|
||||
#define DRV_MSG_CODE_MDUMP_FREE_MDUMP2 0x0c
|
||||
|
||||
#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000
|
||||
#define DRV_MSG_CODE_OS_WOL 0x002e0000
|
||||
|
||||
@ -12697,6 +12721,8 @@ struct public_drv_mb {
|
||||
#define FW_MSG_CODE_DEBUG_NOT_ENABLED 0xb00a0000
|
||||
#define FW_MSG_CODE_DEBUG_DATA_SEND_OK 0xb00b0000
|
||||
|
||||
#define FW_MSG_CODE_MDUMP_INVALID_CMD 0x00030000
|
||||
|
||||
u32 fw_mb_param;
|
||||
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
|
||||
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
|
||||
@ -12763,7 +12789,7 @@ enum MFW_DRV_MSG_TYPE {
|
||||
MFW_DRV_MSG_GET_RDMA_STATS,
|
||||
MFW_DRV_MSG_FAILURE_DETECTED,
|
||||
MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
|
||||
MFW_DRV_MSG_BW_UPDATE11,
|
||||
MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED,
|
||||
MFW_DRV_MSG_RESERVED,
|
||||
MFW_DRV_MSG_GET_TLV_REQ,
|
||||
MFW_DRV_MSG_OEM_CFG_UPDATE,
|
||||
|
@ -1717,6 +1717,116 @@ static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
|
||||
"Fan failure was detected on the network interface card and it's going to be shut down.\n");
|
||||
}
|
||||
|
||||
/* Argument bundle for qed_mcp_mdump_cmd(): describes a single mdump
 * sub-command and carries the MFW response code back to the caller.
 */
struct qed_mdump_cmd_params {
|
||||
u32 cmd; /* mdump sub-command; sent as the mailbox parameter */
|
||||
void *p_data_src; /* forwarded as the mailbox source buffer */
|
||||
u8 data_src_size; /* forwarded as the mailbox source size */
|
||||
void *p_data_dst; /* forwarded as the mailbox destination buffer */
|
||||
u8 data_dst_size; /* forwarded as the mailbox destination size */
|
||||
u32 mcp_resp; /* out: mailbox response, set only if the mailbox call succeeded */
|
||||
};
|
||||
|
||||
static int
|
||||
qed_mcp_mdump_cmd(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
struct qed_mdump_cmd_params *p_mdump_cmd_params)
|
||||
{
|
||||
struct qed_mcp_mb_params mb_params;
|
||||
int rc;
|
||||
|
||||
memset(&mb_params, 0, sizeof(mb_params));
|
||||
mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD;
|
||||
mb_params.param = p_mdump_cmd_params->cmd;
|
||||
mb_params.p_data_src = p_mdump_cmd_params->p_data_src;
|
||||
mb_params.data_src_size = p_mdump_cmd_params->data_src_size;
|
||||
mb_params.p_data_dst = p_mdump_cmd_params->p_data_dst;
|
||||
mb_params.data_dst_size = p_mdump_cmd_params->data_dst_size;
|
||||
rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
|
||||
|
||||
if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
|
||||
DP_INFO(p_hwfn,
|
||||
"The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
|
||||
p_mdump_cmd_params->cmd);
|
||||
rc = -EOPNOTSUPP;
|
||||
} else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
|
||||
DP_INFO(p_hwfn,
|
||||
"The mdump command is not supported by the MFW\n");
|
||||
rc = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int qed_mcp_mdump_ack(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
|
||||
{
|
||||
struct qed_mdump_cmd_params mdump_cmd_params;
|
||||
|
||||
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
|
||||
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_ACK;
|
||||
|
||||
return qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
|
||||
}
|
||||
|
||||
int
|
||||
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
struct mdump_retain_data_stc *p_mdump_retain)
|
||||
{
|
||||
struct qed_mdump_cmd_params mdump_cmd_params;
|
||||
int rc;
|
||||
|
||||
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
|
||||
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
|
||||
mdump_cmd_params.p_data_dst = p_mdump_retain;
|
||||
mdump_cmd_params.data_dst_size = sizeof(*p_mdump_retain);
|
||||
|
||||
rc = qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
|
||||
DP_INFO(p_hwfn,
|
||||
"Failed to get the mdump retained data [mcp_resp 0x%x]\n",
|
||||
mdump_cmd_params.mcp_resp);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void qed_mcp_handle_critical_error(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt)
|
||||
{
|
||||
struct mdump_retain_data_stc mdump_retain;
|
||||
int rc;
|
||||
|
||||
/* In CMT mode - no need for more than a single acknowledgment to the
|
||||
* MFW, and no more than a single notification to the upper driver.
|
||||
*/
|
||||
if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
|
||||
return;
|
||||
|
||||
rc = qed_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
|
||||
if (rc == 0 && mdump_retain.valid)
|
||||
DP_NOTICE(p_hwfn,
|
||||
"The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
|
||||
mdump_retain.epoch,
|
||||
mdump_retain.pf, mdump_retain.status);
|
||||
else
|
||||
DP_NOTICE(p_hwfn,
|
||||
"The MFW notified that a critical error occurred in the device\n");
|
||||
|
||||
DP_NOTICE(p_hwfn,
|
||||
"Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
|
||||
qed_mcp_mdump_ack(p_hwfn, p_ptt);
|
||||
|
||||
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_HW_ATTN, NULL);
|
||||
}
|
||||
|
||||
void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
|
||||
{
|
||||
struct public_func shmem_info;
|
||||
@ -1866,6 +1976,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
|
||||
case MFW_DRV_MSG_FAILURE_DETECTED:
|
||||
qed_mcp_handle_fan_failure(p_hwfn, p_ptt);
|
||||
break;
|
||||
case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
|
||||
qed_mcp_handle_critical_error(p_hwfn, p_ptt);
|
||||
break;
|
||||
case MFW_DRV_MSG_GET_TLV_REQ:
|
||||
qed_mfw_tlv_req(p_hwfn);
|
||||
break;
|
||||
|
@ -1016,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
|
||||
int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt, u32 mask_parities);
|
||||
|
||||
/* @brief - Gets the mdump retained data from the MFW.
|
||||
*
|
||||
* @param p_hwfn
|
||||
* @param p_ptt
|
||||
* @param p_mdump_retain
|
||||
*
|
||||
* @return 0 upon success.
|
||||
*/
|
||||
int
|
||||
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
struct mdump_retain_data_stc *p_mdump_retain);
|
||||
|
||||
/**
|
||||
* @brief - Sets the MFW's max value for the given resource
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user