mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
Merge branch 'net-qed-qede-critical-hw-error-handling'
Igor Russkikh says: ==================== net: qed/qede: critical hw error handling FastLinQ devices, as complex systems, may observe various hardware level error conditions, both severe and recoverable. The driver is able to detect and report this, but so far it only did trace/dmesg based reporting. Here we implement extended hw error detection; a service task handler captures a dump for later analysis. I also resubmit a patch from Denis Bolotin on the tx timeout handler, addressing David's comment regarding the recovery procedure as an extra reaction to this event. v2: Removing the patch with ethtool dump and udev magic. It's quite isolated, I'm working on devlink based logic for this separately. v1: https://patchwork.ozlabs.org/project/netdev/cover/cover.1588758463.git.irusskikh@marvell.com/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
86b6ba171d
@ -740,12 +740,6 @@ struct qed_dbg_feature {
|
||||
u32 dumped_dwords;
|
||||
};
|
||||
|
||||
struct qed_dbg_params {
|
||||
struct qed_dbg_feature features[DBG_FEATURE_NUM];
|
||||
u8 engine_for_debug;
|
||||
bool print_data;
|
||||
};
|
||||
|
||||
struct qed_dev {
|
||||
u32 dp_module;
|
||||
u8 dp_level;
|
||||
@ -844,6 +838,9 @@ struct qed_dev {
|
||||
/* Recovery */
|
||||
bool recov_in_prog;
|
||||
|
||||
/* Indicates whether should prevent attentions from being reasserted */
|
||||
bool attn_clr_en;
|
||||
|
||||
/* LLH info */
|
||||
u8 ppfid_bitmap;
|
||||
struct qed_llh_info *p_llh_info;
|
||||
@ -872,17 +869,18 @@ struct qed_dev {
|
||||
} protocol_ops;
|
||||
void *ops_cookie;
|
||||
|
||||
struct qed_dbg_params dbg_params;
|
||||
|
||||
#ifdef CONFIG_QED_LL2
|
||||
struct qed_cb_ll2_info *ll2;
|
||||
u8 ll2_mac_address[ETH_ALEN];
|
||||
#endif
|
||||
struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM];
|
||||
u8 engine_for_debug;
|
||||
bool disable_ilt_dump;
|
||||
DECLARE_HASHTABLE(connections, 10);
|
||||
const struct firmware *firmware;
|
||||
|
||||
bool print_dbg_data;
|
||||
|
||||
u32 rdma_max_sge;
|
||||
u32 rdma_max_inline;
|
||||
u32 rdma_max_srq_sge;
|
||||
@ -1020,6 +1018,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
|
||||
u32 input_len, u8 *input_buf,
|
||||
u32 max_size, u8 *unzip_buf);
|
||||
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
|
||||
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
|
||||
enum qed_hw_err_type err_type);
|
||||
void qed_get_protocol_stats(struct qed_dev *cdev,
|
||||
enum qed_mcp_protocol_type type,
|
||||
union qed_mcp_protocol_stats *stats);
|
||||
|
@ -7453,7 +7453,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
|
||||
enum qed_dbg_features feature_idx)
|
||||
{
|
||||
struct qed_dbg_feature *feature =
|
||||
&p_hwfn->cdev->dbg_params.features[feature_idx];
|
||||
&p_hwfn->cdev->dbg_features[feature_idx];
|
||||
u32 text_size_bytes, null_char_pos, i;
|
||||
enum dbg_status rc;
|
||||
char *text_buf;
|
||||
@ -7502,7 +7502,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
|
||||
text_buf[i] = '\n';
|
||||
|
||||
/* Dump printable feature to log */
|
||||
if (p_hwfn->cdev->dbg_params.print_data)
|
||||
if (p_hwfn->cdev->print_dbg_data)
|
||||
qed_dbg_print_feature(text_buf, text_size_bytes);
|
||||
|
||||
/* Free the old dump_buf and point the dump_buf to the newly allocated
|
||||
@ -7523,7 +7523,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn,
|
||||
enum qed_dbg_features feature_idx)
|
||||
{
|
||||
struct qed_dbg_feature *feature =
|
||||
&p_hwfn->cdev->dbg_params.features[feature_idx];
|
||||
&p_hwfn->cdev->dbg_features[feature_idx];
|
||||
u32 buf_size_dwords;
|
||||
enum dbg_status rc;
|
||||
|
||||
@ -7648,7 +7648,7 @@ static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
|
||||
enum qed_nvm_images image_id)
|
||||
{
|
||||
struct qed_hwfn *p_hwfn =
|
||||
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
|
||||
&cdev->hwfns[cdev->engine_for_debug];
|
||||
u32 len_rounded, i;
|
||||
__be32 val;
|
||||
int rc;
|
||||
@ -7780,7 +7780,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
|
||||
{
|
||||
u8 cur_engine, omit_engine = 0, org_engine;
|
||||
struct qed_hwfn *p_hwfn =
|
||||
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
|
||||
&cdev->hwfns[cdev->engine_for_debug];
|
||||
struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
|
||||
int grc_params[MAX_DBG_GRC_PARAMS], i;
|
||||
u32 offset = 0, feature_size;
|
||||
@ -8000,7 +8000,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
|
||||
int qed_dbg_all_data_size(struct qed_dev *cdev)
|
||||
{
|
||||
struct qed_hwfn *p_hwfn =
|
||||
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
|
||||
&cdev->hwfns[cdev->engine_for_debug];
|
||||
u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0;
|
||||
u8 cur_engine, org_engine;
|
||||
|
||||
@ -8059,9 +8059,9 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
|
||||
enum qed_dbg_features feature, u32 *num_dumped_bytes)
|
||||
{
|
||||
struct qed_hwfn *p_hwfn =
|
||||
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
|
||||
&cdev->hwfns[cdev->engine_for_debug];
|
||||
struct qed_dbg_feature *qed_feature =
|
||||
&cdev->dbg_params.features[feature];
|
||||
&cdev->dbg_features[feature];
|
||||
enum dbg_status dbg_rc;
|
||||
struct qed_ptt *p_ptt;
|
||||
int rc = 0;
|
||||
@ -8084,7 +8084,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
|
||||
DP_VERBOSE(cdev, QED_MSG_DEBUG,
|
||||
"copying debugfs feature to external buffer\n");
|
||||
memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size);
|
||||
*num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords *
|
||||
*num_dumped_bytes = cdev->dbg_features[feature].dumped_dwords *
|
||||
4;
|
||||
|
||||
out:
|
||||
@ -8095,7 +8095,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
|
||||
int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
|
||||
{
|
||||
struct qed_hwfn *p_hwfn =
|
||||
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
|
||||
&cdev->hwfns[cdev->engine_for_debug];
|
||||
struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature];
|
||||
struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
|
||||
u32 buf_size_dwords;
|
||||
@ -8120,14 +8120,14 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
|
||||
|
||||
u8 qed_get_debug_engine(struct qed_dev *cdev)
|
||||
{
|
||||
return cdev->dbg_params.engine_for_debug;
|
||||
return cdev->engine_for_debug;
|
||||
}
|
||||
|
||||
void qed_set_debug_engine(struct qed_dev *cdev, int engine_number)
|
||||
{
|
||||
DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n",
|
||||
engine_number);
|
||||
cdev->dbg_params.engine_for_debug = engine_number;
|
||||
cdev->engine_for_debug = engine_number;
|
||||
}
|
||||
|
||||
void qed_dbg_pf_init(struct qed_dev *cdev)
|
||||
@ -8146,7 +8146,7 @@ void qed_dbg_pf_init(struct qed_dev *cdev)
|
||||
}
|
||||
|
||||
/* Set the hwfn to be 0 as default */
|
||||
cdev->dbg_params.engine_for_debug = 0;
|
||||
cdev->engine_for_debug = 0;
|
||||
}
|
||||
|
||||
void qed_dbg_pf_exit(struct qed_dev *cdev)
|
||||
|
@ -3085,7 +3085,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
|
||||
rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
|
||||
p_hwfn->rel_pf_id, false);
|
||||
if (rc) {
|
||||
DP_NOTICE(p_hwfn, "Final cleanup failed\n");
|
||||
qed_hw_err_notify(p_hwfn, p_hwfn->p_main_ptt,
|
||||
QED_HW_ERR_RAMROD_FAIL,
|
||||
"Final cleanup failed\n");
|
||||
goto load_err;
|
||||
}
|
||||
}
|
||||
|
@ -12400,6 +12400,13 @@ struct load_rsp_stc {
|
||||
#define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0)
|
||||
};
|
||||
|
||||
struct mdump_retain_data_stc {
|
||||
u32 valid;
|
||||
u32 epoch;
|
||||
u32 pf;
|
||||
u32 status;
|
||||
};
|
||||
|
||||
union drv_union_data {
|
||||
u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
|
||||
struct mcp_mac wol_mac;
|
||||
@ -12488,10 +12495,14 @@ struct public_drv_mb {
|
||||
#define DRV_MSG_CODE_BIST_TEST 0x001e0000
|
||||
#define DRV_MSG_CODE_SET_LED_MODE 0x00200000
|
||||
#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
|
||||
/* Send crash dump commands with param[3:0] - opcode */
|
||||
#define DRV_MSG_CODE_MDUMP_CMD 0x00250000
|
||||
#define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000
|
||||
#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000
|
||||
#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000
|
||||
|
||||
#define DRV_MSG_CODE_DEBUG_DATA_SEND 0xc0040000
|
||||
|
||||
#define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F
|
||||
#define RESOURCE_CMD_REQ_RESC_SHIFT 0
|
||||
#define RESOURCE_CMD_REQ_OPCODE_MASK 0x000000E0
|
||||
@ -12517,6 +12528,21 @@ struct public_drv_mb {
|
||||
|
||||
#define RESOURCE_DUMP 0
|
||||
|
||||
/* DRV_MSG_CODE_MDUMP_CMD parameters */
|
||||
#define MDUMP_DRV_PARAM_OPCODE_MASK 0x0000000f
|
||||
#define DRV_MSG_CODE_MDUMP_ACK 0x01
|
||||
#define DRV_MSG_CODE_MDUMP_SET_VALUES 0x02
|
||||
#define DRV_MSG_CODE_MDUMP_TRIGGER 0x03
|
||||
#define DRV_MSG_CODE_MDUMP_GET_CONFIG 0x04
|
||||
#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05
|
||||
#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06
|
||||
#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07
|
||||
#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08
|
||||
|
||||
#define DRV_MSG_CODE_HW_DUMP_TRIGGER 0x0a
|
||||
#define DRV_MSG_CODE_MDUMP_GEN_MDUMP2 0x0b
|
||||
#define DRV_MSG_CODE_MDUMP_FREE_MDUMP2 0x0c
|
||||
|
||||
#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000
|
||||
#define DRV_MSG_CODE_OS_WOL 0x002e0000
|
||||
|
||||
@ -12626,6 +12652,17 @@ struct public_drv_mb {
|
||||
#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002
|
||||
#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK 0x00010000
|
||||
|
||||
/* DRV_MSG_CODE_DEBUG_DATA_SEND parameters */
|
||||
#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_OFFSET 0
|
||||
#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_MASK 0xFF
|
||||
|
||||
/* Driver attributes params */
|
||||
#define DRV_MB_PARAM_ATTRIBUTE_KEY_OFFSET 0
|
||||
#define DRV_MB_PARAM_ATTRIBUTE_KEY_MASK 0x00FFFFFF
|
||||
#define DRV_MB_PARAM_ATTRIBUTE_CMD_OFFSET 24
|
||||
#define DRV_MB_PARAM_ATTRIBUTE_CMD_MASK 0xFF000000
|
||||
|
||||
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_OFFSET 0
|
||||
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT 0
|
||||
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK 0x0000FFFF
|
||||
#define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT 16
|
||||
@ -12678,6 +12715,14 @@ struct public_drv_mb {
|
||||
#define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE 0x00870000
|
||||
#define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff
|
||||
|
||||
#define FW_MSG_CODE_DEBUG_DATA_SEND_INV_ARG 0xb0070000
|
||||
#define FW_MSG_CODE_DEBUG_DATA_SEND_BUF_FULL 0xb0080000
|
||||
#define FW_MSG_CODE_DEBUG_DATA_SEND_NO_BUF 0xb0090000
|
||||
#define FW_MSG_CODE_DEBUG_NOT_ENABLED 0xb00a0000
|
||||
#define FW_MSG_CODE_DEBUG_DATA_SEND_OK 0xb00b0000
|
||||
|
||||
#define FW_MSG_CODE_MDUMP_INVALID_CMD 0x00030000
|
||||
|
||||
u32 fw_mb_param;
|
||||
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
|
||||
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
|
||||
@ -12742,9 +12787,9 @@ enum MFW_DRV_MSG_TYPE {
|
||||
MFW_DRV_MSG_GET_FCOE_STATS,
|
||||
MFW_DRV_MSG_GET_ISCSI_STATS,
|
||||
MFW_DRV_MSG_GET_RDMA_STATS,
|
||||
MFW_DRV_MSG_BW_UPDATE10,
|
||||
MFW_DRV_MSG_FAILURE_DETECTED,
|
||||
MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
|
||||
MFW_DRV_MSG_BW_UPDATE11,
|
||||
MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED,
|
||||
MFW_DRV_MSG_RESERVED,
|
||||
MFW_DRV_MSG_GET_TLV_REQ,
|
||||
MFW_DRV_MSG_OEM_CFG_UPDATE,
|
||||
|
@ -762,9 +762,10 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
|
||||
dst_type,
|
||||
length_cur);
|
||||
if (qed_status) {
|
||||
DP_NOTICE(p_hwfn,
|
||||
"qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
|
||||
qed_status, src_addr, dst_addr, length_cur);
|
||||
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_DMAE_FAIL,
|
||||
"qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
|
||||
qed_status, src_addr,
|
||||
dst_addr, length_cur);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -837,6 +838,41 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
|
||||
return rc;
|
||||
}
|
||||
|
||||
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
enum qed_hw_err_type err_type, char *fmt, ...)
|
||||
{
|
||||
char buf[QED_HW_ERR_MAX_STR_SIZE];
|
||||
va_list vl;
|
||||
int len;
|
||||
|
||||
if (fmt) {
|
||||
va_start(vl, fmt);
|
||||
len = vsnprintf(buf, QED_HW_ERR_MAX_STR_SIZE, fmt, vl);
|
||||
va_end(vl);
|
||||
|
||||
if (len > QED_HW_ERR_MAX_STR_SIZE - 1)
|
||||
len = QED_HW_ERR_MAX_STR_SIZE - 1;
|
||||
|
||||
DP_NOTICE(p_hwfn, "%s", buf);
|
||||
}
|
||||
|
||||
/* Fan failure cannot be masked by handling of another HW error */
|
||||
if (p_hwfn->cdev->recov_in_prog &&
|
||||
err_type != QED_HW_ERR_FAN_FAIL) {
|
||||
DP_VERBOSE(p_hwfn,
|
||||
NETIF_MSG_DRV,
|
||||
"Recovery is in progress. Avoid notifying about HW error %d.\n",
|
||||
err_type);
|
||||
return;
|
||||
}
|
||||
|
||||
qed_hw_error_occurred(p_hwfn, err_type);
|
||||
|
||||
if (fmt)
|
||||
qed_mcp_send_raw_debug_data(p_hwfn, p_ptt, buf, len);
|
||||
}
|
||||
|
||||
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt, const char *phase)
|
||||
{
|
||||
|
@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
|
||||
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt, const char *phase);
|
||||
|
||||
#define QED_HW_ERR_MAX_STR_SIZE 256
|
||||
|
||||
/**
|
||||
* @brief qed_hw_err_notify - Notify upper layer driver and management FW
|
||||
* about a HW error.
|
||||
*
|
||||
* @param p_hwfn
|
||||
* @param p_ptt
|
||||
* @param err_type
|
||||
* @param fmt - debug data buffer to send to the MFW
|
||||
* @param ... - buffer format args
|
||||
*/
|
||||
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
enum qed_hw_err_type err_type, char *fmt, ...);
|
||||
#endif
|
||||
|
@ -96,6 +96,7 @@ struct aeu_invert_reg_bit {
|
||||
#define ATTENTION_BB(value) (value << ATTENTION_BB_SHIFT)
|
||||
#define ATTENTION_BB_DIFFERENT BIT(23)
|
||||
|
||||
#define ATTENTION_CLEAR_ENABLE BIT(28)
|
||||
unsigned int flags;
|
||||
|
||||
/* Callback to call if attention will be triggered */
|
||||
@ -363,6 +364,21 @@ static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
|
||||
return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
|
||||
}
|
||||
|
||||
static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
|
||||
{
|
||||
qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_FW_ASSERT,
|
||||
"FW assertion!\n");
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Attention callback for "General Attention 35": only logs the event and
 * returns 0.
 */
static int qed_general_attention_35(struct qed_hwfn *p_hwfn)
{
	DP_INFO(p_hwfn, "General attention 35!\n");
	return 0;
}
|
||||
|
||||
#define QED_DORQ_ATTENTION_REASON_MASK (0xfffff)
|
||||
#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
|
||||
#define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
|
||||
@ -605,13 +621,15 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
|
||||
|
||||
{
|
||||
{ /* After Invert 4 */
|
||||
{"General Attention 32", ATTENTION_SINGLE,
|
||||
NULL, MAX_BLOCK_ID},
|
||||
{"General Attention 32", ATTENTION_SINGLE |
|
||||
ATTENTION_CLEAR_ENABLE, qed_fw_assertion,
|
||||
MAX_BLOCK_ID},
|
||||
{"General Attention %d",
|
||||
(2 << ATTENTION_LENGTH_SHIFT) |
|
||||
(33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID},
|
||||
{"General Attention 35", ATTENTION_SINGLE,
|
||||
NULL, MAX_BLOCK_ID},
|
||||
{"General Attention 35", ATTENTION_SINGLE |
|
||||
ATTENTION_CLEAR_ENABLE, qed_general_attention_35,
|
||||
MAX_BLOCK_ID},
|
||||
{"NWS Parity",
|
||||
ATTENTION_PAR | ATTENTION_BB_DIFFERENT |
|
||||
ATTENTION_BB(AEU_INVERT_REG_SPECIAL_CNIG_0),
|
||||
@ -927,9 +945,12 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn,
|
||||
qed_int_attn_print(p_hwfn, p_aeu->block_index,
|
||||
ATTN_TYPE_INTERRUPT, !b_fatal);
|
||||
|
||||
|
||||
/* If the attention is benign, no need to prevent it */
|
||||
if (!rc)
|
||||
/* Reach assertion if attention is fatal */
|
||||
if (b_fatal)
|
||||
qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_HW_ATTN,
|
||||
"`%s': Fatal attention\n",
|
||||
p_bit_name);
|
||||
else /* If the attention is benign, no need to prevent it */
|
||||
goto out;
|
||||
|
||||
/* Prevent this Attention from being asserted in the future */
|
||||
@ -2349,6 +2370,11 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev)
|
||||
cdev->hwfns[i].b_int_requested = false;
|
||||
}
|
||||
|
||||
/* Store in the device context whether attentions should be prevented from
 * being reasserted.
 */
void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable)
{
	cdev->attn_clr_en = clr_enable;
}
|
||||
|
||||
int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
|
||||
u8 timer_res, u16 sb_id, bool tx)
|
||||
{
|
||||
|
@ -190,6 +190,17 @@ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
|
||||
*/
|
||||
void qed_int_disable_post_isr_release(struct qed_dev *cdev);
|
||||
|
||||
/**
|
||||
* @brief qed_int_attn_clr_enable - sets whether the general behavior is
|
||||
* preventing attentions from being reasserted, or following the
|
||||
* attributes of the specific attention.
|
||||
*
|
||||
* @param cdev
|
||||
* @param clr_enable
|
||||
*
|
||||
*/
|
||||
void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable);
|
||||
|
||||
/**
|
||||
* @brief - Doorbell Recovery handler.
|
||||
* Run doorbell recovery in case of PF overflow (and flush DORQ if
|
||||
|
@ -2468,6 +2468,39 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
|
||||
ops->schedule_recovery_handler(cookie);
|
||||
}
|
||||
|
||||
char *qed_hw_err_type_descr[] = {
|
||||
[QED_HW_ERR_FAN_FAIL] = "Fan Failure",
|
||||
[QED_HW_ERR_MFW_RESP_FAIL] = "MFW Response Failure",
|
||||
[QED_HW_ERR_HW_ATTN] = "HW Attention",
|
||||
[QED_HW_ERR_DMAE_FAIL] = "DMAE Failure",
|
||||
[QED_HW_ERR_RAMROD_FAIL] = "Ramrod Failure",
|
||||
[QED_HW_ERR_FW_ASSERT] = "FW Assertion",
|
||||
[QED_HW_ERR_LAST] = "Unknown",
|
||||
};
|
||||
|
||||
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
|
||||
enum qed_hw_err_type err_type)
|
||||
{
|
||||
struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
|
||||
void *cookie = p_hwfn->cdev->ops_cookie;
|
||||
char *err_str;
|
||||
|
||||
if (err_type > QED_HW_ERR_LAST)
|
||||
err_type = QED_HW_ERR_LAST;
|
||||
err_str = qed_hw_err_type_descr[err_type];
|
||||
|
||||
DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);
|
||||
|
||||
/* Call the HW error handler of the protocol driver.
|
||||
* If it is not available - perform a minimal handling of preventing
|
||||
* HW attentions from being reasserted.
|
||||
*/
|
||||
if (ops && ops->schedule_hw_err_handler)
|
||||
ops->schedule_hw_err_handler(cookie, err_type);
|
||||
else
|
||||
qed_int_attn_clr_enable(p_hwfn->cdev, true);
|
||||
}
|
||||
|
||||
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
|
||||
void *handle)
|
||||
{
|
||||
@ -2689,6 +2722,7 @@ const struct qed_common_ops qed_common_ops_pass = {
|
||||
.set_led = &qed_set_led,
|
||||
.recovery_process = &qed_recovery_process,
|
||||
.recovery_prolog = &qed_recovery_prolog,
|
||||
.attn_clr_enable = &qed_int_attn_clr_enable,
|
||||
.update_drv_state = &qed_update_drv_state,
|
||||
.update_mac = &qed_update_mac,
|
||||
.update_mtu = &qed_update_mtu,
|
||||
|
@ -575,6 +575,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
|
||||
if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
|
||||
qed_mcp_cmd_set_blocking(p_hwfn, true);
|
||||
|
||||
qed_hw_err_notify(p_hwfn, p_ptt,
|
||||
QED_HW_ERR_MFW_RESP_FAIL, NULL);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
@ -1704,6 +1706,127 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
|
||||
&resp, &param);
|
||||
}
|
||||
|
||||
static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt)
|
||||
{
|
||||
/* A single notification should be sent to upper driver in CMT mode */
|
||||
if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
|
||||
return;
|
||||
|
||||
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_FAN_FAIL,
|
||||
"Fan failure was detected on the network interface card and it's going to be shut down.\n");
|
||||
}
|
||||
|
||||
struct qed_mdump_cmd_params {
|
||||
u32 cmd;
|
||||
void *p_data_src;
|
||||
u8 data_src_size;
|
||||
void *p_data_dst;
|
||||
u8 data_dst_size;
|
||||
u32 mcp_resp;
|
||||
};
|
||||
|
||||
static int
|
||||
qed_mcp_mdump_cmd(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
struct qed_mdump_cmd_params *p_mdump_cmd_params)
|
||||
{
|
||||
struct qed_mcp_mb_params mb_params;
|
||||
int rc;
|
||||
|
||||
memset(&mb_params, 0, sizeof(mb_params));
|
||||
mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD;
|
||||
mb_params.param = p_mdump_cmd_params->cmd;
|
||||
mb_params.p_data_src = p_mdump_cmd_params->p_data_src;
|
||||
mb_params.data_src_size = p_mdump_cmd_params->data_src_size;
|
||||
mb_params.p_data_dst = p_mdump_cmd_params->p_data_dst;
|
||||
mb_params.data_dst_size = p_mdump_cmd_params->data_dst_size;
|
||||
rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
|
||||
|
||||
if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
|
||||
DP_INFO(p_hwfn,
|
||||
"The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
|
||||
p_mdump_cmd_params->cmd);
|
||||
rc = -EOPNOTSUPP;
|
||||
} else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
|
||||
DP_INFO(p_hwfn,
|
||||
"The mdump command is not supported by the MFW\n");
|
||||
rc = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int qed_mcp_mdump_ack(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
|
||||
{
|
||||
struct qed_mdump_cmd_params mdump_cmd_params;
|
||||
|
||||
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
|
||||
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_ACK;
|
||||
|
||||
return qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
|
||||
}
|
||||
|
||||
int
|
||||
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
struct mdump_retain_data_stc *p_mdump_retain)
|
||||
{
|
||||
struct qed_mdump_cmd_params mdump_cmd_params;
|
||||
int rc;
|
||||
|
||||
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
|
||||
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
|
||||
mdump_cmd_params.p_data_dst = p_mdump_retain;
|
||||
mdump_cmd_params.data_dst_size = sizeof(*p_mdump_retain);
|
||||
|
||||
rc = qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
|
||||
DP_INFO(p_hwfn,
|
||||
"Failed to get the mdump retained data [mcp_resp 0x%x]\n",
|
||||
mdump_cmd_params.mcp_resp);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void qed_mcp_handle_critical_error(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt)
|
||||
{
|
||||
struct mdump_retain_data_stc mdump_retain;
|
||||
int rc;
|
||||
|
||||
/* In CMT mode - no need for more than a single acknowledgment to the
|
||||
* MFW, and no more than a single notification to the upper driver.
|
||||
*/
|
||||
if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
|
||||
return;
|
||||
|
||||
rc = qed_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
|
||||
if (rc == 0 && mdump_retain.valid)
|
||||
DP_NOTICE(p_hwfn,
|
||||
"The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
|
||||
mdump_retain.epoch,
|
||||
mdump_retain.pf, mdump_retain.status);
|
||||
else
|
||||
DP_NOTICE(p_hwfn,
|
||||
"The MFW notified that a critical error occurred in the device\n");
|
||||
|
||||
DP_NOTICE(p_hwfn,
|
||||
"Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
|
||||
qed_mcp_mdump_ack(p_hwfn, p_ptt);
|
||||
|
||||
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_HW_ATTN, NULL);
|
||||
}
|
||||
|
||||
void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
|
||||
{
|
||||
struct public_func shmem_info;
|
||||
@ -1850,6 +1973,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
|
||||
case MFW_DRV_MSG_S_TAG_UPDATE:
|
||||
qed_mcp_update_stag(p_hwfn, p_ptt);
|
||||
break;
|
||||
case MFW_DRV_MSG_FAILURE_DETECTED:
|
||||
qed_mcp_handle_fan_failure(p_hwfn, p_ptt);
|
||||
break;
|
||||
case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
|
||||
qed_mcp_handle_critical_error(p_hwfn, p_ptt);
|
||||
break;
|
||||
case MFW_DRV_MSG_GET_TLV_REQ:
|
||||
qed_mfw_tlv_req(p_hwfn);
|
||||
break;
|
||||
@ -3819,3 +3948,127 @@ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
|
||||
DRV_MSG_CODE_SET_NVM_CFG_OPTION,
|
||||
mb_param, &resp, &param, len, (u32 *)p_buf);
|
||||
}
|
||||
|
||||
#define QED_MCP_DBG_DATA_MAX_SIZE MCP_DRV_NVM_BUF_LEN
|
||||
#define QED_MCP_DBG_DATA_MAX_HEADER_SIZE sizeof(u32)
|
||||
#define QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE \
|
||||
(QED_MCP_DBG_DATA_MAX_SIZE - QED_MCP_DBG_DATA_MAX_HEADER_SIZE)
|
||||
|
||||
static int
|
||||
__qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt, u8 *p_buf, u8 size)
|
||||
{
|
||||
struct qed_mcp_mb_params mb_params;
|
||||
int rc;
|
||||
|
||||
if (size > QED_MCP_DBG_DATA_MAX_SIZE) {
|
||||
DP_ERR(p_hwfn,
|
||||
"Debug data size is %d while it should not exceed %d\n",
|
||||
size, QED_MCP_DBG_DATA_MAX_SIZE);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memset(&mb_params, 0, sizeof(mb_params));
|
||||
mb_params.cmd = DRV_MSG_CODE_DEBUG_DATA_SEND;
|
||||
SET_MFW_FIELD(mb_params.param, DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE, size);
|
||||
mb_params.p_data_src = p_buf;
|
||||
mb_params.data_src_size = size;
|
||||
rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
|
||||
DP_INFO(p_hwfn,
|
||||
"The DEBUG_DATA_SEND command is unsupported by the MFW\n");
|
||||
return -EOPNOTSUPP;
|
||||
} else if (mb_params.mcp_resp == (u32)FW_MSG_CODE_DEBUG_NOT_ENABLED) {
|
||||
DP_INFO(p_hwfn, "The DEBUG_DATA_SEND command is not enabled\n");
|
||||
return -EBUSY;
|
||||
} else if (mb_params.mcp_resp != (u32)FW_MSG_CODE_DEBUG_DATA_SEND_OK) {
|
||||
DP_NOTICE(p_hwfn,
|
||||
"Failed to send debug data to the MFW [resp 0x%08x]\n",
|
||||
mb_params.mcp_resp);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum qed_mcp_dbg_data_type {
|
||||
QED_MCP_DBG_DATA_TYPE_RAW,
|
||||
};
|
||||
|
||||
/* Header format: [31:28] PFID, [27:20] flags, [19:12] type, [11:0] S/N */
|
||||
#define QED_MCP_DBG_DATA_HDR_SN_OFFSET 0
|
||||
#define QED_MCP_DBG_DATA_HDR_SN_MASK 0x00000fff
|
||||
#define QED_MCP_DBG_DATA_HDR_TYPE_OFFSET 12
|
||||
#define QED_MCP_DBG_DATA_HDR_TYPE_MASK 0x000ff000
|
||||
#define QED_MCP_DBG_DATA_HDR_FLAGS_OFFSET 20
|
||||
#define QED_MCP_DBG_DATA_HDR_FLAGS_MASK 0x0ff00000
|
||||
#define QED_MCP_DBG_DATA_HDR_PF_OFFSET 28
|
||||
#define QED_MCP_DBG_DATA_HDR_PF_MASK 0xf0000000
|
||||
|
||||
#define QED_MCP_DBG_DATA_HDR_FLAGS_FIRST 0x1
|
||||
#define QED_MCP_DBG_DATA_HDR_FLAGS_LAST 0x2
|
||||
|
||||
static int
|
||||
qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
enum qed_mcp_dbg_data_type type, u8 *p_buf, u32 size)
|
||||
{
|
||||
u8 raw_data[QED_MCP_DBG_DATA_MAX_SIZE], *p_tmp_buf = p_buf;
|
||||
u32 tmp_size = size, *p_header, *p_payload;
|
||||
u8 flags = 0;
|
||||
u16 seq;
|
||||
int rc;
|
||||
|
||||
p_header = (u32 *)raw_data;
|
||||
p_payload = (u32 *)(raw_data + QED_MCP_DBG_DATA_MAX_HEADER_SIZE);
|
||||
|
||||
seq = (u16)atomic_inc_return(&p_hwfn->mcp_info->dbg_data_seq);
|
||||
|
||||
/* First chunk is marked as 'first' */
|
||||
flags |= QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
|
||||
|
||||
*p_header = 0;
|
||||
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_SN, seq);
|
||||
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_TYPE, type);
|
||||
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
|
||||
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_PF, p_hwfn->abs_pf_id);
|
||||
|
||||
while (tmp_size > QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE) {
|
||||
memcpy(p_payload, p_tmp_buf, QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE);
|
||||
rc = __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
|
||||
QED_MCP_DBG_DATA_MAX_SIZE);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* Clear the 'first' marking after sending the first chunk */
|
||||
if (p_tmp_buf == p_buf) {
|
||||
flags &= ~QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
|
||||
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS,
|
||||
flags);
|
||||
}
|
||||
|
||||
p_tmp_buf += QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
|
||||
tmp_size -= QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
|
||||
}
|
||||
|
||||
/* Last chunk is marked as 'last' */
|
||||
flags |= QED_MCP_DBG_DATA_HDR_FLAGS_LAST;
|
||||
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
|
||||
memcpy(p_payload, p_tmp_buf, tmp_size);
|
||||
|
||||
/* Casting the left size to u8 is ok since at this point it is <= 32 */
|
||||
return __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
|
||||
(u8)(QED_MCP_DBG_DATA_MAX_HEADER_SIZE +
|
||||
tmp_size));
|
||||
}
|
||||
|
||||
int
|
||||
qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt, u8 *p_buf, u32 size)
|
||||
{
|
||||
return qed_mcp_send_debug_data(p_hwfn, p_ptt,
|
||||
QED_MCP_DBG_DATA_TYPE_RAW, p_buf, size);
|
||||
}
|
||||
|
@ -685,6 +685,18 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
|
||||
*/
|
||||
int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
|
||||
|
||||
/**
|
||||
* @brief Send raw debug data to the MFW
|
||||
*
|
||||
* @param p_hwfn
|
||||
* @param p_ptt
|
||||
* @param p_buf - raw debug data buffer
|
||||
* @param size - buffer size
|
||||
*/
|
||||
int
|
||||
qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt, u8 *p_buf, u32 size);
|
||||
|
||||
/* Using hwfn number (and not pf_num) is required since in CMT mode,
|
||||
* same pf_num may be used by two different hwfn
|
||||
* TODO - this shouldn't really be in .h file, but until all fields
|
||||
@ -731,6 +743,9 @@ struct qed_mcp_info {
|
||||
|
||||
/* Capabilities negotiated with the MFW */
|
||||
u32 capabilities;
|
||||
|
||||
/* S/N for debug data mailbox commands */
|
||||
atomic_t dbg_data_seq;
|
||||
};
|
||||
|
||||
struct qed_mcp_mb_params {
|
||||
@ -1001,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
|
||||
int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt, u32 mask_parities);
|
||||
|
||||
/* @brief - Gets the mdump retained data from the MFW.
|
||||
*
|
||||
* @param p_hwfn
|
||||
* @param p_ptt
|
||||
* @param p_mdump_retain
|
||||
*
|
||||
* @param return 0 upon success.
|
||||
*/
|
||||
int
|
||||
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
|
||||
struct qed_ptt *p_ptt,
|
||||
struct mdump_retain_data_stc *p_mdump_retain);
|
||||
|
||||
/**
|
||||
* @brief - Sets the MFW's max value for the given resource
|
||||
*
|
||||
|
@ -160,12 +160,16 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
|
||||
return 0;
|
||||
}
|
||||
err:
|
||||
DP_NOTICE(p_hwfn,
|
||||
"Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
|
||||
le32_to_cpu(p_ent->elem.hdr.cid),
|
||||
p_ent->elem.hdr.cmd_id,
|
||||
p_ent->elem.hdr.protocol_id,
|
||||
le16_to_cpu(p_ent->elem.hdr.echo));
|
||||
p_ptt = qed_ptt_acquire(p_hwfn);
|
||||
if (!p_ptt)
|
||||
return -EBUSY;
|
||||
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_RAMROD_FAIL,
|
||||
"Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
|
||||
le32_to_cpu(p_ent->elem.hdr.cid),
|
||||
p_ent->elem.hdr.cmd_id,
|
||||
p_ent->elem.hdr.protocol_id,
|
||||
le16_to_cpu(p_ent->elem.hdr.echo));
|
||||
qed_ptt_release(p_hwfn, p_ptt);
|
||||
|
||||
return -EBUSY;
|
||||
}
|
||||
|
@ -278,6 +278,14 @@ struct qede_dev {
|
||||
struct qede_rdma_dev rdma_info;
|
||||
|
||||
struct bpf_prog *xdp_prog;
|
||||
|
||||
unsigned long err_flags;
|
||||
#define QEDE_ERR_IS_HANDLED 31
|
||||
#define QEDE_ERR_ATTN_CLR_EN 0
|
||||
#define QEDE_ERR_GET_DBG_INFO 1
|
||||
#define QEDE_ERR_IS_RECOVERABLE 2
|
||||
#define QEDE_ERR_WARN 3
|
||||
|
||||
struct qede_dump_info dump_info;
|
||||
};
|
||||
|
||||
@ -485,12 +493,15 @@ struct qede_fastpath {
|
||||
|
||||
/* Bit positions in the slow-path task flag word (edev->sp_flags).
 * Each value must be unique: the bits are set and tested independently
 * with set_bit()/test_and_clear_bit().
 */
#define QEDE_SP_RECOVERY		0
#define QEDE_SP_RX_MODE			1
#define QEDE_SP_RSVD1			2
#define QEDE_SP_RSVD2			3
#define QEDE_SP_HW_ERR			4
#define QEDE_SP_ARFS_CONFIG		5
#define QEDE_SP_AER			7

#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
		       u16 rxq_index, u32 flow_id);
/* The former "#define QEDE_SP_ARFS_CONFIG 4" is intentionally gone from
 * here: it redefined the macro above and aliased QEDE_SP_HW_ERR.
 */
#define QEDE_SP_TASK_POLL_DELAY		(5 * HZ)
#endif
|
||||
|
||||
@ -522,7 +533,6 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
|
||||
netdev_features_t qede_features_check(struct sk_buff *skb,
|
||||
struct net_device *dev,
|
||||
netdev_features_t features);
|
||||
void qede_tx_log_print(struct qede_dev *edev, struct qede_fastpath *fp);
|
||||
int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy);
|
||||
int qede_free_tx_pkt(struct qede_dev *edev,
|
||||
struct qede_tx_queue *txq, int *len);
|
||||
|
@ -190,12 +190,14 @@ static const struct {
|
||||
/* Indices of the driver-private ethtool flags. Each value is used as a bit
 * position (via BIT()) in the u32 exchanged by the priv-flags callbacks.
 */
enum {
	QEDE_PRI_FLAG_CMT = 0,
	QEDE_PRI_FLAG_SMART_AN_SUPPORT = 1, /* MFW supports SmartAN */
	QEDE_PRI_FLAG_RECOVER_ON_ERROR = 2,
	QEDE_PRI_FLAG_LEN, /* number of flags above; keep last */
};
|
||||
|
||||
static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
|
||||
"Coupled-Function",
|
||||
"SmartAN capable",
|
||||
"Recover on error",
|
||||
};
|
||||
|
||||
enum qede_ethtool_tests {
|
||||
@ -417,9 +419,30 @@ static u32 qede_get_priv_flags(struct net_device *dev)
|
||||
if (edev->dev_info.common.smart_an)
|
||||
flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT);
|
||||
|
||||
if (edev->err_flags & BIT(QEDE_ERR_IS_RECOVERABLE))
|
||||
flags |= BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR);
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
/**
 * qede_set_priv_flags - ethtool .set_priv_flags callback
 * @dev: network device whose private flags are being changed
 * @flags: requested private flag word (BIT(QEDE_PRI_FLAG_*) values)
 *
 * Only the "Recover on error" flag may differ from the current flag word;
 * it is mirrored into QEDE_ERR_IS_RECOVERABLE in edev->err_flags.
 *
 * Return: 0 on success, -EINVAL if any other flag would change.
 */
static int qede_set_priv_flags(struct net_device *dev, u32 flags)
{
	const u32 recover_mask = BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR);
	struct qede_dev *edev = netdev_priv(dev);
	u32 changed;

	/* Bits that differ between the request and the current state */
	changed = qede_get_priv_flags(dev) ^ flags;
	if (changed & ~recover_mask)
		return -EINVAL;

	if (!(flags & recover_mask)) {
		clear_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
		return 0;
	}

	set_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
	return 0;
}
|
||||
|
||||
struct qede_link_mode_mapping {
|
||||
u32 qed_link_mode;
|
||||
u32 ethtool_link_mode;
|
||||
@ -2098,6 +2121,7 @@ static const struct ethtool_ops qede_ethtool_ops = {
|
||||
.set_phys_id = qede_set_phys_id,
|
||||
.get_ethtool_stats = qede_get_ethtool_stats,
|
||||
.get_priv_flags = qede_get_priv_flags,
|
||||
.set_priv_flags = qede_set_priv_flags,
|
||||
.get_sset_count = qede_get_sset_count,
|
||||
.get_rxnfc = qede_get_rxnfc,
|
||||
.set_rxnfc = qede_set_rxnfc,
|
||||
|
@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev);
|
||||
static void qede_link_update(void *dev, struct qed_link_output *link);
|
||||
static void qede_schedule_recovery_handler(void *dev);
|
||||
static void qede_recovery_handler(struct qede_dev *edev);
|
||||
static void qede_schedule_hw_err_handler(void *dev,
|
||||
enum qed_hw_err_type err_type);
|
||||
static void qede_get_eth_tlv_data(void *edev, void *data);
|
||||
static void qede_get_generic_tlv_data(void *edev,
|
||||
struct qed_generic_tlvs *data);
|
||||
|
||||
static void qede_generic_hw_err_handler(struct qede_dev *edev);
|
||||
#ifdef CONFIG_QED_SRIOV
|
||||
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
|
||||
__be16 vlan_proto)
|
||||
@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
|
||||
#endif
|
||||
.link_update = qede_link_update,
|
||||
.schedule_recovery_handler = qede_schedule_recovery_handler,
|
||||
.schedule_hw_err_handler = qede_schedule_hw_err_handler,
|
||||
.get_generic_tlv_data = qede_get_generic_tlv_data,
|
||||
.get_protocol_tlv_data = qede_get_eth_tlv_data,
|
||||
},
|
||||
@ -536,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq)
|
||||
{
|
||||
DP_NOTICE(edev,
|
||||
"Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n",
|
||||
txq->index, le16_to_cpu(*txq->hw_cons_ptr),
|
||||
qed_chain_get_cons_idx(&txq->tx_pbl),
|
||||
qed_chain_get_prod_idx(&txq->tx_pbl),
|
||||
jiffies);
|
||||
}
|
||||
|
||||
static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue)
|
||||
{
|
||||
struct qede_dev *edev = netdev_priv(dev);
|
||||
struct qede_tx_queue *txq;
|
||||
int cos;
|
||||
|
||||
netif_carrier_off(dev);
|
||||
DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue);
|
||||
|
||||
if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX))
|
||||
return;
|
||||
|
||||
for_each_cos_in_txq(edev, cos) {
|
||||
txq = &edev->fp_array[txqueue].txq[cos];
|
||||
|
||||
if (qed_chain_get_cons_idx(&txq->tx_pbl) !=
|
||||
qed_chain_get_prod_idx(&txq->tx_pbl))
|
||||
qede_tx_log_print(edev, txq);
|
||||
}
|
||||
|
||||
if (IS_VF(edev))
|
||||
return;
|
||||
|
||||
if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
|
||||
edev->state == QEDE_STATE_RECOVERY) {
|
||||
DP_INFO(edev,
|
||||
"Avoid handling a Tx timeout while another HW error is being handled\n");
|
||||
return;
|
||||
}
|
||||
|
||||
set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
|
||||
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
|
||||
schedule_delayed_work(&edev->sp_task, 0);
|
||||
}
|
||||
|
||||
static int qede_setup_tc(struct net_device *ndev, u8 num_tc)
|
||||
{
|
||||
struct qede_dev *edev = netdev_priv(ndev);
|
||||
@ -623,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = {
|
||||
.ndo_validate_addr = eth_validate_addr,
|
||||
.ndo_change_mtu = qede_change_mtu,
|
||||
.ndo_do_ioctl = qede_ioctl,
|
||||
.ndo_tx_timeout = qede_tx_timeout,
|
||||
#ifdef CONFIG_QED_SRIOV
|
||||
.ndo_set_vf_mac = qede_set_vf_mac,
|
||||
.ndo_set_vf_vlan = qede_set_vf_vlan,
|
||||
@ -1009,6 +1058,8 @@ static void qede_sp_task(struct work_struct *work)
|
||||
qede_process_arfs_filters(edev, false);
|
||||
}
|
||||
#endif
|
||||
if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
|
||||
qede_generic_hw_err_handler(edev);
|
||||
__qede_unlock(edev);
|
||||
|
||||
if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
|
||||
@ -2509,6 +2560,100 @@ static void qede_recovery_handler(struct qede_dev *edev)
|
||||
qede_recovery_failed(edev);
|
||||
}
|
||||
|
||||
static void qede_atomic_hw_err_handler(struct qede_dev *edev)
|
||||
{
|
||||
struct qed_dev *cdev = edev->cdev;
|
||||
|
||||
DP_NOTICE(edev,
|
||||
"Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
|
||||
edev->err_flags);
|
||||
|
||||
/* Get a call trace of the flow that led to the error */
|
||||
WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
|
||||
|
||||
/* Prevent HW attentions from being reasserted */
|
||||
if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags))
|
||||
edev->ops->common->attn_clr_enable(cdev, true);
|
||||
|
||||
DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
|
||||
}
|
||||
|
||||
static void qede_generic_hw_err_handler(struct qede_dev *edev)
|
||||
{
|
||||
struct qed_dev *cdev = edev->cdev;
|
||||
|
||||
DP_NOTICE(edev,
|
||||
"Generic sleepable HW error handling started - err_flags 0x%lx\n",
|
||||
edev->err_flags);
|
||||
|
||||
/* Trigger a recovery process.
|
||||
* This is placed in the sleep requiring section just to make
|
||||
* sure it is the last one, and that all the other operations
|
||||
* were completed.
|
||||
*/
|
||||
if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
|
||||
edev->ops->common->recovery_process(cdev);
|
||||
|
||||
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
|
||||
|
||||
DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
|
||||
}
|
||||
|
||||
static void qede_set_hw_err_flags(struct qede_dev *edev,
|
||||
enum qed_hw_err_type err_type)
|
||||
{
|
||||
unsigned long err_flags = 0;
|
||||
|
||||
switch (err_type) {
|
||||
case QED_HW_ERR_DMAE_FAIL:
|
||||
set_bit(QEDE_ERR_WARN, &err_flags);
|
||||
fallthrough;
|
||||
case QED_HW_ERR_MFW_RESP_FAIL:
|
||||
case QED_HW_ERR_HW_ATTN:
|
||||
case QED_HW_ERR_RAMROD_FAIL:
|
||||
case QED_HW_ERR_FW_ASSERT:
|
||||
set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
|
||||
set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
|
||||
break;
|
||||
|
||||
default:
|
||||
DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
|
||||
break;
|
||||
}
|
||||
|
||||
edev->err_flags |= err_flags;
|
||||
}
|
||||
|
||||
static void qede_schedule_hw_err_handler(void *dev,
|
||||
enum qed_hw_err_type err_type)
|
||||
{
|
||||
struct qede_dev *edev = dev;
|
||||
|
||||
/* Fan failure cannot be masked by handling of another HW error or by a
|
||||
* concurrent recovery process.
|
||||
*/
|
||||
if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
|
||||
edev->state == QEDE_STATE_RECOVERY) &&
|
||||
err_type != QED_HW_ERR_FAN_FAIL) {
|
||||
DP_INFO(edev,
|
||||
"Avoid scheduling an error handling while another HW error is being handled\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (err_type >= QED_HW_ERR_LAST) {
|
||||
DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
|
||||
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
|
||||
return;
|
||||
}
|
||||
|
||||
qede_set_hw_err_flags(edev, err_type);
|
||||
qede_atomic_hw_err_handler(edev);
|
||||
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
|
||||
schedule_delayed_work(&edev->sp_task, 0);
|
||||
|
||||
DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
|
||||
}
|
||||
|
||||
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
|
||||
{
|
||||
struct netdev_queue *netdev_txq;
|
||||
|
@ -607,6 +607,16 @@ struct qed_sb_info {
|
||||
struct qed_dev *cdev;
|
||||
};
|
||||
|
||||
/* HW error types passed to the schedule_hw_err_handler() callback */
enum qed_hw_err_type {
	QED_HW_ERR_FAN_FAIL = 0,
	QED_HW_ERR_MFW_RESP_FAIL = 1,
	QED_HW_ERR_HW_ATTN = 2,
	QED_HW_ERR_DMAE_FAIL = 3,
	QED_HW_ERR_RAMROD_FAIL = 4,
	QED_HW_ERR_FW_ASSERT = 5,
	QED_HW_ERR_LAST, /* upper bound of valid types; keep last */
};
|
||||
|
||||
enum qed_dev_type {
|
||||
QED_DEV_TYPE_BB,
|
||||
QED_DEV_TYPE_AH,
|
||||
@ -811,10 +821,11 @@ enum qed_nvm_flash_cmd {
|
||||
|
||||
struct qed_common_cb_ops {
|
||||
void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
|
||||
void (*link_update)(void *dev,
|
||||
struct qed_link_output *link);
|
||||
void (*link_update)(void *dev, struct qed_link_output *link);
|
||||
void (*schedule_recovery_handler)(void *dev);
|
||||
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
|
||||
void (*schedule_hw_err_handler)(void *dev,
|
||||
enum qed_hw_err_type err_type);
|
||||
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
|
||||
void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
|
||||
void (*get_protocol_tlv_data)(void *dev, void *data);
|
||||
};
|
||||
@ -1034,6 +1045,15 @@ struct qed_common_ops {
|
||||
*/
|
||||
int (*set_led)(struct qed_dev *cdev,
|
||||
enum qed_led_mode mode);
|
||||
|
||||
/**
|
||||
* @brief attn_clr_enable - Prevent attentions from being reasserted
|
||||
*
|
||||
* @param cdev
|
||||
* @param clr_enable
|
||||
*/
|
||||
void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable);
|
||||
|
||||
/**
|
||||
* @brief db_recovery_add - add doorbell information to the doorbell
|
||||
* recovery mechanism.
|
||||
|
Loading…
Reference in New Issue
Block a user