Merge branch 'net-qed-qede-critical-hw-error-handling'

Igor Russkikh says:

====================
net: qed/qede: critical hw error handling

FastLinQ devices as a complex systems may observe various hardware
level error conditions, both severe and recoverable.

Driver is able to detect and report this, but so far it only did
trace/dmesg based reporting.

Here we implement an extended hw error detection, service task
handler captures a dump for the later analysis.

I also resubmit a patch from Denis Bolotin on tx timeout handler,
addressing David's comment regarding recovery procedure as an extra
reaction on this event.

v2:

Removing the patch with ethtool dump and udev magic. Its quite isolated,
I'm working on devlink based logic for this separately.

v1:

https://patchwork.ozlabs.org/project/netdev/cover/cover.1588758463.git.irusskikh@marvell.com/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-05-14 13:25:47 -07:00
commit 86b6ba171d
16 changed files with 699 additions and 46 deletions

View File

@ -740,12 +740,6 @@ struct qed_dbg_feature {
u32 dumped_dwords;
};
struct qed_dbg_params {
struct qed_dbg_feature features[DBG_FEATURE_NUM];
u8 engine_for_debug;
bool print_data;
};
struct qed_dev {
u32 dp_module;
u8 dp_level;
@ -844,6 +838,9 @@ struct qed_dev {
/* Recovery */
bool recov_in_prog;
/* Indicates whether should prevent attentions from being reasserted */
bool attn_clr_en;
/* LLH info */
u8 ppfid_bitmap;
struct qed_llh_info *p_llh_info;
@ -872,17 +869,18 @@ struct qed_dev {
} protocol_ops;
void *ops_cookie;
struct qed_dbg_params dbg_params;
#ifdef CONFIG_QED_LL2
struct qed_cb_ll2_info *ll2;
u8 ll2_mac_address[ETH_ALEN];
#endif
struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM];
u8 engine_for_debug;
bool disable_ilt_dump;
DECLARE_HASHTABLE(connections, 10);
const struct firmware *firmware;
bool print_dbg_data;
u32 rdma_max_sge;
u32 rdma_max_inline;
u32 rdma_max_srq_sge;
@ -1020,6 +1018,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
enum qed_hw_err_type err_type);
void qed_get_protocol_stats(struct qed_dev *cdev,
enum qed_mcp_protocol_type type,
union qed_mcp_protocol_stats *stats);

View File

@ -7453,7 +7453,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
enum qed_dbg_features feature_idx)
{
struct qed_dbg_feature *feature =
&p_hwfn->cdev->dbg_params.features[feature_idx];
&p_hwfn->cdev->dbg_features[feature_idx];
u32 text_size_bytes, null_char_pos, i;
enum dbg_status rc;
char *text_buf;
@ -7502,7 +7502,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
text_buf[i] = '\n';
/* Dump printable feature to log */
if (p_hwfn->cdev->dbg_params.print_data)
if (p_hwfn->cdev->print_dbg_data)
qed_dbg_print_feature(text_buf, text_size_bytes);
/* Free the old dump_buf and point the dump_buf to the newly allocagted
@ -7523,7 +7523,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn,
enum qed_dbg_features feature_idx)
{
struct qed_dbg_feature *feature =
&p_hwfn->cdev->dbg_params.features[feature_idx];
&p_hwfn->cdev->dbg_features[feature_idx];
u32 buf_size_dwords;
enum dbg_status rc;
@ -7648,7 +7648,7 @@ static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
enum qed_nvm_images image_id)
{
struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
&cdev->hwfns[cdev->engine_for_debug];
u32 len_rounded, i;
__be32 val;
int rc;
@ -7780,7 +7780,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
{
u8 cur_engine, omit_engine = 0, org_engine;
struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
&cdev->hwfns[cdev->engine_for_debug];
struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
int grc_params[MAX_DBG_GRC_PARAMS], i;
u32 offset = 0, feature_size;
@ -8000,7 +8000,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
int qed_dbg_all_data_size(struct qed_dev *cdev)
{
struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
&cdev->hwfns[cdev->engine_for_debug];
u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0;
u8 cur_engine, org_engine;
@ -8059,9 +8059,9 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
enum qed_dbg_features feature, u32 *num_dumped_bytes)
{
struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
&cdev->hwfns[cdev->engine_for_debug];
struct qed_dbg_feature *qed_feature =
&cdev->dbg_params.features[feature];
&cdev->dbg_features[feature];
enum dbg_status dbg_rc;
struct qed_ptt *p_ptt;
int rc = 0;
@ -8084,7 +8084,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
DP_VERBOSE(cdev, QED_MSG_DEBUG,
"copying debugfs feature to external buffer\n");
memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size);
*num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords *
*num_dumped_bytes = cdev->dbg_features[feature].dumped_dwords *
4;
out:
@ -8095,7 +8095,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
{
struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug];
&cdev->hwfns[cdev->engine_for_debug];
struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature];
struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
u32 buf_size_dwords;
@ -8120,14 +8120,14 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
u8 qed_get_debug_engine(struct qed_dev *cdev)
{
return cdev->dbg_params.engine_for_debug;
return cdev->engine_for_debug;
}
void qed_set_debug_engine(struct qed_dev *cdev, int engine_number)
{
DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n",
engine_number);
cdev->dbg_params.engine_for_debug = engine_number;
cdev->engine_for_debug = engine_number;
}
void qed_dbg_pf_init(struct qed_dev *cdev)
@ -8146,7 +8146,7 @@ void qed_dbg_pf_init(struct qed_dev *cdev)
}
/* Set the hwfn to be 0 as default */
cdev->dbg_params.engine_for_debug = 0;
cdev->engine_for_debug = 0;
}
void qed_dbg_pf_exit(struct qed_dev *cdev)

View File

@ -3085,7 +3085,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
p_hwfn->rel_pf_id, false);
if (rc) {
DP_NOTICE(p_hwfn, "Final cleanup failed\n");
qed_hw_err_notify(p_hwfn, p_hwfn->p_main_ptt,
QED_HW_ERR_RAMROD_FAIL,
"Final cleanup failed\n");
goto load_err;
}
}

View File

@ -12400,6 +12400,13 @@ struct load_rsp_stc {
#define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0)
};
struct mdump_retain_data_stc {
u32 valid;
u32 epoch;
u32 pf;
u32 status;
};
union drv_union_data {
u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
struct mcp_mac wol_mac;
@ -12488,10 +12495,14 @@ struct public_drv_mb {
#define DRV_MSG_CODE_BIST_TEST 0x001e0000
#define DRV_MSG_CODE_SET_LED_MODE 0x00200000
#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
/* Send crash dump commands with param[3:0] - opcode */
#define DRV_MSG_CODE_MDUMP_CMD 0x00250000
#define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000
#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000
#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000
#define DRV_MSG_CODE_DEBUG_DATA_SEND 0xc0040000
#define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F
#define RESOURCE_CMD_REQ_RESC_SHIFT 0
#define RESOURCE_CMD_REQ_OPCODE_MASK 0x000000E0
@ -12517,6 +12528,21 @@ struct public_drv_mb {
#define RESOURCE_DUMP 0
/* DRV_MSG_CODE_MDUMP_CMD parameters */
#define MDUMP_DRV_PARAM_OPCODE_MASK 0x0000000f
#define DRV_MSG_CODE_MDUMP_ACK 0x01
#define DRV_MSG_CODE_MDUMP_SET_VALUES 0x02
#define DRV_MSG_CODE_MDUMP_TRIGGER 0x03
#define DRV_MSG_CODE_MDUMP_GET_CONFIG 0x04
#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05
#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06
#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07
#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08
#define DRV_MSG_CODE_HW_DUMP_TRIGGER 0x0a
#define DRV_MSG_CODE_MDUMP_GEN_MDUMP2 0x0b
#define DRV_MSG_CODE_MDUMP_FREE_MDUMP2 0x0c
#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000
#define DRV_MSG_CODE_OS_WOL 0x002e0000
@ -12626,6 +12652,17 @@ struct public_drv_mb {
#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002
#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK 0x00010000
/* DRV_MSG_CODE_DEBUG_DATA_SEND parameters */
#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_OFFSET 0
#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_MASK 0xFF
/* Driver attributes params */
#define DRV_MB_PARAM_ATTRIBUTE_KEY_OFFSET 0
#define DRV_MB_PARAM_ATTRIBUTE_KEY_MASK 0x00FFFFFF
#define DRV_MB_PARAM_ATTRIBUTE_CMD_OFFSET 24
#define DRV_MB_PARAM_ATTRIBUTE_CMD_MASK 0xFF000000
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_OFFSET 0
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT 0
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK 0x0000FFFF
#define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT 16
@ -12678,6 +12715,14 @@ struct public_drv_mb {
#define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE 0x00870000
#define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff
#define FW_MSG_CODE_DEBUG_DATA_SEND_INV_ARG 0xb0070000
#define FW_MSG_CODE_DEBUG_DATA_SEND_BUF_FULL 0xb0080000
#define FW_MSG_CODE_DEBUG_DATA_SEND_NO_BUF 0xb0090000
#define FW_MSG_CODE_DEBUG_NOT_ENABLED 0xb00a0000
#define FW_MSG_CODE_DEBUG_DATA_SEND_OK 0xb00b0000
#define FW_MSG_CODE_MDUMP_INVALID_CMD 0x00030000
u32 fw_mb_param;
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
@ -12742,9 +12787,9 @@ enum MFW_DRV_MSG_TYPE {
MFW_DRV_MSG_GET_FCOE_STATS,
MFW_DRV_MSG_GET_ISCSI_STATS,
MFW_DRV_MSG_GET_RDMA_STATS,
MFW_DRV_MSG_BW_UPDATE10,
MFW_DRV_MSG_FAILURE_DETECTED,
MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
MFW_DRV_MSG_BW_UPDATE11,
MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED,
MFW_DRV_MSG_RESERVED,
MFW_DRV_MSG_GET_TLV_REQ,
MFW_DRV_MSG_OEM_CFG_UPDATE,

View File

@ -762,9 +762,10 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
dst_type,
length_cur);
if (qed_status) {
DP_NOTICE(p_hwfn,
"qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
qed_status, src_addr, dst_addr, length_cur);
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_DMAE_FAIL,
"qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
qed_status, src_addr,
dst_addr, length_cur);
break;
}
}
@ -837,6 +838,41 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
return rc;
}
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum qed_hw_err_type err_type, char *fmt, ...)
{
char buf[QED_HW_ERR_MAX_STR_SIZE];
va_list vl;
int len;
if (fmt) {
va_start(vl, fmt);
len = vsnprintf(buf, QED_HW_ERR_MAX_STR_SIZE, fmt, vl);
va_end(vl);
if (len > QED_HW_ERR_MAX_STR_SIZE - 1)
len = QED_HW_ERR_MAX_STR_SIZE - 1;
DP_NOTICE(p_hwfn, "%s", buf);
}
/* Fan failure cannot be masked by handling of another HW error */
if (p_hwfn->cdev->recov_in_prog &&
err_type != QED_HW_ERR_FAN_FAIL) {
DP_VERBOSE(p_hwfn,
NETIF_MSG_DRV,
"Recovery is in progress. Avoid notifying about HW error %d.\n",
err_type);
return;
}
qed_hw_error_occurred(p_hwfn, err_type);
if (fmt)
qed_mcp_send_raw_debug_data(p_hwfn, p_ptt, buf, len);
}
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase)
{

View File

@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase);
#define QED_HW_ERR_MAX_STR_SIZE 256
/**
* @brief qed_hw_err_notify - Notify upper layer driver and management FW
* about a HW error.
*
* @param p_hwfn
* @param p_ptt
* @param err_type
* @param fmt - debug data buffer to send to the MFW
* @param ... - buffer format args
*/
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum qed_hw_err_type err_type, char *fmt, ...);
#endif

View File

@ -96,6 +96,7 @@ struct aeu_invert_reg_bit {
#define ATTENTION_BB(value) (value << ATTENTION_BB_SHIFT)
#define ATTENTION_BB_DIFFERENT BIT(23)
#define ATTENTION_CLEAR_ENABLE BIT(28)
unsigned int flags;
/* Callback to call if attention will be triggered */
@ -363,6 +364,21 @@ static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
}
static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
{
qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_FW_ASSERT,
"FW assertion!\n");
return -EINVAL;
}
static int qed_general_attention_35(struct qed_hwfn *p_hwfn)
{
DP_INFO(p_hwfn, "General attention 35!\n");
return 0;
}
#define QED_DORQ_ATTENTION_REASON_MASK (0xfffff)
#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
#define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
@ -605,13 +621,15 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
{
{ /* After Invert 4 */
{"General Attention 32", ATTENTION_SINGLE,
NULL, MAX_BLOCK_ID},
{"General Attention 32", ATTENTION_SINGLE |
ATTENTION_CLEAR_ENABLE, qed_fw_assertion,
MAX_BLOCK_ID},
{"General Attention %d",
(2 << ATTENTION_LENGTH_SHIFT) |
(33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID},
{"General Attention 35", ATTENTION_SINGLE,
NULL, MAX_BLOCK_ID},
{"General Attention 35", ATTENTION_SINGLE |
ATTENTION_CLEAR_ENABLE, qed_general_attention_35,
MAX_BLOCK_ID},
{"NWS Parity",
ATTENTION_PAR | ATTENTION_BB_DIFFERENT |
ATTENTION_BB(AEU_INVERT_REG_SPECIAL_CNIG_0),
@ -927,9 +945,12 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn,
qed_int_attn_print(p_hwfn, p_aeu->block_index,
ATTN_TYPE_INTERRUPT, !b_fatal);
/* If the attention is benign, no need to prevent it */
if (!rc)
/* Reach assertion if attention is fatal */
if (b_fatal)
qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_HW_ATTN,
"`%s': Fatal attention\n",
p_bit_name);
else /* If the attention is benign, no need to prevent it */
goto out;
/* Prevent this Attention from being asserted in the future */
@ -2349,6 +2370,11 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev)
cdev->hwfns[i].b_int_requested = false;
}
void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable)
{
cdev->attn_clr_en = clr_enable;
}
int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
u8 timer_res, u16 sb_id, bool tx)
{

View File

@ -190,6 +190,17 @@ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
*/
void qed_int_disable_post_isr_release(struct qed_dev *cdev);
/**
* @brief qed_int_attn_clr_enable - sets whether the general behavior is
* preventing attentions from being reasserted, or following the
* attributes of the specific attention.
*
* @param cdev
* @param clr_enable
*
*/
void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable);
/**
* @brief - Doorbell Recovery handler.
* Run doorbell recovery in case of PF overflow (and flush DORQ if

View File

@ -2468,6 +2468,39 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
ops->schedule_recovery_handler(cookie);
}
char *qed_hw_err_type_descr[] = {
[QED_HW_ERR_FAN_FAIL] = "Fan Failure",
[QED_HW_ERR_MFW_RESP_FAIL] = "MFW Response Failure",
[QED_HW_ERR_HW_ATTN] = "HW Attention",
[QED_HW_ERR_DMAE_FAIL] = "DMAE Failure",
[QED_HW_ERR_RAMROD_FAIL] = "Ramrod Failure",
[QED_HW_ERR_FW_ASSERT] = "FW Assertion",
[QED_HW_ERR_LAST] = "Unknown",
};
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
enum qed_hw_err_type err_type)
{
struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
void *cookie = p_hwfn->cdev->ops_cookie;
char *err_str;
if (err_type > QED_HW_ERR_LAST)
err_type = QED_HW_ERR_LAST;
err_str = qed_hw_err_type_descr[err_type];
DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);
/* Call the HW error handler of the protocol driver.
* If it is not available - perform a minimal handling of preventing
* HW attentions from being reasserted.
*/
if (ops && ops->schedule_hw_err_handler)
ops->schedule_hw_err_handler(cookie, err_type);
else
qed_int_attn_clr_enable(p_hwfn->cdev, true);
}
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
void *handle)
{
@ -2689,6 +2722,7 @@ const struct qed_common_ops qed_common_ops_pass = {
.set_led = &qed_set_led,
.recovery_process = &qed_recovery_process,
.recovery_prolog = &qed_recovery_prolog,
.attn_clr_enable = &qed_int_attn_clr_enable,
.update_drv_state = &qed_update_drv_state,
.update_mac = &qed_update_mac,
.update_mtu = &qed_update_mtu,

View File

@ -575,6 +575,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
qed_mcp_cmd_set_blocking(p_hwfn, true);
qed_hw_err_notify(p_hwfn, p_ptt,
QED_HW_ERR_MFW_RESP_FAIL, NULL);
return -EAGAIN;
}
@ -1704,6 +1706,127 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
&resp, &param);
}
static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
{
/* A single notification should be sent to upper driver in CMT mode */
if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
return;
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_FAN_FAIL,
"Fan failure was detected on the network interface card and it's going to be shut down.\n");
}
struct qed_mdump_cmd_params {
u32 cmd;
void *p_data_src;
u8 data_src_size;
void *p_data_dst;
u8 data_dst_size;
u32 mcp_resp;
};
static int
qed_mcp_mdump_cmd(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct qed_mdump_cmd_params *p_mdump_cmd_params)
{
struct qed_mcp_mb_params mb_params;
int rc;
memset(&mb_params, 0, sizeof(mb_params));
mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD;
mb_params.param = p_mdump_cmd_params->cmd;
mb_params.p_data_src = p_mdump_cmd_params->p_data_src;
mb_params.data_src_size = p_mdump_cmd_params->data_src_size;
mb_params.p_data_dst = p_mdump_cmd_params->p_data_dst;
mb_params.data_dst_size = p_mdump_cmd_params->data_dst_size;
rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
if (rc)
return rc;
p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
DP_INFO(p_hwfn,
"The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
p_mdump_cmd_params->cmd);
rc = -EOPNOTSUPP;
} else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
DP_INFO(p_hwfn,
"The mdump command is not supported by the MFW\n");
rc = -EOPNOTSUPP;
}
return rc;
}
static int qed_mcp_mdump_ack(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mdump_cmd_params mdump_cmd_params;
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_ACK;
return qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
}
int
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct mdump_retain_data_stc *p_mdump_retain)
{
struct qed_mdump_cmd_params mdump_cmd_params;
int rc;
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
mdump_cmd_params.p_data_dst = p_mdump_retain;
mdump_cmd_params.data_dst_size = sizeof(*p_mdump_retain);
rc = qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
if (rc)
return rc;
if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
DP_INFO(p_hwfn,
"Failed to get the mdump retained data [mcp_resp 0x%x]\n",
mdump_cmd_params.mcp_resp);
return -EINVAL;
}
return 0;
}
static void qed_mcp_handle_critical_error(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
{
struct mdump_retain_data_stc mdump_retain;
int rc;
/* In CMT mode - no need for more than a single acknowledgment to the
* MFW, and no more than a single notification to the upper driver.
*/
if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
return;
rc = qed_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
if (rc == 0 && mdump_retain.valid)
DP_NOTICE(p_hwfn,
"The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
mdump_retain.epoch,
mdump_retain.pf, mdump_retain.status);
else
DP_NOTICE(p_hwfn,
"The MFW notified that a critical error occurred in the device\n");
DP_NOTICE(p_hwfn,
"Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
qed_mcp_mdump_ack(p_hwfn, p_ptt);
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_HW_ATTN, NULL);
}
void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct public_func shmem_info;
@ -1850,6 +1973,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
case MFW_DRV_MSG_S_TAG_UPDATE:
qed_mcp_update_stag(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_FAILURE_DETECTED:
qed_mcp_handle_fan_failure(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
qed_mcp_handle_critical_error(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_GET_TLV_REQ:
qed_mfw_tlv_req(p_hwfn);
break;
@ -3819,3 +3948,127 @@ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
DRV_MSG_CODE_SET_NVM_CFG_OPTION,
mb_param, &resp, &param, len, (u32 *)p_buf);
}
#define QED_MCP_DBG_DATA_MAX_SIZE MCP_DRV_NVM_BUF_LEN
#define QED_MCP_DBG_DATA_MAX_HEADER_SIZE sizeof(u32)
#define QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE \
(QED_MCP_DBG_DATA_MAX_SIZE - QED_MCP_DBG_DATA_MAX_HEADER_SIZE)
static int
__qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u8 *p_buf, u8 size)
{
struct qed_mcp_mb_params mb_params;
int rc;
if (size > QED_MCP_DBG_DATA_MAX_SIZE) {
DP_ERR(p_hwfn,
"Debug data size is %d while it should not exceed %d\n",
size, QED_MCP_DBG_DATA_MAX_SIZE);
return -EINVAL;
}
memset(&mb_params, 0, sizeof(mb_params));
mb_params.cmd = DRV_MSG_CODE_DEBUG_DATA_SEND;
SET_MFW_FIELD(mb_params.param, DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE, size);
mb_params.p_data_src = p_buf;
mb_params.data_src_size = size;
rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
if (rc)
return rc;
if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
DP_INFO(p_hwfn,
"The DEBUG_DATA_SEND command is unsupported by the MFW\n");
return -EOPNOTSUPP;
} else if (mb_params.mcp_resp == (u32)FW_MSG_CODE_DEBUG_NOT_ENABLED) {
DP_INFO(p_hwfn, "The DEBUG_DATA_SEND command is not enabled\n");
return -EBUSY;
} else if (mb_params.mcp_resp != (u32)FW_MSG_CODE_DEBUG_DATA_SEND_OK) {
DP_NOTICE(p_hwfn,
"Failed to send debug data to the MFW [resp 0x%08x]\n",
mb_params.mcp_resp);
return -EINVAL;
}
return 0;
}
enum qed_mcp_dbg_data_type {
QED_MCP_DBG_DATA_TYPE_RAW,
};
/* Header format: [31:28] PFID, [27:20] flags, [19:12] type, [11:0] S/N */
#define QED_MCP_DBG_DATA_HDR_SN_OFFSET 0
#define QED_MCP_DBG_DATA_HDR_SN_MASK 0x00000fff
#define QED_MCP_DBG_DATA_HDR_TYPE_OFFSET 12
#define QED_MCP_DBG_DATA_HDR_TYPE_MASK 0x000ff000
#define QED_MCP_DBG_DATA_HDR_FLAGS_OFFSET 20
#define QED_MCP_DBG_DATA_HDR_FLAGS_MASK 0x0ff00000
#define QED_MCP_DBG_DATA_HDR_PF_OFFSET 28
#define QED_MCP_DBG_DATA_HDR_PF_MASK 0xf0000000
#define QED_MCP_DBG_DATA_HDR_FLAGS_FIRST 0x1
#define QED_MCP_DBG_DATA_HDR_FLAGS_LAST 0x2
static int
qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum qed_mcp_dbg_data_type type, u8 *p_buf, u32 size)
{
u8 raw_data[QED_MCP_DBG_DATA_MAX_SIZE], *p_tmp_buf = p_buf;
u32 tmp_size = size, *p_header, *p_payload;
u8 flags = 0;
u16 seq;
int rc;
p_header = (u32 *)raw_data;
p_payload = (u32 *)(raw_data + QED_MCP_DBG_DATA_MAX_HEADER_SIZE);
seq = (u16)atomic_inc_return(&p_hwfn->mcp_info->dbg_data_seq);
/* First chunk is marked as 'first' */
flags |= QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
*p_header = 0;
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_SN, seq);
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_TYPE, type);
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_PF, p_hwfn->abs_pf_id);
while (tmp_size > QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE) {
memcpy(p_payload, p_tmp_buf, QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE);
rc = __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
QED_MCP_DBG_DATA_MAX_SIZE);
if (rc)
return rc;
/* Clear the 'first' marking after sending the first chunk */
if (p_tmp_buf == p_buf) {
flags &= ~QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS,
flags);
}
p_tmp_buf += QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
tmp_size -= QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
}
/* Last chunk is marked as 'last' */
flags |= QED_MCP_DBG_DATA_HDR_FLAGS_LAST;
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
memcpy(p_payload, p_tmp_buf, tmp_size);
/* Casting the left size to u8 is ok since at this point it is <= 32 */
return __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
(u8)(QED_MCP_DBG_DATA_MAX_HEADER_SIZE +
tmp_size));
}
int
qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u8 *p_buf, u32 size)
{
return qed_mcp_send_debug_data(p_hwfn, p_ptt,
QED_MCP_DBG_DATA_TYPE_RAW, p_buf, size);
}

View File

@ -685,6 +685,18 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
*/
int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
/**
* @brief Send raw debug data to the MFW
*
* @param p_hwfn
* @param p_ptt
* @param p_buf - raw debug data buffer
* @param size - buffer size
*/
int
qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u8 *p_buf, u32 size);
/* Using hwfn number (and not pf_num) is required since in CMT mode,
* same pf_num may be used by two different hwfn
* TODO - this shouldn't really be in .h file, but until all fields
@ -731,6 +743,9 @@ struct qed_mcp_info {
/* Capabilties negotiated with the MFW */
u32 capabilities;
/* S/N for debug data mailbox commands */
atomic_t dbg_data_seq;
};
struct qed_mcp_mb_params {
@ -1001,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u32 mask_parities);
/* @brief - Gets the mdump retained data from the MFW.
*
* @param p_hwfn
* @param p_ptt
* @param p_mdump_retain
*
* @param return 0 upon success.
*/
int
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct mdump_retain_data_stc *p_mdump_retain);
/**
* @brief - Sets the MFW's max value for the given resource
*

View File

@ -160,12 +160,16 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
return 0;
}
err:
DP_NOTICE(p_hwfn,
"Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
le32_to_cpu(p_ent->elem.hdr.cid),
p_ent->elem.hdr.cmd_id,
p_ent->elem.hdr.protocol_id,
le16_to_cpu(p_ent->elem.hdr.echo));
p_ptt = qed_ptt_acquire(p_hwfn);
if (!p_ptt)
return -EBUSY;
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_RAMROD_FAIL,
"Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
le32_to_cpu(p_ent->elem.hdr.cid),
p_ent->elem.hdr.cmd_id,
p_ent->elem.hdr.protocol_id,
le16_to_cpu(p_ent->elem.hdr.echo));
qed_ptt_release(p_hwfn, p_ptt);
return -EBUSY;
}

View File

@ -278,6 +278,14 @@ struct qede_dev {
struct qede_rdma_dev rdma_info;
struct bpf_prog *xdp_prog;
unsigned long err_flags;
#define QEDE_ERR_IS_HANDLED 31
#define QEDE_ERR_ATTN_CLR_EN 0
#define QEDE_ERR_GET_DBG_INFO 1
#define QEDE_ERR_IS_RECOVERABLE 2
#define QEDE_ERR_WARN 3
struct qede_dump_info dump_info;
};
@ -485,12 +493,15 @@ struct qede_fastpath {
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
#define QEDE_SP_RSVD1 2
#define QEDE_SP_RSVD2 3
#define QEDE_SP_HW_ERR 4
#define QEDE_SP_ARFS_CONFIG 5
#define QEDE_SP_AER 7
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#define QEDE_SP_ARFS_CONFIG 4
#define QEDE_SP_TASK_POLL_DELAY (5 * HZ)
#endif
@ -522,7 +533,6 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
netdev_features_t qede_features_check(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features);
void qede_tx_log_print(struct qede_dev *edev, struct qede_fastpath *fp);
int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy);
int qede_free_tx_pkt(struct qede_dev *edev,
struct qede_tx_queue *txq, int *len);

View File

@ -190,12 +190,14 @@ static const struct {
enum {
QEDE_PRI_FLAG_CMT,
QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */
QEDE_PRI_FLAG_RECOVER_ON_ERROR,
QEDE_PRI_FLAG_LEN,
};
static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
"Coupled-Function",
"SmartAN capable",
"Recover on error",
};
enum qede_ethtool_tests {
@ -417,9 +419,30 @@ static u32 qede_get_priv_flags(struct net_device *dev)
if (edev->dev_info.common.smart_an)
flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT);
if (edev->err_flags & BIT(QEDE_ERR_IS_RECOVERABLE))
flags |= BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR);
return flags;
}
static int qede_set_priv_flags(struct net_device *dev, u32 flags)
{
struct qede_dev *edev = netdev_priv(dev);
u32 cflags = qede_get_priv_flags(dev);
u32 dflags = flags ^ cflags;
/* can only change RECOVER_ON_ERROR flag */
if (dflags & ~BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))
return -EINVAL;
if (flags & BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))
set_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
else
clear_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
return 0;
}
struct qede_link_mode_mapping {
u32 qed_link_mode;
u32 ethtool_link_mode;
@ -2098,6 +2121,7 @@ static const struct ethtool_ops qede_ethtool_ops = {
.set_phys_id = qede_set_phys_id,
.get_ethtool_stats = qede_get_ethtool_stats,
.get_priv_flags = qede_get_priv_flags,
.set_priv_flags = qede_set_priv_flags,
.get_sset_count = qede_get_sset_count,
.get_rxnfc = qede_get_rxnfc,
.set_rxnfc = qede_set_rxnfc,

View File

@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
static void qede_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type);
static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data);
static void qede_generic_hw_err_handler(struct qede_dev *edev);
#ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto)
@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
#endif
.link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
.schedule_hw_err_handler = qede_schedule_hw_err_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data,
},
@ -536,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0;
}
static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq)
{
DP_NOTICE(edev,
"Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n",
txq->index, le16_to_cpu(*txq->hw_cons_ptr),
qed_chain_get_cons_idx(&txq->tx_pbl),
qed_chain_get_prod_idx(&txq->tx_pbl),
jiffies);
}
static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct qede_dev *edev = netdev_priv(dev);
struct qede_tx_queue *txq;
int cos;
netif_carrier_off(dev);
DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue);
if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX))
return;
for_each_cos_in_txq(edev, cos) {
txq = &edev->fp_array[txqueue].txq[cos];
if (qed_chain_get_cons_idx(&txq->tx_pbl) !=
qed_chain_get_prod_idx(&txq->tx_pbl))
qede_tx_log_print(edev, txq);
}
if (IS_VF(edev))
return;
if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
edev->state == QEDE_STATE_RECOVERY) {
DP_INFO(edev,
"Avoid handling a Tx timeout while another HW error is being handled\n");
return;
}
set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
}
static int qede_setup_tc(struct net_device *ndev, u8 num_tc)
{
struct qede_dev *edev = netdev_priv(ndev);
@ -623,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
.ndo_do_ioctl = qede_ioctl,
.ndo_tx_timeout = qede_tx_timeout,
#ifdef CONFIG_QED_SRIOV
.ndo_set_vf_mac = qede_set_vf_mac,
.ndo_set_vf_vlan = qede_set_vf_vlan,
@ -1009,6 +1058,8 @@ static void qede_sp_task(struct work_struct *work)
qede_process_arfs_filters(edev, false);
}
#endif
if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
qede_generic_hw_err_handler(edev);
__qede_unlock(edev);
if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
@ -2509,6 +2560,100 @@ static void qede_recovery_handler(struct qede_dev *edev)
qede_recovery_failed(edev);
}
static void qede_atomic_hw_err_handler(struct qede_dev *edev)
{
struct qed_dev *cdev = edev->cdev;
DP_NOTICE(edev,
"Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
edev->err_flags);
/* Get a call trace of the flow that led to the error */
WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
/* Prevent HW attentions from being reasserted */
if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags))
edev->ops->common->attn_clr_enable(cdev, true);
DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
}
static void qede_generic_hw_err_handler(struct qede_dev *edev)
{
struct qed_dev *cdev = edev->cdev;
DP_NOTICE(edev,
"Generic sleepable HW error handling started - err_flags 0x%lx\n",
edev->err_flags);
/* Trigger a recovery process.
* This is placed in the sleep requiring section just to make
* sure it is the last one, and that all the other operations
* were completed.
*/
if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
edev->ops->common->recovery_process(cdev);
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
}
static void qede_set_hw_err_flags(struct qede_dev *edev,
enum qed_hw_err_type err_type)
{
unsigned long err_flags = 0;
switch (err_type) {
case QED_HW_ERR_DMAE_FAIL:
set_bit(QEDE_ERR_WARN, &err_flags);
fallthrough;
case QED_HW_ERR_MFW_RESP_FAIL:
case QED_HW_ERR_HW_ATTN:
case QED_HW_ERR_RAMROD_FAIL:
case QED_HW_ERR_FW_ASSERT:
set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
break;
default:
DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
break;
}
edev->err_flags |= err_flags;
}
static void qede_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type)
{
struct qede_dev *edev = dev;
/* Fan failure cannot be masked by handling of another HW error or by a
* concurrent recovery process.
*/
if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
edev->state == QEDE_STATE_RECOVERY) &&
err_type != QED_HW_ERR_FAN_FAIL) {
DP_INFO(edev,
"Avoid scheduling an error handling while another HW error is being handled\n");
return;
}
if (err_type >= QED_HW_ERR_LAST) {
DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
return;
}
qede_set_hw_err_flags(edev, err_type);
qede_atomic_hw_err_handler(edev);
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
}
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct netdev_queue *netdev_txq;

View File

@ -607,6 +607,16 @@ struct qed_sb_info {
struct qed_dev *cdev;
};
enum qed_hw_err_type {
QED_HW_ERR_FAN_FAIL,
QED_HW_ERR_MFW_RESP_FAIL,
QED_HW_ERR_HW_ATTN,
QED_HW_ERR_DMAE_FAIL,
QED_HW_ERR_RAMROD_FAIL,
QED_HW_ERR_FW_ASSERT,
QED_HW_ERR_LAST,
};
enum qed_dev_type {
QED_DEV_TYPE_BB,
QED_DEV_TYPE_AH,
@ -811,10 +821,11 @@ enum qed_nvm_flash_cmd {
struct qed_common_cb_ops {
void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
void (*link_update)(void *dev,
struct qed_link_output *link);
void (*link_update)(void *dev, struct qed_link_output *link);
void (*schedule_recovery_handler)(void *dev);
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
void (*schedule_hw_err_handler)(void *dev,
enum qed_hw_err_type err_type);
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
void (*get_protocol_tlv_data)(void *dev, void *data);
};
@ -1034,6 +1045,15 @@ struct qed_common_ops {
*/
int (*set_led)(struct qed_dev *cdev,
enum qed_led_mode mode);
/**
* @brief attn_clr_enable - Prevent attentions from being reasserted
*
* @param cdev
* @param clr_enable
*/
void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable);
/**
* @brief db_recovery_add - add doorbell information to the doorbell
* recovery mechanism.