qed: Use the doorbell overflow recovery mechanism in case of doorbell overflow

In case of an attention from the doorbell queue block, analyze the HW
indications. In case of a doorbell overflow, execute a doorbell recovery.
Since there can be spurious indications (race conditions between multiple PFs),
schedule a periodic task for checking whether a doorbell overflow may have been
missed. After a set time with no indications, terminate the periodic task.

Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ariel Elior 2018-11-28 18:16:03 +02:00 committed by David S. Miller
parent 36907cd5cd
commit a1b469b8b1
6 changed files with 280 additions and 24 deletions

View File

@ -536,6 +536,7 @@ struct qed_simd_fp_handler {
enum qed_slowpath_wq_flag {
QED_SLOWPATH_MFW_TLV_REQ,
QED_SLOWPATH_PERIODIC_DB_REC,
};
struct qed_hwfn {
@ -669,11 +670,12 @@ struct qed_hwfn {
struct delayed_work iov_task;
unsigned long iov_task_flags;
#endif
struct z_stream_s *stream;
bool slowpath_wq_active;
struct workqueue_struct *slowpath_wq;
struct delayed_work slowpath_task;
unsigned long slowpath_task_flags;
u32 periodic_db_rec_count;
};
struct pci_params {
@ -914,6 +916,12 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
#define QED_LEADING_HWFN(dev) (&dev->hwfns[0])
/* doorbell recovery mechanism */
void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
enum qed_db_rec_exec db_exec);
bool qed_edpm_enabled(struct qed_hwfn *p_hwfn);
/* Other Linux specific common definitions */
#define DP_NAME(cdev) ((cdev)->name)
@ -948,4 +956,6 @@ int qed_mfw_fill_tlv_data(struct qed_hwfn *hwfn,
union qed_mfw_tlv_data *tlv_data);
void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc);
void qed_periodic_db_rec_start(struct qed_hwfn *p_hwfn);
#endif /* _QED_H */

View File

@ -1788,6 +1788,14 @@ enum QED_ROCE_EDPM_MODE {
QED_ROCE_EDPM_MODE_DISABLE = 2,
};
bool qed_edpm_enabled(struct qed_hwfn *p_hwfn)
{
if (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm)
return false;
return true;
}
static int
qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
@ -1857,13 +1865,13 @@ qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
p_hwfn->wid_count = (u16) n_cpus;
DP_INFO(p_hwfn,
"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n",
"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s, page_size=%lu\n",
norm_regsize,
pwm_regsize,
p_hwfn->dpi_size,
p_hwfn->dpi_count,
((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ?
"disabled" : "enabled");
(!qed_edpm_enabled(p_hwfn)) ?
"disabled" : "enabled", PAGE_SIZE);
if (rc) {
DP_ERR(p_hwfn,

View File

@ -363,27 +363,145 @@ static int qed_pglub_rbc_attn_cb(struct qed_hwfn *p_hwfn)
#define QED_DORQ_ATTENTION_REASON_MASK (0xfffff)
#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
#define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
#define QED_DORQ_ATTENTION_SIZE_MASK (0x7f)
#define QED_DORQ_ATTENTION_SIZE_SHIFT (16)
#define QED_DB_REC_COUNT 1000
#define QED_DB_REC_INTERVAL 100
static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
{
u32 count = QED_DB_REC_COUNT;
u32 usage = 1;
/* wait for usage to zero or count to run out. This is necessary since
* EDPM doorbell transactions can take multiple 64b cycles, and as such
* can "split" over the pci. Possibly, the doorbell drop can happen with
* half an EDPM in the queue and other half dropped. Another EDPM
* doorbell to the same address (from doorbell recovery mechanism or
* from the doorbelling entity) could have first half dropped and second
* half interpreted as continuation of the first. To prevent such
* malformed doorbells from reaching the device, flush the queue before
* releasing the overflow sticky indication.
*/
while (count-- && usage) {
usage = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_USAGE_CNT);
udelay(QED_DB_REC_INTERVAL);
}
/* should have been depleted by now */
if (usage) {
DP_NOTICE(p_hwfn->cdev,
"DB recovery: doorbell usage failed to zero after %d usec. usage was %x\n",
QED_DB_REC_INTERVAL * QED_DB_REC_COUNT, usage);
return -EBUSY;
}
return 0;
}
int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 overflow;
int rc;
overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow);
if (!overflow) {
qed_db_recovery_execute(p_hwfn, DB_REC_ONCE);
return 0;
}
if (qed_edpm_enabled(p_hwfn)) {
rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
if (rc)
return rc;
}
/* Flush any pending (e)dpm as they may never arrive */
qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
/* Release overflow sticky indication (stop silently dropping everything) */
qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
/* Repeat all last doorbells (doorbell drop recovery) */
qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
return 0;
}
static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
{
u32 reason;
u32 int_sts, first_drop_reason, details, address, all_drops_reason;
struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
int rc;
reason = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, DORQ_REG_DB_DROP_REASON) &
int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
/* int_sts may be zero since all PFs were interrupted for doorbell
* overflow but another one already handled it. Can abort here. If
* This PF also requires overflow recovery we will be interrupted again.
* The masked almost full indication may also be set. Ignoring.
*/
if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
return 0;
/* check if db_drop or overflow happened */
if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
/* Obtain data about db drop/overflow */
first_drop_reason = qed_rd(p_hwfn, p_ptt,
DORQ_REG_DB_DROP_REASON) &
QED_DORQ_ATTENTION_REASON_MASK;
if (reason) {
u32 details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
DORQ_REG_DB_DROP_DETAILS);
details = qed_rd(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS);
address = qed_rd(p_hwfn, p_ptt,
DORQ_REG_DB_DROP_DETAILS_ADDRESS);
all_drops_reason = qed_rd(p_hwfn, p_ptt,
DORQ_REG_DB_DROP_DETAILS_REASON);
DP_INFO(p_hwfn->cdev,
"DORQ db_drop: address 0x%08x Opaque FID 0x%04x Size [bytes] 0x%08x Reason: 0x%08x\n",
qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
DORQ_REG_DB_DROP_DETAILS_ADDRESS),
(u16)(details & QED_DORQ_ATTENTION_OPAQUE_MASK),
/* Log info */
DP_NOTICE(p_hwfn->cdev,
"Doorbell drop occurred\n"
"Address\t\t0x%08x\t(second BAR address)\n"
"FID\t\t0x%04x\t\t(Opaque FID)\n"
"Size\t\t0x%04x\t\t(in bytes)\n"
"1st drop reason\t0x%08x\t(details on first drop since last handling)\n"
"Sticky reasons\t0x%08x\t(all drop reasons since last handling)\n",
address,
GET_FIELD(details, QED_DORQ_ATTENTION_OPAQUE),
GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
reason);
first_drop_reason, all_drops_reason);
rc = qed_db_rec_handler(p_hwfn, p_ptt);
qed_periodic_db_rec_start(p_hwfn);
if (rc)
return rc;
/* Clear the doorbell drop details and prepare for next drop */
qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
/* Mark interrupt as handled (note: even if drop was due to a different
* reason than overflow we mark as handled)
*/
qed_wr(p_hwfn,
p_ptt,
DORQ_REG_INT_STS_WR,
DORQ_REG_INT_STS_DB_DROP |
DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR);
/* If there are no indications other than drop indications, success */
if ((int_sts & ~(DORQ_REG_INT_STS_DB_DROP |
DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR |
DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) == 0)
return 0;
}
/* Some other indication was present - non recoverable */
DP_INFO(p_hwfn, "DORQ fatal attention\n");
return -EINVAL;
}

View File

@ -190,6 +190,16 @@ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
*/
void qed_int_disable_post_isr_release(struct qed_dev *cdev);
/**
* @brief - Doorbell Recovery handler.
* Run DB_REAL_DEAL doorbell recovery in case of PF overflow
* (and flush DORQ if needed), otherwise run DB_REC_ONCE.
*
* @param p_hwfn
* @param p_ptt
*/
int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
#define QED_CAU_DEF_RX_TIMER_RES 0
#define QED_CAU_DEF_TX_TIMER_RES 0

View File

@ -966,9 +966,47 @@ static void qed_update_pf_params(struct qed_dev *cdev,
}
}
#define QED_PERIODIC_DB_REC_COUNT 100
#define QED_PERIODIC_DB_REC_INTERVAL_MS 100
#define QED_PERIODIC_DB_REC_INTERVAL \
msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS)
#define QED_PERIODIC_DB_REC_WAIT_COUNT 10
#define QED_PERIODIC_DB_REC_WAIT_INTERVAL \
(QED_PERIODIC_DB_REC_INTERVAL_MS / QED_PERIODIC_DB_REC_WAIT_COUNT)
static int qed_slowpath_delayed_work(struct qed_hwfn *hwfn,
enum qed_slowpath_wq_flag wq_flag,
unsigned long delay)
{
if (!hwfn->slowpath_wq_active)
return -EINVAL;
/* Memory barrier for setting atomic bit */
smp_mb__before_atomic();
set_bit(wq_flag, &hwfn->slowpath_task_flags);
smp_mb__after_atomic();
queue_delayed_work(hwfn->slowpath_wq, &hwfn->slowpath_task, delay);
return 0;
}
void qed_periodic_db_rec_start(struct qed_hwfn *p_hwfn)
{
/* Reset periodic Doorbell Recovery counter */
p_hwfn->periodic_db_rec_count = QED_PERIODIC_DB_REC_COUNT;
/* Don't schedule periodic Doorbell Recovery if already scheduled */
if (test_bit(QED_SLOWPATH_PERIODIC_DB_REC,
&p_hwfn->slowpath_task_flags))
return;
qed_slowpath_delayed_work(p_hwfn, QED_SLOWPATH_PERIODIC_DB_REC,
QED_PERIODIC_DB_REC_INTERVAL);
}
static void qed_slowpath_wq_stop(struct qed_dev *cdev)
{
int i;
int i, sleep_count = QED_PERIODIC_DB_REC_WAIT_COUNT;
if (IS_VF(cdev))
return;
@ -977,6 +1015,15 @@ static void qed_slowpath_wq_stop(struct qed_dev *cdev)
if (!cdev->hwfns[i].slowpath_wq)
continue;
/* Stop queuing new delayed works */
cdev->hwfns[i].slowpath_wq_active = false;
/* Wait until the last periodic doorbell recovery is executed */
while (test_bit(QED_SLOWPATH_PERIODIC_DB_REC,
&cdev->hwfns[i].slowpath_task_flags) &&
sleep_count--)
msleep(QED_PERIODIC_DB_REC_WAIT_INTERVAL);
flush_workqueue(cdev->hwfns[i].slowpath_wq);
destroy_workqueue(cdev->hwfns[i].slowpath_wq);
}
@ -989,7 +1036,10 @@ static void qed_slowpath_task(struct work_struct *work)
struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
if (!ptt) {
queue_delayed_work(hwfn->slowpath_wq, &hwfn->slowpath_task, 0);
if (hwfn->slowpath_wq_active)
queue_delayed_work(hwfn->slowpath_wq,
&hwfn->slowpath_task, 0);
return;
}
@ -997,6 +1047,15 @@ static void qed_slowpath_task(struct work_struct *work)
&hwfn->slowpath_task_flags))
qed_mfw_process_tlv_req(hwfn, ptt);
if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
&hwfn->slowpath_task_flags)) {
qed_db_rec_handler(hwfn, ptt);
if (hwfn->periodic_db_rec_count--)
qed_slowpath_delayed_work(hwfn,
QED_SLOWPATH_PERIODIC_DB_REC,
QED_PERIODIC_DB_REC_INTERVAL);
}
qed_ptt_release(hwfn, ptt);
}
@ -1023,6 +1082,7 @@ static int qed_slowpath_wq_start(struct qed_dev *cdev)
}
INIT_DELAYED_WORK(&hwfn->slowpath_task, qed_slowpath_task);
hwfn->slowpath_wq_active = true;
}
return 0;

View File

@ -1243,6 +1243,56 @@
0x1701534UL
#define TSEM_REG_DBG_FORCE_FRAME \
0x1701538UL
#define DORQ_REG_PF_USAGE_CNT \
0x1009c0UL
#define DORQ_REG_PF_OVFL_STICKY \
0x1009d0UL
#define DORQ_REG_DPM_FORCE_ABORT \
0x1009d8UL
#define DORQ_REG_INT_STS \
0x100180UL
#define DORQ_REG_INT_STS_ADDRESS_ERROR \
(0x1UL << 0)
#define DORQ_REG_INT_STS_WR \
0x100188UL
#define DORQ_REG_DB_DROP_DETAILS_REL \
0x100a28UL
#define DORQ_REG_INT_STS_ADDRESS_ERROR_SHIFT \
0
#define DORQ_REG_INT_STS_DB_DROP \
(0x1UL << 1)
#define DORQ_REG_INT_STS_DB_DROP_SHIFT \
1
#define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR \
(0x1UL << 2)
#define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR_SHIFT \
2
#define DORQ_REG_INT_STS_DORQ_FIFO_AFULL\
(0x1UL << 3)
#define DORQ_REG_INT_STS_DORQ_FIFO_AFULL_SHIFT \
3
#define DORQ_REG_INT_STS_CFC_BYP_VALIDATION_ERR \
(0x1UL << 4)
#define DORQ_REG_INT_STS_CFC_BYP_VALIDATION_ERR_SHIFT \
4
#define DORQ_REG_INT_STS_CFC_LD_RESP_ERR \
(0x1UL << 5)
#define DORQ_REG_INT_STS_CFC_LD_RESP_ERR_SHIFT \
5
#define DORQ_REG_INT_STS_XCM_DONE_CNT_ERR \
(0x1UL << 6)
#define DORQ_REG_INT_STS_XCM_DONE_CNT_ERR_SHIFT \
6
#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_OVFL_ERR \
(0x1UL << 7)
#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_OVFL_ERR_SHIFT \
7
#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_UNDER_ERR \
(0x1UL << 8)
#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_UNDER_ERR_SHIFT \
8
#define DORQ_REG_DB_DROP_DETAILS_REASON \
0x100a20UL
#define MSEM_REG_DBG_SELECT \
0x1801528UL
#define MSEM_REG_DBG_DWORD_ENABLE \