From b353ce556d521351eb7daf609e446f3595a6fad6 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 7 Feb 2019 01:31:23 -0500 Subject: [PATCH] RDMA/bnxt_re: Add 64bit doorbells for 57500 series The new chip series has 64 bit doorbell for notification queues. Thus, both control and data path event queues need new routines to write 64 bit doorbell. Adding the same. There is new doorbell interface between the chip and driver. Changing the chip specific data structure definitions. Additional significant changes are listed below - bnxt_re_net_ring_free/alloc takes a new argument - bnxt_qplib_enable_nq and enable_rcfw uses new doorbell offset for new chip. - DB mapping for NQ and CREQ now maps 8 bytes. - DBR_DBR_* macros renames to DBC_DBC_* - store nq_db_offset in a 32bit data type. - got rid of __iowrite64_copy, used writeq instead. - changed the DB header initialization to simpler scheme. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/Kconfig | 1 + drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 +- drivers/infiniband/hw/bnxt_re/main.c | 85 ++++++++++------ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 112 ++++++++++----------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 43 +++++++- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 31 ++++-- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 44 +++++++- drivers/infiniband/hw/bnxt_re/qplib_res.h | 13 +++ drivers/infiniband/hw/bnxt_re/roce_hsi.h | 92 +++++++++-------- 9 files changed, 274 insertions(+), 151 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index 18f5ed082f41..be2fdad248eb 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,5 +1,6 @@ config INFINIBAND_BNXT_RE tristate "Broadcom Netxtreme HCA support" + depends on 64BIT depends on ETHERNET && NETDEVICES && PCI && INET && DCB depends on MAY_USE_DEVLINK select NET_VENDOR_BROADCOM diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9bc637e49faa..3e3e481b2fb8 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3299,10 +3299,10 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq, spin_lock_irqsave(&cq->cq_lock, flags); /* Trigger on the very next completion */ if (ib_cqn_flags & IB_CQ_NEXT_COMP) - type = DBR_DBR_TYPE_CQ_ARMALL; + type = DBC_DBC_TYPE_CQ_ARMALL; /* Trigger on the next solicited completion */ else if (ib_cqn_flags & IB_CQ_SOLICITED) - type = DBR_DBR_TYPE_CQ_ARMSE; + type = DBC_DBC_TYPE_CQ_ARMSE; /* Poll to see if there are missed events */ if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) && diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 60b5c4876354..0df180850a75 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -369,7 +369,8 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, fw_msg->timeout = timeout; } -static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) +static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, + u16 fw_ring_id, int type) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ring_free_input req = {0}; @@ -383,7 +384,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); - req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; + req.ring_type = type; req.ring_id = cpu_to_le16(fw_ring_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -420,7 +421,7 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, /* Association of ring index with doorbell index and MSIX number */ req.logical_id = cpu_to_le16(map_index); req.length = cpu_to_le32(ring_mask + 1); - req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; + req.ring_type = type; req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -884,6 +885,12 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } +static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) +{ + return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + 0x10000 : rdev->msix_entries[indx].db_offset; +} + static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { int i; @@ -897,18 +904,18 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) static int bnxt_re_init_res(struct bnxt_re_dev *rdev) { - int rc = 0, i; int num_vec_enabled = 0; + int rc = 0, i; + u32 db_offt; bnxt_qplib_init_res(&rdev->qplib_res); for (i = 1; i < rdev->num_msix ; i++) { + db_offt = bnxt_re_get_nqdb_offset(rdev, i); rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], i - 1, rdev->msix_entries[i].vector, - rdev->msix_entries[i].db_offset, - &bnxt_re_cqn_handler, + db_offt, &bnxt_re_cqn_handler, &bnxt_re_srqn_handler); - if (rc) { dev_err(rdev_to_dev(rdev), "Failed to enable NQ with rc = 0x%x", rc); @@ -920,17 +927,18 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) fail: for (i = num_vec_enabled; i >= 0; i--) bnxt_qplib_disable_nq(&rdev->nq[i]); - return rc; } static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) { + u8 type; int i; for (i = 0; i < rdev->num_msix - 1; i++) { + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); rdev->nq[i].res = NULL; - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); bnxt_qplib_free_nq(&rdev->nq[i]); } } @@ -952,8 +960,11 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev) static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { - int rc = 0, i; int num_vec_created = 0; + dma_addr_t *pg_map; + int rc = 0, i; + int pages; + u8 type; /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; @@ -983,13 +994,13 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) i, rc); goto free_nq; } - rc = bnxt_re_net_ring_alloc - (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr, - rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count, - HWRM_RING_ALLOC_CMPL, - BNXT_QPLIB_NQE_MAX_CNT - 1, - rdev->msix_entries[i + 1].ring_idx, - &rdev->nq[i].ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + pg_map = rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr; + pages = rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count; + rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type, + BNXT_QPLIB_NQE_MAX_CNT - 1, + rdev->msix_entries[i + 1].ring_idx, + &rdev->nq[i].ring_id); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate NQ fw id with rc = 0x%x", @@ -1002,7 +1013,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) return 0; free_nq: for (i = num_vec_created; i >= 0; i--) { - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); bnxt_qplib_free_nq(&rdev->nq[i]); } bnxt_qplib_dealloc_dpi(&rdev->qplib_res, @@ -1256,6 +1268,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { + u8 type; int rc; if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { @@ -1279,7 +1292,8 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type); bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) { @@ -1310,9 +1324,12 @@ static void bnxt_re_worker(struct work_struct *work) static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) { - int rc; - + dma_addr_t *pg_map; + u32 db_offt, ridx; + int pages, vid; bool locked; + u8 type; + int rc; /* Acquire rtnl lock through out this function */ rtnl_lock(); @@ -1356,21 +1373,22 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) pr_err("Failed to allocate RCFW Channel: %#x\n", rc); goto fail; } - rc = bnxt_re_net_ring_alloc - (rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr, - rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count, - HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1, - rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx, - &rdev->rcfw.creq_ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + pg_map = rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr; + pages = rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count; + ridx = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; + rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type, + BNXT_QPLIB_CREQE_MAX_CNT - 1, + ridx, &rdev->rcfw.creq_ring_id); if (rc) { pr_err("Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } - rc = bnxt_qplib_enable_rcfw_channel - (rdev->en_dev->pdev, &rdev->rcfw, - rdev->msix_entries[BNXT_RE_AEQ_IDX].vector, - rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset, - rdev->is_virtfn, &bnxt_re_aeq_handler); + db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX); + vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector; + rc = bnxt_qplib_enable_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + vid, db_offt, rdev->is_virtfn, + &bnxt_re_aeq_handler); if (rc) { pr_err("Failed to enable RCFW channel: %#x\n", rc); goto free_ring; @@ -1454,7 +1472,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) disable_rcfw: bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); free_ring: - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); + type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type); free_rcfw: bnxt_qplib_free_rcfw_channel(&rdev->rcfw); fail: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b98b054148cd..01ea3d010017 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -244,6 +244,7 @@ static void bnxt_qplib_service_nq(unsigned long data) u16 type; int budget = nq->budget; uintptr_t q_handle; + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); /* Service the NQ until empty */ raw_cons = hwq->cons; @@ -290,7 +291,7 @@ static void bnxt_qplib_service_nq(unsigned long data) q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high) << 32; bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle, - DBR_DBR_TYPE_SRQ_ARMENA); + DBC_DBC_TYPE_SRQ_ARMENA); if (!nq->srqn_handler(nq, (struct bnxt_qplib_srq *)q_handle, nqsrqe->event)) @@ -312,7 +313,9 @@ static void bnxt_qplib_service_nq(unsigned long data) } if (hwq->cons != raw_cons) { hwq->cons = raw_cons; - NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements); + bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, hwq->cons, + hwq->max_elements, nq->ring_id, + gen_p5); } } @@ -336,9 +339,11 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); tasklet_disable(&nq->worker); /* Mask h/w interrupt */ - NQ_DB(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); + bnxt_qplib_ring_nq_db(nq->bar_reg_iomem, nq->hwq.cons, + nq->hwq.max_elements, nq->ring_id, gen_p5); /* Sync with last running IRQ handler */ synchronize_irq(nq->vector); if (kill) @@ -373,6 +378,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, int msix_vector, bool need_init) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); int rc; if (nq->requested) @@ -399,7 +405,8 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, nq->vector, nq_indx); } nq->requested = true; - NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); + bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, nq->hwq.cons, + nq->hwq.max_elements, nq->ring_id, gen_p5); return rc; } @@ -433,7 +440,8 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, rc = -ENOMEM; goto fail; } - nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4); + /* Unconditionally map 8 bytes to support 57500 series */ + nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 8); if (!nq->bar_reg_iomem) { rc = -ENOMEM; goto fail; @@ -462,15 +470,17 @@ void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq) int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) { + u8 hwq_type; + nq->pdev = pdev; if (!nq->hwq.max_elements || nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT) nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; - + hwq_type = bnxt_qplib_get_hwq_type(nq->res); if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0, &nq->hwq.max_elements, BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_L2_CMPL)) + PAGE_SIZE, hwq_type)) return -ENOMEM; nq->budget = 8; @@ -481,21 +491,19 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type) { struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - struct dbr_dbr db_msg = { 0 }; void __iomem *db; - u32 sw_prod = 0; + u32 sw_prod; + u64 val = 0; /* Ring DB */ - sw_prod = (arm_type == DBR_DBR_TYPE_SRQ_ARM) ? srq->threshold : - HWQ_CMP(srq_hwq->prod, srq_hwq); - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = cpu_to_le32(((srq->id << DBR_DBR_XID_SFT) & - DBR_DBR_XID_MASK) | arm_type); - db = (arm_type == DBR_DBR_TYPE_SRQ_ARMENA) ? - srq->dbr_base : srq->dpi->dbr; - wmb(); /* barrier before db ring */ - __iowrite64_copy(db, &db_msg, sizeof(db_msg) / sizeof(u64)); + sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ? + srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq); + db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base : + srq->dpi->dbr; + val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type; + val <<= 32; + val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; + writeq(val, db); } int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, @@ -590,7 +598,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->id = le32_to_cpu(resp.xid); srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem; if (srq->threshold) - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARMENA); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARMENA); srq->arm_req = false; return 0; @@ -614,7 +622,7 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, srq_hwq->max_elements - sw_cons + sw_prod; if (count > srq->threshold) { srq->arm_req = false; - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM); } else { /* Deferred arming */ srq->arm_req = true; @@ -702,10 +710,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srq_hwq->max_elements - sw_cons + sw_prod; spin_unlock(&srq_hwq->lock); /* Ring DB */ - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ); if (srq->arm_req == true && count > srq->threshold) { srq->arm_req = false; - bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM); + bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM); } done: return rc; @@ -1494,19 +1502,16 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; - struct dbr_dbr db_msg = { 0 }; u32 sw_prod; + u64 val = 0; + val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | + DBC_DBC_TYPE_SQ); + val <<= 32; sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_SQ); + val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; /* Flush all the WQE writes to HW */ - wmb(); - __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, qp->dpi->dbr); } int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, @@ -1785,19 +1790,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - struct dbr_dbr db_msg = { 0 }; u32 sw_prod; + u64 val = 0; + val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | + DBC_DBC_TYPE_RQ); + val <<= 32; sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_RQ); - + val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; /* Flush the writes to HW Rx WQE before the ringing Rx DB */ - wmb(); - __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, qp->dpi->dbr); } int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, @@ -1881,32 +1883,28 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, /* Spinlock must be held */ static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq) { - struct dbr_dbr db_msg = { 0 }; + u64 val = 0; - db_msg.type_xid = - cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_CQ_ARMENA); + val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | + DBC_DBC_TYPE_CQ_ARMENA; + val <<= 32; /* Flush memory writes before enabling the CQ */ - wmb(); - __iowrite64_copy(cq->dbr_base, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, cq->dbr_base); } static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type) { struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; - struct dbr_dbr db_msg = { 0 }; u32 sw_cons; + u64 val = 0; /* Ring DB */ + val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type; + val <<= 32; sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq); - db_msg.index = cpu_to_le32((sw_cons << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - arm_type); + val |= (sw_cons << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; /* flush memory writes before arming the CQ */ - wmb(); - __iowrite64_copy(cq->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + writeq(val, cq->dpi->dbr); } int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) @@ -2125,7 +2123,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, sq->send_phantom = true; /* TODO: Only ARM if the previous SQE is ARMALL */ - bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL); + bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ_ARMALL); rc = -EAGAIN; goto out; @@ -2794,7 +2792,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, } if (cq->hwq.cons != raw_cons) { cq->hwq.cons = raw_cons; - bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ); + bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ); } exit: return num_cqes - budget; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index f317bae36dd0..a00f1b019da0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -432,10 +432,43 @@ struct bnxt_qplib_cq { #define NQ_DB_CP_FLAGS (NQ_DB_KEY_CP | \ NQ_DB_IDX_VALID | \ NQ_DB_IRQ_DIS) -#define NQ_DB_REARM(db, raw_cons, cp_bit) \ - writel(NQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db) -#define NQ_DB(db, raw_cons, cp_bit) \ - writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db) + +static inline void bnxt_qplib_ring_nq_db64(void __iomem *db, u32 index, + u32 xid, bool arm) +{ + u64 val; + + val = xid & DBC_DBC_XID_MASK; + val |= DBC_DBC_PATH_ROCE; + val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; + val <<= 32; + val |= index & DBC_DBC_INDEX_MASK; + writeq(val, db); +} + +static inline void bnxt_qplib_ring_nq_db_rearm(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_nq_db64(db, index, xid, true); + else + writel(NQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), db); +} + +static inline void bnxt_qplib_ring_nq_db(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_nq_db64(db, index, xid, false); + else + writel(NQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), db); +} struct bnxt_qplib_nq { struct pci_dev *pdev; @@ -449,7 +482,7 @@ struct bnxt_qplib_nq { struct bnxt_qplib_hwq hwq; u16 bar_reg; - u16 bar_reg_off; + u32 bar_reg_off; u16 ring_id; void __iomem *bar_reg_iomem; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 65e17de220f6..9c000245a6de 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -359,11 +359,12 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, static void bnxt_qplib_service_creq(unsigned long data) { struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data; + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); struct bnxt_qplib_hwq *creq = &rcfw->creq; + u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct creq_base *creqe, **creq_ptr; u32 sw_cons, raw_cons; unsigned long flags; - u32 type, budget = CREQ_ENTRY_POLL_BUDGET; /* Service the CREQ until budget is over */ spin_lock_irqsave(&creq->lock, flags); @@ -407,8 +408,9 @@ static void bnxt_qplib_service_creq(unsigned long data) if (creq->cons != raw_cons) { creq->cons = raw_cons; - CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons, - creq->max_elements); + bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem, + raw_cons, creq->max_elements, + rcfw->creq_ring_id, gen_p5); } spin_unlock_irqrestore(&creq->lock, flags); } @@ -560,12 +562,15 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_ctx *ctx, int qp_tbl_sz) { + u8 hwq_type; + rcfw->pdev = pdev; rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT; + hwq_type = bnxt_qplib_get_hwq_type(rcfw->res); if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0, &rcfw->creq.max_elements, - BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, - HWQ_TYPE_L2_CMPL)) { + BNXT_QPLIB_CREQE_UNITS, + 0, PAGE_SIZE, hwq_type)) { dev_err(&rcfw->pdev->dev, "HW channel CREQ allocation failed\n"); goto fail; @@ -607,10 +612,13 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); + tasklet_disable(&rcfw->worker); /* Mask h/w interrupts */ - CREQ_DB(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, - rcfw->creq.max_elements); + bnxt_qplib_ring_creq_db(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, + rcfw->creq.max_elements, rcfw->creq_ring_id, + gen_p5); /* Sync with last running IRQ-handler */ synchronize_irq(rcfw->vector); if (kill) @@ -647,6 +655,7 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, bool need_init) { + bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); int rc; if (rcfw->requested) @@ -663,8 +672,9 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, if (rc) return rc; rcfw->requested = true; - CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, - rcfw->creq.max_elements); + bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem, + rcfw->creq.cons, rcfw->creq.max_elements, + rcfw->creq_ring_id, gen_p5); return 0; } @@ -717,8 +727,9 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, dev_err(&rcfw->pdev->dev, "CREQ BAR region %d resc start is 0!\n", rcfw->creq_bar_reg); + /* Unconditionally map 8 bytes to support 57500 series */ rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off, - 4); + 8); if (!rcfw->creq_bar_reg_iomem) { dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n", rcfw->creq_bar_reg); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 8011d674391e..2138533bb642 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -157,10 +157,46 @@ static inline u32 get_creq_idx(u32 val) #define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \ CREQ_DB_IDX_VALID | \ CREQ_DB_IRQ_DIS) -#define CREQ_DB_REARM(db, raw_cons, cp_bit) \ - writel(CREQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db) -#define CREQ_DB(db, raw_cons, cp_bit) \ - writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db) + +static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index, + u32 xid, bool arm) +{ + u64 val = 0; + + val = xid & DBC_DBC_XID_MASK; + val |= DBC_DBC_PATH_ROCE; + val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; + val <<= 32; + val |= index & DBC_DBC_INDEX_MASK; + + writeq(val, db); +} + +static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_creq_db64(db, index, xid, true); + else + writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), + db); +} + +static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons, + u32 max_elements, u32 xid, + bool gen_p5) +{ + u32 index = raw_cons & (max_elements - 1); + + if (gen_p5) + bnxt_qplib_ring_creq_db64(db, index, xid, true); + else + writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), + db); +} #define CREQ_ENTRY_POLL_BUDGET 0x100 diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 6a0c45827053..35d862b44908 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -206,6 +206,19 @@ static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) return (cctx->chip_num == CHIP_NUM_57500); } +static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) +{ + return bnxt_qplib_is_chip_gen_p5(res->cctx) ? + HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL; +} + +static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) +{ + return bnxt_qplib_is_chip_gen_p5(cctx) ? + RING_ALLOC_REQ_RING_TYPE_NQ : + RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; +} + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 8a9ead419ac2..73e0982fc48f 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -49,11 +49,11 @@ struct cmpl_doorbell { #define CMPL_DOORBELL_IDX_SFT 0 #define CMPL_DOORBELL_RESERVED_MASK 0x3000000UL #define CMPL_DOORBELL_RESERVED_SFT 24 - #define CMPL_DOORBELL_IDX_VALID 0x4000000UL + #define CMPL_DOORBELL_IDX_VALID 0x4000000UL #define CMPL_DOORBELL_MASK 0x8000000UL #define CMPL_DOORBELL_KEY_MASK 0xf0000000UL #define CMPL_DOORBELL_KEY_SFT 28 - #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28) + #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28) }; /* Status Door Bell Format (4 bytes) */ @@ -71,46 +71,56 @@ struct status_doorbell { /* RoCE Host Structures */ /* Doorbell Structures */ -/* 64b Doorbell Format (8 bytes) */ -struct dbr_dbr { - __le32 index; - #define DBR_DBR_INDEX_MASK 0xfffffUL - #define DBR_DBR_INDEX_SFT 0 - #define DBR_DBR_RESERVED12_MASK 0xfff00000UL - #define DBR_DBR_RESERVED12_SFT 20 - __le32 type_xid; - #define DBR_DBR_XID_MASK 0xfffffUL - #define DBR_DBR_XID_SFT 0 - #define DBR_DBR_RESERVED8_MASK 0xff00000UL - #define DBR_DBR_RESERVED8_SFT 20 - #define DBR_DBR_TYPE_MASK 0xf0000000UL - #define DBR_DBR_TYPE_SFT 28 - #define DBR_DBR_TYPE_SQ (0x0UL << 28) - #define DBR_DBR_TYPE_RQ (0x1UL << 28) - #define DBR_DBR_TYPE_SRQ (0x2UL << 28) - #define DBR_DBR_TYPE_SRQ_ARM (0x3UL << 28) - #define DBR_DBR_TYPE_CQ (0x4UL << 28) - #define DBR_DBR_TYPE_CQ_ARMSE (0x5UL << 28) - #define DBR_DBR_TYPE_CQ_ARMALL (0x6UL << 28) - #define DBR_DBR_TYPE_CQ_ARMENA (0x7UL << 28) - #define DBR_DBR_TYPE_SRQ_ARMENA (0x8UL << 28) - #define DBR_DBR_TYPE_CQ_CUTOFF_ACK (0x9UL << 28) - #define DBR_DBR_TYPE_NULL (0xfUL << 28) +/* dbc_dbc (size:64b/8B) */ +struct dbc_dbc { + __le32 index; + #define DBC_DBC_INDEX_MASK 0xffffffUL + #define DBC_DBC_INDEX_SFT 0 + __le32 type_path_xid; + #define DBC_DBC_XID_MASK 0xfffffUL + #define DBC_DBC_XID_SFT 0 + #define DBC_DBC_PATH_MASK 0x3000000UL + #define DBC_DBC_PATH_SFT 24 + #define DBC_DBC_PATH_ROCE (0x0UL << 24) + #define DBC_DBC_PATH_L2 (0x1UL << 24) + #define DBC_DBC_PATH_ENGINE (0x2UL << 24) + #define DBC_DBC_PATH_LAST DBC_DBC_PATH_ENGINE + #define DBC_DBC_DEBUG_TRACE 0x8000000UL + #define DBC_DBC_TYPE_MASK 0xf0000000UL + #define DBC_DBC_TYPE_SFT 28 + #define DBC_DBC_TYPE_SQ (0x0UL << 28) + #define DBC_DBC_TYPE_RQ (0x1UL << 28) + #define DBC_DBC_TYPE_SRQ (0x2UL << 28) + #define DBC_DBC_TYPE_SRQ_ARM (0x3UL << 28) + #define DBC_DBC_TYPE_CQ (0x4UL << 28) + #define DBC_DBC_TYPE_CQ_ARMSE (0x5UL << 28) + #define DBC_DBC_TYPE_CQ_ARMALL (0x6UL << 28) + #define DBC_DBC_TYPE_CQ_ARMENA (0x7UL << 28) + #define DBC_DBC_TYPE_SRQ_ARMENA (0x8UL << 28) + #define DBC_DBC_TYPE_CQ_CUTOFF_ACK (0x9UL << 28) + #define DBC_DBC_TYPE_NQ (0xaUL << 28) + #define DBC_DBC_TYPE_NQ_ARM (0xbUL << 28) + #define DBC_DBC_TYPE_NULL (0xfUL << 28) + #define DBC_DBC_TYPE_LAST DBC_DBC_TYPE_NULL }; -/* 32b Doorbell Format (4 bytes) */ -struct dbr_dbr32 { - __le32 type_abs_incr_xid; - #define DBR_DBR32_XID_MASK 0xfffffUL - #define DBR_DBR32_XID_SFT 0 - #define DBR_DBR32_RESERVED4_MASK 0xf00000UL - #define DBR_DBR32_RESERVED4_SFT 20 - #define DBR_DBR32_INCR_MASK 0xf000000UL - #define DBR_DBR32_INCR_SFT 24 - #define DBR_DBR32_ABS 0x10000000UL - #define DBR_DBR32_TYPE_MASK 0xe0000000UL - #define DBR_DBR32_TYPE_SFT 29 - #define DBR_DBR32_TYPE_SQ (0x0UL << 29) +/* dbc_dbc32 (size:32b/4B) */ +struct dbc_dbc32 { + __le32 type_abs_incr_xid; + #define DBC_DBC32_XID_MASK 0xfffffUL + #define DBC_DBC32_XID_SFT 0 + #define DBC_DBC32_PATH_MASK 0xc00000UL + #define DBC_DBC32_PATH_SFT 22 + #define DBC_DBC32_PATH_ROCE (0x0UL << 22) + #define DBC_DBC32_PATH_L2 (0x1UL << 22) + #define DBC_DBC32_PATH_LAST DBC_DBC32_PATH_L2 + #define DBC_DBC32_INCR_MASK 0xf000000UL + #define DBC_DBC32_INCR_SFT 24 + #define DBC_DBC32_ABS 0x10000000UL + #define DBC_DBC32_TYPE_MASK 0xe0000000UL + #define DBC_DBC32_TYPE_SFT 29 + #define DBC_DBC32_TYPE_SQ (0x0UL << 29) + #define DBC_DBC32_TYPE_LAST DBC_DBC32_TYPE_SQ }; /* SQ WQE Structures */ @@ -2719,6 +2729,8 @@ struct creq_query_func_resp_sb { __le16 max_srq; __le32 max_gid; __le32 tqm_alloc_reqs[12]; + __le32 max_dpi; + __le32 reserved_32; }; /* Set resources command response (16 bytes) */