From 7c3d2bc4b9c283c3ad285dee9bd00c7ce513ad49 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:50 +0300 Subject: [PATCH 001/296] RDMA/qedr: Add additional maintainer to MAINTAINERS file Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0e967b3ca1c6..1765fb3b2844 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10850,6 +10850,7 @@ F: drivers/net/ethernet/qlogic/qede/ QLOGIC QL4xxx RDMA DRIVER M: Ram Amrani +M: Michal Kalderon M: Ariel Elior L: linux-rdma@vger.kernel.org S: Supported From 99d195cc089e689e54bad13c9bdf88e90d158892 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:51 +0300 Subject: [PATCH 002/296] RDMA/qedr: Rename the qedr_cm file as a preparation for iWARP support The main differences between iWARP and RoCE lay in the communication management functions. These will be placed in separate files. Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/Makefile | 2 +- drivers/infiniband/hw/qedr/{qedr_cm.c => qedr_roce_cm.c} | 2 +- drivers/infiniband/hw/qedr/{qedr_cm.h => qedr_roce_cm.h} | 0 drivers/infiniband/hw/qedr/verbs.c | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename drivers/infiniband/hw/qedr/{qedr_cm.c => qedr_roce_cm.c} (99%) rename drivers/infiniband/hw/qedr/{qedr_cm.h => qedr_roce_cm.h} (100%) diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile index ba7067c77f2f..331a3612f8b0 100644 --- a/drivers/infiniband/hw/qedr/Makefile +++ b/drivers/infiniband/hw/qedr/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o -qedr-y := main.o verbs.o qedr_cm.o +qedr-y := main.o verbs.o qedr_roce_cm.o diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c similarity index 99% rename from drivers/infiniband/hw/qedr/qedr_cm.c rename to drivers/infiniband/hw/qedr/qedr_roce_cm.c index 4689e802b332..c3c249ba62b8 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -48,7 +48,7 @@ #include "qedr.h" #include "verbs.h" #include -#include "qedr_cm.h" +#include "qedr_roce_cm.h" void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info) { diff --git a/drivers/infiniband/hw/qedr/qedr_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h similarity index 100% rename from drivers/infiniband/hw/qedr/qedr_cm.h rename to drivers/infiniband/hw/qedr/qedr_roce_cm.h diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 2ae71b8f1ba8..9a280d1971fc 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -49,7 +49,7 @@ #include "qedr.h" #include "verbs.h" #include -#include "qedr_cm.h" +#include "qedr_roce_cm.h" #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) From e6a38c54faf38498170e227c82ea25cb8bc1ae71 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:52 +0300 Subject: [PATCH 003/296] RDMA/qedr: Add support for registering an iWARP device There are slight differences between iWARP and RoCE in the ibdev registration. This patch handles the changes. 
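In short, qedr_register_device() now dispatches on the transport type reported by qed. A minimal sketch of the dispatch, using the helpers added in the hunks below (an illustrative excerpt, not an additional hunk):

	if (IS_IWARP(dev)) {
		/* RNIC node type, iwcm callbacks, single GID/pkey entry */
		rc = qedr_iw_register_device(dev);
		if (rc)
			return rc;
	} else {
		/* IB CA node type, RoCE GID table add/del hooks */
		qedr_roce_register_device(dev);
	}
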
Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 84 +++++++++++++++++++++++++++--- drivers/infiniband/hw/qedr/qedr.h | 3 ++ drivers/infiniband/hw/qedr/verbs.c | 34 +++++------- drivers/infiniband/hw/qedr/verbs.h | 2 + 4 files changed, 97 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index b5851fd67d4f..ad227b5c0c43 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -94,8 +96,75 @@ static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num) return qdev->ndev; } +int qedr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = qedr_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = qedr_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = 1; + immutable->gid_tbl_len = 1; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + immutable->max_mad_size = 0; + + return 0; +} + +int qedr_iw_register_device(struct qedr_dev *dev) +{ + dev->ibdev.node_type = RDMA_NODE_RNIC; + dev->ibdev.query_gid = qedr_iw_query_gid; + + dev->ibdev.get_port_immutable = qedr_iw_port_immutable; + + dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); + if (!dev->ibdev.iwcm) + return -ENOMEM; + + memcpy(dev->ibdev.iwcm->ifname, + dev->ndev->name, sizeof(dev->ibdev.iwcm->ifname)); + + return 0; +} + +void qedr_roce_register_device(struct qedr_dev *dev) +{ + dev->ibdev.node_type = RDMA_NODE_IB_CA; + dev->ibdev.query_gid = qedr_query_gid; + + dev->ibdev.add_gid = qedr_add_gid; + dev->ibdev.del_gid = qedr_del_gid; + + dev->ibdev.get_port_immutable = qedr_roce_port_immutable; +} + static int qedr_register_device(struct qedr_dev *dev) { + int rc; + strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX); dev->ibdev.node_guid = dev->attr.node_guid; @@ -123,18 +192,21 @@ static int qedr_register_device(struct qedr_dev *dev) QEDR_UVERBS(POST_SEND) | QEDR_UVERBS(POST_RECV); + if (IS_IWARP(dev)) { + rc = qedr_iw_register_device(dev); + if (rc) + return rc; + } else { + qedr_roce_register_device(dev); + } + dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->num_cnq; - dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.query_device = qedr_query_device; dev->ibdev.query_port = qedr_query_port; dev->ibdev.modify_port = qedr_modify_port; - dev->ibdev.query_gid = qedr_query_gid; - dev->ibdev.add_gid = qedr_add_gid; - dev->ibdev.del_gid = qedr_del_gid; - dev->ibdev.alloc_ucontext = qedr_alloc_ucontext; dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext; dev->ibdev.mmap = qedr_mmap; @@ -168,7 +240,7 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.post_recv = qedr_post_recv; dev->ibdev.process_mad = qedr_process_mad; - dev->ibdev.get_port_immutable = qedr_port_immutable; + dev->ibdev.get_netdev = qedr_get_netdev; dev->ibdev.dev.parent = 
&dev->pdev->dev; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index ab7784bfdac6..c52fde04d575 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -44,6 +44,8 @@ #define QEDR_MODULE_VERSION "8.10.10.0" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" #define DP_NAME(dev) ((dev)->ibdev.name) +#define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP) +#define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE) #define DP_DEBUG(dev, module, fmt, ...) \ pr_debug("(%s) " module ": " fmt, \ @@ -161,6 +163,7 @@ struct qedr_dev { struct qedr_cq *gsi_sqcq; struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; + enum qed_rdma_type rdma_type; unsigned long enet_state; }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 9a280d1971fc..047ac74f6ed8 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -70,6 +70,20 @@ int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return 0; } +int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *sgid) +{ + struct qedr_dev *dev = get_qedr_dev(ibdev); + + memset(sgid->raw, 0, sizeof(sgid->raw)); + ether_addr_copy(sgid->raw, dev->ndev->dev_addr); + + DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index, + sgid->global.interface_id, sgid->global.subnet_prefix); + + return 0; +} + int qedr_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *sgid) { @@ -3600,23 +3614,3 @@ int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); return IB_MAD_RESULT_SUCCESS; } - -int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | - RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; - - err = ib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->max_mad_size = IB_MGMT_MAD_SIZE; - - return 0; -} diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 0f8ab49d5a1a..1a94425dea33 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -39,6 +39,8 @@ int qedr_modify_port(struct ib_device *, u8 port, int mask, struct ib_port_modify *props); int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid); +int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid); int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); From f5b1b1775be6320aeac64b2d4fd38dc3f420fb18 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:53 +0300 Subject: [PATCH 004/296] RDMA/qedr: Add iWARP support in existing verbs Make slight modifications to common RoCE/iWARP code. Add additional doorbell for iWARP post_send. iWARP QP pbl is allocated in qed and not in qedr. 
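Condensed, the kernel-QP creation order for iWARP is therefore inverted: qed creates the QP (and owns the PBL memory), and qedr then builds the SQ/RQ chains on top of the PBL that qed handed back. A sketch of the flow implemented in qedr_iwarp_create_kernel_qp() below:

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      in_params, &out_params);

	/* the chain is allocated over the externally provided (qed) PBL */
	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_sq_elems,
					   QEDR_SQE_ELEMENT_SIZE,
					   &qp->sq.pbl, &ext_pbl);
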
Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr.h | 3 + drivers/infiniband/hw/qedr/verbs.c | 171 +++++++++++++++++++++++------ 2 files changed, 139 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index c52fde04d575..0c0a39ad297b 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -319,6 +319,9 @@ struct qedr_qp_hwq_info { /* DB */ void __iomem *db; union db_prod32 db_data; + + void __iomem *iwarp_db2; + union db_prod32 iwarp_db2_data; }; #define QEDR_INC_SW_IDX(p_info, index) \ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 047ac74f6ed8..a4c29325a9ae 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -277,8 +277,13 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->sm_lid = 0; attr->sm_sl = 0; attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; - attr->gid_tbl_len = QEDR_MAX_SGID; - attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + attr->gid_tbl_len = 1; + attr->pkey_tbl_len = 1; + } else { + attr->gid_tbl_len = QEDR_MAX_SGID; + attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; + } attr->bad_pkey_cntr = rdma_port->pkey_bad_counter; attr->qkey_viol_cntr = 0; get_link_speed_and_width(rdma_port->link_speed, @@ -1430,6 +1435,21 @@ static int qedr_create_user_qp(struct qedr_dev *dev, return rc; } +static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +{ + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid; + + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + qp->rq.iwarp_db2 = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); + qp->rq.iwarp_db2_data.data.icid = qp->icid; + qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; +} + static int qedr_roce_create_kernel_qp(struct qedr_dev *dev, struct qedr_qp *qp, @@ -1476,8 +1496,71 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev, qp->icid = out_params.icid; qedr_set_roce_db_info(dev, qp); + return rc; +} - return 0; +static int +qedr_iwarp_create_kernel_qp(struct qedr_dev *dev, + struct qedr_qp *qp, + struct qed_rdma_create_qp_in_params *in_params, + u32 n_sq_elems, u32 n_rq_elems) +{ + struct qed_rdma_create_qp_out_params out_params; + struct qed_chain_ext_pbl ext_pbl; + int rc; + + in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems, + QEDR_SQE_ELEMENT_SIZE, + QED_CHAIN_MODE_PBL); + in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems, + QEDR_RQE_ELEMENT_SIZE, + QED_CHAIN_MODE_PBL); + + qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, + in_params, &out_params); + + if (!qp->qed_qp) + return -EINVAL; + + /* Now we allocate the chain */ + ext_pbl.p_pbl_virt = out_params.sq_pbl_virt; + ext_pbl.p_pbl_phys = out_params.sq_pbl_phys; + + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + n_sq_elems, + QEDR_SQE_ELEMENT_SIZE, + &qp->sq.pbl, &ext_pbl); + + if (rc) + goto err; + + ext_pbl.p_pbl_virt = out_params.rq_pbl_virt; + ext_pbl.p_pbl_phys = out_params.rq_pbl_phys; + + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + n_rq_elems, + QEDR_RQE_ELEMENT_SIZE, + 
&qp->rq.pbl, &ext_pbl); + + if (rc) + goto err; + + qp->qp_id = out_params.qp_id; + qp->icid = out_params.icid; + + qedr_set_iwarp_db_info(dev, qp); + return rc; + +err: + dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); + + return rc; } static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) @@ -1552,8 +1635,12 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE; - rc = qedr_roce_create_kernel_qp(dev, qp, &in_params, - n_sq_elems, n_rq_elems); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params, + n_sq_elems, n_rq_elems); + else + rc = qedr_roce_create_kernel_qp(dev, qp, &in_params, + n_sq_elems, n_rq_elems); if (rc) qedr_cleanup_kernel(dev, qp); @@ -1700,10 +1787,13 @@ static int qedr_update_qp_state(struct qedr_dev *dev, /* Update doorbell (in case post_recv was * done before move to RTR) */ - wmb(); - writel(qp->rq.db_data.raw, qp->rq.db); - /* Make sure write takes effect */ - mmiowb(); + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + wmb(); + writel(qp->rq.db_data.raw, qp->rq.db); + /* Make sure write takes effect */ + mmiowb(); + } break; case QED_ROCE_QP_STATE_ERR: break; @@ -1797,16 +1887,18 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, else new_qp_state = old_qp_state; - if (!ib_modify_qp_is_ok - (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask, - IB_LINK_LAYER_ETHERNET)) { - DP_ERR(dev, - "modify qp: invalid attribute mask=0x%x specified for\n" - "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n", - attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state, - new_qp_state); - rc = -EINVAL; - goto err; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state, + ibqp->qp_type, attr_mask, + IB_LINK_LAYER_ETHERNET)) { + DP_ERR(dev, + "modify qp: invalid attribute mask=0x%x specified for\n" + "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n", + attr_mask, qp->qp_id, ibqp->qp_type, + old_qp_state, new_qp_state); + rc = -EINVAL; + goto err; + } } /* Translate the masks... */ @@ -2122,15 +2214,17 @@ int qedr_destroy_qp(struct ib_qp *ibqp) DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n", qp, qp->qp_type); - if ((qp->state != QED_ROCE_QP_STATE_RESET) && - (qp->state != QED_ROCE_QP_STATE_ERR) && - (qp->state != QED_ROCE_QP_STATE_INIT)) { + if (rdma_protocol_roce(&dev->ibdev, 1)) { + if ((qp->state != QED_ROCE_QP_STATE_RESET) && + (qp->state != QED_ROCE_QP_STATE_ERR) && + (qp->state != QED_ROCE_QP_STATE_INIT)) { - attr.qp_state = IB_QPS_ERR; - attr_mask |= IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + attr_mask |= IB_QP_STATE; - /* Change the QP state to ERROR */ - qedr_modify_qp(ibqp, &attr, attr_mask, NULL); + /* Change the QP state to ERROR */ + qedr_modify_qp(ibqp, &attr, attr_mask, NULL); + } } if (qp->qp_type == IB_QPT_GSI) @@ -3025,15 +3119,17 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->q_lock, flags); - if ((qp->state != QED_ROCE_QP_STATE_RTS) && - (qp->state != QED_ROCE_QP_STATE_ERR) && - (qp->state != QED_ROCE_QP_STATE_SQD)) { - spin_unlock_irqrestore(&qp->q_lock, flags); - *bad_wr = wr; - DP_DEBUG(dev, QEDR_MSG_CQ, - "QP in wrong state! 
QP icid=0x%x state %d\n", - qp->icid, qp->state); - return -EINVAL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + if ((qp->state != QED_ROCE_QP_STATE_RTS) && + (qp->state != QED_ROCE_QP_STATE_ERR) && + (qp->state != QED_ROCE_QP_STATE_SQD)) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + DP_DEBUG(dev, QEDR_MSG_CQ, + "QP in wrong state! QP icid=0x%x state %d\n", + qp->icid, qp->state); + return -EINVAL; + } } while (wr) { @@ -3153,6 +3249,11 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, /* Make sure write sticks */ mmiowb(); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2); + mmiowb(); /* for second doorbell */ + } + wr = wr->next; } From fb1a22be9d32675ba140a4e1438e5e52c6d5694e Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:54 +0300 Subject: [PATCH 005/296] RDMA/qedr: Add support for read with invalidate, supported in iWARP iWARP supports read with invalidate. There is an assumption that read with invalidate will only be called on an iWARP device Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr_hsi_rdma.h | 6 ++++-- drivers/infiniband/hw/qedr/verbs.c | 8 +++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h index 5c98d2055cad..b7587f10e7de 100644 --- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h +++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h @@ -655,8 +655,10 @@ struct rdma_sq_rdma_wqe_1st { #define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_SHIFT 4 #define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_MASK 0x1 #define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5 -#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_MASK 0x3 -#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT 6 +#define RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG_SHIFT 6 +#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT 7 u8 wqe_size; u8 prev_wqe_size; }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a4c29325a9ae..237ae5e40e76 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2845,6 +2845,7 @@ static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_SEND_WITH_INV: return IB_WC_SEND; case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; @@ -3005,11 +3006,8 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; break; case IB_WR_RDMA_READ_WITH_INV: - DP_ERR(dev, - "RDMA READ WITH INVALIDATE not supported\n"); - *bad_wr = wr; - rc = -EINVAL; - break; + SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1); + /* fallthrough... 
same is identical to RDMA READ */ case IB_WR_RDMA_READ: wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; From de0089e692a92ce5180eec6a79f1f79153e3c669 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:55 +0300 Subject: [PATCH 006/296] RDMA/qedr: Add iWARP connection management qp related callbacks This patch implements the following iWARP callbacks: qp_add_ref qp_rem_ref get_qp Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/Makefile | 2 +- drivers/infiniband/hw/qedr/main.c | 10 +++++ drivers/infiniband/hw/qedr/qedr.h | 5 ++- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 57 +++++++++++++++++++++++++ drivers/infiniband/hw/qedr/qedr_iw_cm.h | 37 ++++++++++++++++ drivers/infiniband/hw/qedr/verbs.c | 38 ++++++++++++++++- 6 files changed, 145 insertions(+), 4 deletions(-) create mode 100644 drivers/infiniband/hw/qedr/qedr_iw_cm.c create mode 100644 drivers/infiniband/hw/qedr/qedr_iw_cm.h diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile index 331a3612f8b0..1c0bc4f78550 100644 --- a/drivers/infiniband/hw/qedr/Makefile +++ b/drivers/infiniband/hw/qedr/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o -qedr-y := main.o verbs.o qedr_roce_cm.o +qedr-y := main.o verbs.o qedr_roce_cm.o qedr_iw_cm.o diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ad227b5c0c43..ebac63f597bc 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -39,12 +39,14 @@ #include #include #include +#include #include #include #include "qedr.h" #include "verbs.h" #include +#include "qedr_iw_cm.h" MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver"); MODULE_AUTHOR("QLogic Corporation"); @@ -143,6 +145,9 @@ int qedr_iw_register_device(struct qedr_dev *dev) dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); if (!dev->ibdev.iwcm) return -ENOMEM; + dev->ibdev.iwcm->add_ref = qedr_iw_qp_add_ref; + dev->ibdev.iwcm->rem_ref = qedr_iw_qp_rem_ref; + dev->ibdev.iwcm->get_qp = qedr_iw_get_qp; memcpy(dev->ibdev.iwcm->ifname, dev->ndev->name, sizeof(dev->ibdev.iwcm->ifname)); @@ -315,6 +320,11 @@ static int qedr_alloc_resources(struct qedr_dev *dev) spin_lock_init(&dev->sgid_lock); + if (IS_IWARP(dev)) { + spin_lock_init(&dev->idr_lock); + idr_init(&dev->qpidr); + } + /* Allocate Status blocks for CNQ */ dev->sb_array = kcalloc(dev->num_cnq, sizeof(*dev->sb_array), GFP_KERNEL); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 0c0a39ad297b..5dd82d1651be 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -33,6 +33,7 @@ #define __QEDR_H__ #include +#include #include #include #include @@ -164,7 +165,8 @@ struct qedr_dev { struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; - + spinlock_t idr_lock; /* Protect qpidr data-structure */ + struct idr qpidr; unsigned long enet_state; }; @@ -399,6 +401,7 @@ struct qedr_qp { /* Relevant to qps created from user space only (applications) */ struct qedr_userq usq; struct qedr_userq urq; + atomic_t refcnt; }; struct qedr_ah { diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c new file mode 100644 index 000000000000..8811dbc036e2 --- /dev/null +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -0,0 +1,57 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to 
you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "qedr.h" +void qedr_iw_qp_add_ref(struct ib_qp *ibqp) +{ + struct qedr_qp *qp = get_qedr_qp(ibqp); + + atomic_inc(&qp->refcnt); +} + +void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) +{ + struct qedr_qp *qp = get_qedr_qp(ibqp); + + if (atomic_dec_and_test(&qp->refcnt)) { + spin_lock_irq(&qp->dev->idr_lock); + idr_remove(&qp->dev->qpidr, qp->qp_id); + spin_unlock_irq(&qp->dev->idr_lock); + kfree(qp); + } +} + +struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) +{ + struct qedr_dev *dev = get_qedr_dev(ibdev); + + return idr_find(&dev->qpidr, qpn); +} diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.h b/drivers/infiniband/hw/qedr/qedr_iw_cm.h new file mode 100644 index 000000000000..ea6d17afbff2 --- /dev/null +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.h @@ -0,0 +1,37 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include +void qedr_iw_qp_add_ref(struct ib_qp *qp); + +void qedr_iw_qp_rem_ref(struct ib_qp *qp); + +struct ib_qp *qedr_iw_get_qp(struct ib_device *dev, int qpn); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 237ae5e40e76..2d8d031ca713 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1280,6 +1280,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); + atomic_set(&qp->refcnt, 1); qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1350,6 +1351,33 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); } +static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id) +{ + int rc; + + if (!rdma_protocol_iwarp(&dev->ibdev, 1)) + return 0; + + idr_preload(GFP_KERNEL); + spin_lock_irq(&dev->idr_lock); + + rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC); + + spin_unlock_irq(&dev->idr_lock); + idr_preload_end(); + + return rc < 0 ? rc : 0; +} + +static void qedr_idr_remove(struct qedr_dev *dev, u32 id) +{ + if (!rdma_protocol_iwarp(&dev->ibdev, 1)) + return; + + spin_lock_irq(&dev->idr_lock); + idr_remove(&dev->qpidr, id); + spin_unlock_irq(&dev->idr_lock); +} static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) { if (qp->usq.umem) @@ -1700,6 +1728,10 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; + rc = qedr_idr_add(dev, qp, qp->qp_id); + if (rc) + goto err; + return &qp->ibqp; err: @@ -2232,8 +2264,10 @@ int qedr_destroy_qp(struct ib_qp *ibqp) qedr_free_qp_resources(dev, qp); - kfree(qp); - + if (atomic_dec_and_test(&qp->refcnt)) { + qedr_idr_remove(dev, qp->qp_id); + kfree(qp); + } return rc; } From e411e0587e0ddb6dae69944fac72f5d15ca89507 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:56 +0300 Subject: [PATCH 007/296] RDMA/qedr: Add iWARP connection management functions Implements the iWARP connection management functions: connect, accept, create listener and destroy listener Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 11 + drivers/infiniband/hw/qedr/qedr.h | 22 +- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 689 ++++++++++++++++++++++++ drivers/infiniband/hw/qedr/qedr_iw_cm.h | 12 + drivers/infiniband/hw/qedr/verbs.c | 17 + 5 files changed, 750 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ebac63f597bc..c594b11a89f0 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -145,6 +145,12 @@ int qedr_iw_register_device(struct qedr_dev *dev) dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); if (!dev->ibdev.iwcm) return -ENOMEM; + + dev->ibdev.iwcm->connect = qedr_iw_connect; + dev->ibdev.iwcm->accept = qedr_iw_accept; + dev->ibdev.iwcm->reject = qedr_iw_reject; + dev->ibdev.iwcm->create_listen = qedr_iw_create_listen; + dev->ibdev.iwcm->destroy_listen = qedr_iw_destroy_listen; dev->ibdev.iwcm->add_ref = qedr_iw_qp_add_ref; dev->ibdev.iwcm->rem_ref = qedr_iw_qp_rem_ref; dev->ibdev.iwcm->get_qp = qedr_iw_get_qp; @@ -296,6 +302,9 @@ static void qedr_free_resources(struct qedr_dev *dev) { int i; + if (IS_IWARP(dev)) + destroy_workqueue(dev->iwarp_wq); + for (i = 0; i < dev->num_cnq; i++) { 
qedr_free_mem_sb(dev, &dev->sb_array[i], dev->sb_start + i); dev->ops->common->chain_free(dev->cdev, &dev->cnq_array[i].pbl); @@ -323,6 +332,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) if (IS_IWARP(dev)) { spin_lock_init(&dev->idr_lock); idr_init(&dev->qpidr); + dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } /* Allocate Status blocks for CNQ */ @@ -800,6 +810,7 @@ static int qedr_init_hw(struct qedr_dev *dev) in_params->events = &events; in_params->cq_mode = QED_RDMA_CQ_MODE_32_BITS; in_params->max_mtu = dev->ndev->mtu; + dev->iwarp_max_mtu = dev->ndev->mtu; ether_addr_copy(&in_params->mac_addr[0], dev->ndev->dev_addr); rc = dev->ops->rdma_init(dev->cdev, in_params); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 5dd82d1651be..06cb96d86332 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -60,6 +60,7 @@ #define QEDR_MSG_SQ " SQ" #define QEDR_MSG_QP " QP" #define QEDR_MSG_GSI " GSI" +#define QEDR_MSG_IWARP " IW" #define QEDR_CQ_MAGIC_NUMBER (0x11223344) @@ -167,6 +168,9 @@ struct qedr_dev { enum qed_rdma_type rdma_type; spinlock_t idr_lock; /* Protect qpidr data-structure */ struct idr qpidr; + struct workqueue_struct *iwarp_wq; + u16 iwarp_max_mtu; + unsigned long enet_state; }; @@ -344,7 +348,7 @@ enum qedr_qp_err_bitmap { struct qedr_qp { struct ib_qp ibqp; /* must be first */ struct qedr_dev *dev; - + struct qedr_iw_ep *ep; struct qedr_qp_hwq_info sq; struct qedr_qp_hwq_info rq; @@ -402,6 +406,7 @@ struct qedr_qp { struct qedr_userq usq; struct qedr_userq urq; atomic_t refcnt; + bool destroyed; }; struct qedr_ah { @@ -482,6 +487,21 @@ static inline int qedr_get_dmac(struct qedr_dev *dev, return 0; } +struct qedr_iw_listener { + struct qedr_dev *dev; + struct iw_cm_id *cm_id; + int backlog; + void *qed_handle; +}; + +struct qedr_iw_ep { + struct qedr_dev *dev; + struct iw_cm_id *cm_id; + struct qedr_qp *qp; + void *qed_context; + u8 during_connect; +}; + static inline struct qedr_ucontext *get_qedr_ucontext(struct ib_ucontext *ibucontext) { diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 8811dbc036e2..fe9b2b6149b0 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -29,7 +29,696 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include +#include +#include +#include +#include +#include +#include #include "qedr.h" +#include "qedr_iw_cm.h" + +static inline void +qedr_fill_sockaddr4(const struct qed_iwarp_cm_info *cm_info, + struct iw_cm_event *event) +{ + struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr; + + laddr->sin_family = AF_INET; + raddr->sin_family = AF_INET; + + laddr->sin_port = htons(cm_info->local_port); + raddr->sin_port = htons(cm_info->remote_port); + + laddr->sin_addr.s_addr = htonl(cm_info->local_ip[0]); + raddr->sin_addr.s_addr = htonl(cm_info->remote_ip[0]); +} + +static inline void +qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, + struct iw_cm_event *event) +{ + struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr; + struct sockaddr_in6 *raddr6 = + (struct sockaddr_in6 *)&event->remote_addr; + int i; + + laddr6->sin6_family = AF_INET6; + raddr6->sin6_family = AF_INET6; + + laddr6->sin6_port = htons(cm_info->local_port); + raddr6->sin6_port = htons(cm_info->remote_port); + + for (i = 0; i < 4; i++) { + laddr6->sin6_addr.in6_u.u6_addr32[i] = + htonl(cm_info->local_ip[i]); + raddr6->sin6_addr.in6_u.u6_addr32[i] = + htonl(cm_info->remote_ip[i]); + } +} + +void +qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_listener *listener = (struct qedr_iw_listener *)context; + struct qedr_dev *dev = listener->dev; + struct iw_cm_event event; + struct qedr_iw_ep *ep; + + ep = kzalloc(sizeof(*ep), GFP_ATOMIC); + if (!ep) + return; + + ep->dev = dev; + ep->qed_context = params->ep_context; + + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REQUEST; + event.status = params->status; + + if (params->cm_info->ip_version == QED_TCP_IPV4) + qedr_fill_sockaddr4(params->cm_info, &event); + else + qedr_fill_sockaddr6(params->cm_info, &event); + + event.provider_data = (void *)ep; + event.private_data = (void *)params->cm_info->private_data; + event.private_data_len = (u8)params->cm_info->private_data_len; + event.ord = params->cm_info->ord; + event.ird = params->cm_info->ird; + + listener->cm_id->event_handler(listener->cm_id, &event); +} + +void +qedr_iw_issue_event(void *context, + struct qed_iwarp_cm_event_params *params, + enum iw_cm_event_type event_type) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + struct iw_cm_event event; + + memset(&event, 0, sizeof(event)); + event.status = params->status; + event.event = event_type; + + if (params->cm_info) { + event.ird = params->cm_info->ird; + event.ord = params->cm_info->ord; + event.private_data_len = params->cm_info->private_data_len; + event.private_data = (void *)params->cm_info->private_data; + } + + if (ep->cm_id) + ep->cm_id->event_handler(ep->cm_id, &event); +} + +void +qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + if (ep->cm_id) { + qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE); + + ep->cm_id->rem_ref(ep->cm_id); + ep->cm_id = NULL; + } +} + +void +qedr_iw_qp_event(void *context, + struct qed_iwarp_cm_event_params *params, + enum ib_event_type ib_event, char *str) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + struct qedr_dev *dev = ep->dev; + struct ib_qp *ibqp = &ep->qp->ibqp; + struct ib_event event; + + DP_NOTICE(dev, "QP error received: %s\n", str); + + if (ibqp->event_handler) { + event.event = ib_event; + event.device = 
ibqp->device; + event.element.qp = ibqp; + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +struct qedr_discon_work { + struct work_struct work; + struct qedr_iw_ep *ep; + enum qed_iwarp_event_type event; + int status; +}; + +static void qedr_iw_disconnect_worker(struct work_struct *work) +{ + struct qedr_discon_work *dwork = + container_of(work, struct qedr_discon_work, work); + struct qed_rdma_modify_qp_in_params qp_params = { 0 }; + struct qedr_iw_ep *ep = dwork->ep; + struct qedr_dev *dev = ep->dev; + struct qedr_qp *qp = ep->qp; + struct iw_cm_event event; + + if (qp->destroyed) { + kfree(dwork); + qedr_iw_qp_rem_ref(&qp->ibqp); + return; + } + + memset(&event, 0, sizeof(event)); + event.status = dwork->status; + event.event = IW_CM_EVENT_DISCONNECT; + + /* Success means graceful disconnect was requested. modifying + * to SQD is translated to graceful disconnect. O/w reset is sent + */ + if (dwork->status) + qp_params.new_state = QED_ROCE_QP_STATE_ERR; + else + qp_params.new_state = QED_ROCE_QP_STATE_SQD; + + kfree(dwork); + + if (ep->cm_id) + ep->cm_id->event_handler(ep->cm_id, &event); + + SET_FIELD(qp_params.modify_flags, + QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); + + dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); + + qedr_iw_qp_rem_ref(&qp->ibqp); +} + +void +qedr_iw_disconnect_event(void *context, + struct qed_iwarp_cm_event_params *params) +{ + struct qedr_discon_work *work; + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + struct qedr_dev *dev = ep->dev; + struct qedr_qp *qp = ep->qp; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + qedr_iw_qp_add_ref(&qp->ibqp); + work->ep = ep; + work->event = params->event; + work->status = params->status; + + INIT_WORK(&work->work, qedr_iw_disconnect_worker); + queue_work(dev->iwarp_wq, &work->work); +} + +static void +qedr_iw_passive_complete(void *context, + struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + struct qedr_dev *dev = ep->dev; + + /* We will only reach the following state if MPA_REJECT was called on + * passive. In this case there will be no associated QP. 
+ */ + if ((params->status == -ECONNREFUSED) && (!ep->qp)) { + DP_DEBUG(dev, QEDR_MSG_IWARP, + "PASSIVE connection refused releasing ep...\n"); + kfree(ep); + return; + } + + qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED); + + if (params->status < 0) + qedr_iw_close_event(context, params); +} + +int +qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + struct qedr_dev *dev = ep->dev; + struct qed_iwarp_send_rtr_in rtr_in; + + rtr_in.ep_context = params->ep_context; + + return dev->ops->iwarp_send_rtr(dev->rdma_ctx, &rtr_in); +} + +int +qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + struct qedr_dev *dev = ep->dev; + + switch (params->event) { + case QED_IWARP_EVENT_MPA_REQUEST: + qedr_iw_mpa_request(context, params); + break; + case QED_IWARP_EVENT_ACTIVE_MPA_REPLY: + qedr_iw_mpa_reply(context, params); + break; + case QED_IWARP_EVENT_PASSIVE_COMPLETE: + ep->during_connect = 0; + qedr_iw_passive_complete(context, params); + break; + + case QED_IWARP_EVENT_ACTIVE_COMPLETE: + ep->during_connect = 0; + qedr_iw_issue_event(context, + params, + IW_CM_EVENT_CONNECT_REPLY); + if (params->status < 0) { + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + ep->cm_id->rem_ref(ep->cm_id); + ep->cm_id = NULL; + } + break; + case QED_IWARP_EVENT_DISCONNECT: + qedr_iw_disconnect_event(context, params); + break; + case QED_IWARP_EVENT_CLOSE: + ep->during_connect = 0; + qedr_iw_close_event(context, params); + break; + case QED_IWARP_EVENT_RQ_EMPTY: + qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_RQ_EMPTY"); + break; + case QED_IWARP_EVENT_IRQ_FULL: + qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_IRQ_FULL"); + break; + case QED_IWARP_EVENT_LLP_TIMEOUT: + qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_LLP_TIMEOUT"); + break; + case QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR: + qedr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR, + "QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR"); + break; + case QED_IWARP_EVENT_CQ_OVERFLOW: + qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_CQ_OVERFLOW"); + break; + case QED_IWARP_EVENT_QP_CATASTROPHIC: + qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_QP_CATASTROPHIC"); + break; + case QED_IWARP_EVENT_LOCAL_ACCESS_ERROR: + qedr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR, + "QED_IWARP_EVENT_LOCAL_ACCESS_ERROR"); + break; + case QED_IWARP_EVENT_REMOTE_OPERATION_ERROR: + qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL, + "QED_IWARP_EVENT_REMOTE_OPERATION_ERROR"); + break; + case QED_IWARP_EVENT_TERMINATE_RECEIVED: + DP_NOTICE(dev, "Got terminate message\n"); + break; + default: + DP_NOTICE(dev, "Unknown event received %d\n", params->event); + break; + }; + return 0; +} + +static u16 qedr_iw_get_vlan_ipv4(struct qedr_dev *dev, u32 *addr) +{ + struct net_device *ndev; + u16 vlan_id = 0; + + ndev = ip_dev_find(&init_net, htonl(addr[0])); + + if (ndev) { + vlan_id = rdma_vlan_dev_vlan_id(ndev); + dev_put(ndev); + } + if (vlan_id == 0xffff) + vlan_id = 0; + return vlan_id; +} + +static u16 qedr_iw_get_vlan_ipv6(u32 *addr) +{ + struct net_device *ndev = NULL; + struct in6_addr laddr6; + u16 vlan_id = 0; + int i; + + if (!IS_ENABLED(CONFIG_IPV6)) + return vlan_id; + + for (i = 0; i < 4; i++) + laddr6.in6_u.u6_addr32[i] = htonl(addr[i]); + + rcu_read_lock(); + 
for_each_netdev_rcu(&init_net, ndev) { + if (ipv6_chk_addr(&init_net, &laddr6, ndev, 1)) { + vlan_id = rdma_vlan_dev_vlan_id(ndev); + break; + } + } + + rcu_read_unlock(); + if (vlan_id == 0xffff) + vlan_id = 0; + + return vlan_id; +} + +static int +qedr_addr4_resolve(struct qedr_dev *dev, + struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, u8 *dst_mac) +{ + __be32 src_ip = src_in->sin_addr.s_addr; + __be32 dst_ip = dst_in->sin_addr.s_addr; + struct neighbour *neigh = NULL; + struct rtable *rt = NULL; + int rc = 0; + + rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0); + if (IS_ERR(rt)) { + DP_ERR(dev, "ip_route_output returned error\n"); + return -EINVAL; + } + + neigh = dst_neigh_lookup(&rt->dst, &dst_ip); + + if (neigh) { + rcu_read_lock(); + if (neigh->nud_state & NUD_VALID) { + ether_addr_copy(dst_mac, neigh->ha); + DP_DEBUG(dev, QEDR_MSG_QP, "mac_addr=[%pM]\n", dst_mac); + } else { + neigh_event_send(neigh, NULL); + } + rcu_read_unlock(); + neigh_release(neigh); + } + + ip_rt_put(rt); + + return rc; +} + +static int +qedr_addr6_resolve(struct qedr_dev *dev, + struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, u8 *dst_mac) +{ + struct neighbour *neigh = NULL; + struct dst_entry *dst; + struct flowi6 fl6; + int rc = 0; + + memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = dst_in->sin6_addr; + fl6.saddr = src_in->sin6_addr; + + dst = ip6_route_output(&init_net, NULL, &fl6); + + if ((!dst) || dst->error) { + if (dst) { + dst_release(dst); + DP_ERR(dev, + "ip6_route_output returned dst->error = %d\n", + dst->error); + } + return -EINVAL; + } + neigh = dst_neigh_lookup(dst, &dst_in); + + if (neigh) { + rcu_read_lock(); + if (neigh->nud_state & NUD_VALID) { + ether_addr_copy(dst_mac, neigh->ha); + DP_DEBUG(dev, QEDR_MSG_QP, "mac_addr=[%pM]\n", dst_mac); + } else { + neigh_event_send(neigh, NULL); + } + rcu_read_unlock(); + neigh_release(neigh); + } + + dst_release(dst); + + return rc; +} + +int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct qedr_dev *dev = get_qedr_dev(cm_id->device); + struct qed_iwarp_connect_out out_params; + struct qed_iwarp_connect_in in_params; + struct qed_iwarp_cm_info *cm_info; + struct sockaddr_in6 *laddr6; + struct sockaddr_in6 *raddr6; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + struct qedr_iw_ep *ep; + struct qedr_qp *qp; + int rc = 0; + int i; + + qp = idr_find(&dev->qpidr, conn_param->qpn); + + laddr = (struct sockaddr_in *)&cm_id->local_addr; + raddr = (struct sockaddr_in *)&cm_id->remote_addr; + laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr; + raddr6 = (struct sockaddr_in6 *)&cm_id->remote_addr; + + DP_DEBUG(dev, QEDR_MSG_IWARP, + "Connect source address: %pISpc, remote address: %pISpc\n", + &cm_id->local_addr, &cm_id->remote_addr); + + if (!laddr->sin_port || !raddr->sin_port) + return -EINVAL; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + ep->dev = dev; + ep->qp = qp; + qp->ep = ep; + cm_id->add_ref(cm_id); + ep->cm_id = cm_id; + + in_params.event_cb = qedr_iw_event_handler; + in_params.cb_context = ep; + + cm_info = &in_params.cm_info; + memset(cm_info->local_ip, 0, sizeof(cm_info->local_ip)); + memset(cm_info->remote_ip, 0, sizeof(cm_info->remote_ip)); + + if (cm_id->remote_addr.ss_family == AF_INET) { + cm_info->ip_version = QED_TCP_IPV4; + + cm_info->remote_ip[0] = ntohl(raddr->sin_addr.s_addr); + cm_info->local_ip[0] = ntohl(laddr->sin_addr.s_addr); + cm_info->remote_port = ntohs(raddr->sin_port); + cm_info->local_port = ntohs(laddr->sin_port); 
+ cm_info->vlan = qedr_iw_get_vlan_ipv4(dev, cm_info->local_ip); + + rc = qedr_addr4_resolve(dev, laddr, raddr, + (u8 *)in_params.remote_mac_addr); + + in_params.mss = dev->iwarp_max_mtu - + (sizeof(struct iphdr) + sizeof(struct tcphdr)); + + } else { + in_params.cm_info.ip_version = QED_TCP_IPV6; + + for (i = 0; i < 4; i++) { + cm_info->remote_ip[i] = + ntohl(raddr6->sin6_addr.in6_u.u6_addr32[i]); + cm_info->local_ip[i] = + ntohl(laddr6->sin6_addr.in6_u.u6_addr32[i]); + } + + cm_info->local_port = ntohs(laddr6->sin6_port); + cm_info->remote_port = ntohs(raddr6->sin6_port); + + in_params.mss = dev->iwarp_max_mtu - + (sizeof(struct ipv6hdr) + sizeof(struct tcphdr)); + + cm_info->vlan = qedr_iw_get_vlan_ipv6(cm_info->local_ip); + + rc = qedr_addr6_resolve(dev, laddr6, raddr6, + (u8 *)in_params.remote_mac_addr); + } + if (rc) + goto err; + + DP_DEBUG(dev, QEDR_MSG_IWARP, + "ord = %d ird=%d private_data=%p private_data_len=%d rq_psn=%d\n", + conn_param->ord, conn_param->ird, conn_param->private_data, + conn_param->private_data_len, qp->rq_psn); + + cm_info->ord = conn_param->ord; + cm_info->ird = conn_param->ird; + cm_info->private_data = conn_param->private_data; + cm_info->private_data_len = conn_param->private_data_len; + in_params.qp = qp->qed_qp; + memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN); + + ep->during_connect = 1; + rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params); + if (rc) + goto err; + + return rc; + +err: + cm_id->rem_ref(cm_id); + kfree(ep); + return rc; +} + +int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct qedr_dev *dev = get_qedr_dev(cm_id->device); + struct qedr_iw_listener *listener; + struct qed_iwarp_listen_in iparams; + struct qed_iwarp_listen_out oparams; + struct sockaddr_in *laddr; + struct sockaddr_in6 *laddr6; + int rc; + int i; + + laddr = (struct sockaddr_in *)&cm_id->local_addr; + laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr; + + DP_DEBUG(dev, QEDR_MSG_IWARP, + "Create Listener address: %pISpc\n", &cm_id->local_addr); + + listener = kzalloc(sizeof(*listener), GFP_KERNEL); + if (!listener) + return -ENOMEM; + + listener->dev = dev; + cm_id->add_ref(cm_id); + listener->cm_id = cm_id; + listener->backlog = backlog; + + iparams.cb_context = listener; + iparams.event_cb = qedr_iw_event_handler; + iparams.max_backlog = backlog; + + if (cm_id->local_addr.ss_family == AF_INET) { + iparams.ip_version = QED_TCP_IPV4; + memset(iparams.ip_addr, 0, sizeof(iparams.ip_addr)); + + iparams.ip_addr[0] = ntohl(laddr->sin_addr.s_addr); + iparams.port = ntohs(laddr->sin_port); + iparams.vlan = qedr_iw_get_vlan_ipv4(dev, iparams.ip_addr); + } else { + iparams.ip_version = QED_TCP_IPV6; + + for (i = 0; i < 4; i++) { + iparams.ip_addr[i] = + ntohl(laddr6->sin6_addr.in6_u.u6_addr32[i]); + } + + iparams.port = ntohs(laddr6->sin6_port); + + iparams.vlan = qedr_iw_get_vlan_ipv6(iparams.ip_addr); + } + rc = dev->ops->iwarp_create_listen(dev->rdma_ctx, &iparams, &oparams); + if (rc) + goto err; + + listener->qed_handle = oparams.handle; + cm_id->provider_data = listener; + return rc; + +err: + cm_id->rem_ref(cm_id); + kfree(listener); + return rc; +} + +int qedr_iw_destroy_listen(struct iw_cm_id *cm_id) +{ + struct qedr_iw_listener *listener = cm_id->provider_data; + struct qedr_dev *dev = get_qedr_dev(cm_id->device); + int rc = 0; + + if (listener->qed_handle) + rc = dev->ops->iwarp_destroy_listen(dev->rdma_ctx, + listener->qed_handle); + + cm_id->rem_ref(cm_id); + return rc; +} + +int qedr_iw_accept(struct iw_cm_id 
*cm_id, struct iw_cm_conn_param *conn_param) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)cm_id->provider_data; + struct qedr_dev *dev = ep->dev; + struct qedr_qp *qp; + struct qed_iwarp_accept_in params; + int rc; + + DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); + + qp = idr_find(&dev->qpidr, conn_param->qpn); + if (!qp) { + DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); + return -EINVAL; + } + + ep->qp = qp; + qp->ep = ep; + cm_id->add_ref(cm_id); + ep->cm_id = cm_id; + + params.ep_context = ep->qed_context; + params.cb_context = ep; + params.qp = ep->qp->qed_qp; + params.private_data = conn_param->private_data; + params.private_data_len = conn_param->private_data_len; + params.ird = conn_param->ird; + params.ord = conn_param->ord; + + ep->during_connect = 1; + rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); + if (rc) + goto err; + + return rc; +err: + ep->during_connect = 0; + cm_id->rem_ref(cm_id); + return rc; +} + +int qedr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)cm_id->provider_data; + struct qedr_dev *dev = ep->dev; + struct qed_iwarp_reject_in params; + + params.ep_context = ep->qed_context; + params.cb_context = ep; + params.private_data = pdata; + params.private_data_len = pdata_len; + ep->qp = NULL; + + return dev->ops->iwarp_reject(dev->rdma_ctx, ¶ms); +} + void qedr_iw_qp_add_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.h b/drivers/infiniband/hw/qedr/qedr_iw_cm.h index ea6d17afbff2..08f4b1067e6c 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.h +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.h @@ -30,6 +30,18 @@ * SOFTWARE. */ #include + +int qedr_iw_connect(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + +int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog); + +int qedr_iw_destroy_listen(struct iw_cm_id *cm_id); + +int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); + +int qedr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); + void qedr_iw_qp_add_ref(struct ib_qp *qp); void qedr_iw_qp_rem_ref(struct ib_qp *qp); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 2d8d031ca713..1e8799c79d85 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2257,6 +2257,23 @@ int qedr_destroy_qp(struct ib_qp *ibqp) /* Change the QP state to ERROR */ qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } + } else { + /* Wait for the connect/accept to complete */ + if (qp->ep) { + int wait_count = 1; + + while (qp->ep->during_connect) { + DP_DEBUG(dev, QEDR_MSG_QP, + "Still in during connect/accept\n"); + + msleep(100); + if (wait_count++ > 200) { + DP_NOTICE(dev, + "during connect timeout\n"); + break; + } + } + } } if (qp->qp_type == IB_QPT_GSI) From 69ad0e7fe8452a6bc9b619e3f76a77f19d9687ab Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 26 Jul 2017 14:41:57 +0300 Subject: [PATCH 008/296] RDMA/qedr: Add support for iWARP in user space Pass the second doorbell offset to userspace in create_qp response. Pbl allocation is different for RoCE and iWARP and requires different handling. 
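For the iWARP case the population step, condensed from qedr_iwarp_populate_user_qp() in the hunks below, looks like this:

	/* qed owns the PBL pages; qedr fills them from the user umem */
	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
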
RoCE allocated the pbl and passes the pointer to qed, where-as in iWARP, qed allocates the pbl and qedr populates it after it returns Signed-off-by: Michal Kalderon Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 80 +++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 1e8799c79d85..1c995c6da8d8 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -786,7 +786,8 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, struct qedr_dev *dev, struct qedr_userq *q, u64 buf_addr, size_t buf_len, - int access, int dmasync) + int access, int dmasync, + int alloc_and_init) { u32 fw_pages; int rc; @@ -807,19 +808,25 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, if (rc) goto err0; - q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL); - if (IS_ERR(q->pbl_tbl)) { - rc = PTR_ERR(q->pbl_tbl); - goto err0; - } - + if (alloc_and_init) { + q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL); + if (IS_ERR(q->pbl_tbl)) { + rc = PTR_ERR(q->pbl_tbl); + goto err0; + } qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info, FW_PAGE_SHIFT); + } else { + q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL); + if (!q->pbl_tbl) + goto err0; + } return 0; err0: ib_umem_release(q->umem); + q->umem = NULL; return rc; } @@ -945,7 +952,8 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, - ureq.len, IB_ACCESS_LOCAL_WRITE, 1); + ureq.len, IB_ACCESS_LOCAL_WRITE, + 1, 1); if (rc) goto err0; @@ -1238,18 +1246,34 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, return 0; } -static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp, +static void qedr_copy_rq_uresp(struct qedr_dev *dev, + struct qedr_create_qp_uresp *uresp, struct qedr_qp *qp) { - uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + /* iWARP requires two doorbells per RQ. 
*/ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + uresp->rq_db_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); + uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); + } else { + uresp->rq_db_offset = + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + } + uresp->rq_icid = qp->icid; } -static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp, +static void qedr_copy_sq_uresp(struct qedr_dev *dev, + struct qedr_create_qp_uresp *uresp, struct qedr_qp *qp) { uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); - uresp->sq_icid = qp->icid + 1; + + /* iWARP uses the same cid for rq and sq */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + uresp->sq_icid = qp->icid; + else + uresp->sq_icid = qp->icid + 1; } static int qedr_copy_qp_uresp(struct qedr_dev *dev, @@ -1259,8 +1283,8 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev, int rc; memset(&uresp, 0, sizeof(uresp)); - qedr_copy_sq_uresp(&uresp, qp); - qedr_copy_rq_uresp(&uresp, qp); + qedr_copy_sq_uresp(dev, &uresp, qp); + qedr_copy_rq_uresp(dev, &uresp, qp); uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; uresp.qp_id = qp->qp_id; @@ -1378,6 +1402,25 @@ static void qedr_idr_remove(struct qedr_dev *dev, u32 id) idr_remove(&dev->qpidr, id); spin_unlock_irq(&dev->idr_lock); } + +static inline void +qedr_iwarp_populate_user_qp(struct qedr_dev *dev, + struct qedr_qp *qp, + struct qed_rdma_create_qp_out_params *out_params) +{ + qp->usq.pbl_tbl->va = out_params->sq_pbl_virt; + qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys; + + qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, + &qp->usq.pbl_info, FW_PAGE_SHIFT); + + qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; + qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + + qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, + &qp->urq.pbl_info, FW_PAGE_SHIFT); +} + static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) { if (qp->usq.umem) @@ -1401,6 +1444,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct ib_ucontext *ib_ctx = NULL; struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; + int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; ib_ctx = ibpd->uobject->context; @@ -1415,14 +1459,13 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* SQ - read access only (0), dma sync not required (0) */ rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, 0, 0); + ureq.sq_len, 0, 0, alloc_and_init); if (rc) return rc; /* RQ - read access only (0), dma sync not required (0) */ rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0); - + ureq.rq_len, 0, 0, alloc_and_init); if (rc) return rc; @@ -1443,6 +1486,9 @@ static int qedr_create_user_qp(struct qedr_dev *dev, goto err1; } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iwarp_populate_user_qp(dev, qp, &out_params); + qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; From 89fd2576cba4bacd1a1a8738313a913d4ca95855 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 25 Aug 2017 11:18:39 +0300 Subject: [PATCH 009/296] RDMA/qedr: Missing error code in qedr_init_user_queue() We should return -ENOMEM if the kzalloc() fails. We currently return success. 
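For context, rc is still zero from the preceding successful call when the allocation fails, so the error path used to return success with a NULL pbl table. The patched branch:

	q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
	if (!q->pbl_tbl) {
		rc = -ENOMEM;	/* previously left at 0 */
		goto err0;
	}
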
Fixes: 69ad0e7fe845 ("RDMA/qedr: Add support for iWARP in user space") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 1c995c6da8d8..249b560a32d7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -818,8 +818,10 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, FW_PAGE_SHIFT); } else { q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL); - if (!q->pbl_tbl) + if (!q->pbl_tbl) { + rc = -ENOMEM; goto err0; + } } return 0; From bd491d2ad3c071fb77f487a471aa66f205e2e7f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 16:59:20 +0200 Subject: [PATCH 010/296] RDMA/qedr: fix build error without ipv6 When CONFIG_IPV6 is disabled, we run into a link error: drivers/infiniband/hw/qedr/qedr_iw_cm.o: In function `qedr_addr6_resolve.isra.3': qedr_iw_cm.c:(.text+0x4e0): undefined reference to `ip6_route_output_flags' The ipv6 handling code is obviously not needed here, so this adds a compile-time check for the Kconfig symbol in all three places in the code that decide between ipv4 and ipv6. We don't have to worry about a link error with QEDR=y/IPV6=m, as that configuration is already prohibited by CONFIG_INFINIBAND depending on "m || IPV6 != m". Fixes: e411e0587e0d ("RDMA/qedr: Add iWARP connection management functions") Signed-off-by: Arnd Bergmann Acked-by: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index fe9b2b6149b0..2950d3f6ecb8 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -98,7 +98,8 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) event.event = IW_CM_EVENT_CONNECT_REQUEST; event.status = params->status; - if (params->cm_info->ip_version == QED_TCP_IPV4) + if (!IS_ENABLED(CONFIG_IPV6) || + params->cm_info->ip_version == QED_TCP_IPV4) qedr_fill_sockaddr4(params->cm_info, &event); else qedr_fill_sockaddr6(params->cm_info, &event); @@ -522,7 +523,8 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) memset(cm_info->local_ip, 0, sizeof(cm_info->local_ip)); memset(cm_info->remote_ip, 0, sizeof(cm_info->remote_ip)); - if (cm_id->remote_addr.ss_family == AF_INET) { + if (!IS_ENABLED(CONFIG_IPV6) || + cm_id->remote_addr.ss_family == AF_INET) { cm_info->ip_version = QED_TCP_IPV4; cm_info->remote_ip[0] = ntohl(raddr->sin_addr.s_addr); @@ -616,7 +618,8 @@ int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog) iparams.event_cb = qedr_iw_event_handler; iparams.max_backlog = backlog; - if (cm_id->local_addr.ss_family == AF_INET) { + if (!IS_ENABLED(CONFIG_IPV6) || + cm_id->local_addr.ss_family == AF_INET) { iparams.ip_version = QED_TCP_IPV4; memset(iparams.ip_addr, 0, sizeof(iparams.ip_addr)); From 08805fdbeb2d9300c09e681793518fb4da522235 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:22:59 +0800 Subject: [PATCH 011/296] RDMA/hns: Split hw v1 driver from hns roce driver The hardware-relevant definitions and operations are implemented in the hns_roce_hw_v* files. Depending on the chip variant, the file is named hns_roce_hw_v1.c, hns_roce_hw_v2.c, etc.
The general software process flow, common structures, and allocation algorithms are implemented in the other files of the hns roce driver. In order to support the scalability of the hardware version, the common driver features are in hns-roce.ko, and the hardware-relevant operations are in hns_roce_hw_v1.ko or hns_roce_hw_v2.ko, depending on the chip series. Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/Kconfig | 15 +- drivers/infiniband/hw/hns/Makefile | 4 +- drivers/infiniband/hw/hns/hns_roce_alloc.c | 2 + drivers/infiniband/hw/hns/hns_roce_cmd.c | 3 + drivers/infiniband/hw/hns/hns_roce_cq.c | 3 + drivers/infiniband/hw/hns/hns_roce_device.h | 6 +- drivers/infiniband/hw/hns/hns_roce_hem.c | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 244 ++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_main.c | 282 +++----------------- drivers/infiniband/hw/hns/hns_roce_mr.c | 3 + drivers/infiniband/hw/hns/hns_roce_pd.c | 2 + drivers/infiniband/hw/hns/hns_roce_qp.c | 12 + 12 files changed, 330 insertions(+), 247 deletions(-) diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index 61c93bbd230d..37e33e522c19 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,10 +1,21 @@ config INFINIBAND_HNS tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON - depends on (ARM64 || (COMPILE_TEST && 64BIT)) && HNS && HNS_DSAF && HNS_ENET + depends on ARM64 || (COMPILE_TEST && 64BIT) ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine - is used in Hisilicon Hi1610 and more further ICT SoC. + is used in Hisilicon Hip06 and more further ICT SoC based on + platform device. To compile this driver as a module, choose M here: the module will be called hns-roce. + +config INFINIBAND_HNS_HIP06 + tristate "Hisilicon Hip06 Family RoCE support" + depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET + ---help--- + RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and + Hip07 SoC. These RoCE engines are platform devices. + + To compile this driver as a module, choose M here: the module + will be called hns-roce-hw-v1.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 7e8ebd24dcae..806ac4e678bd 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -5,4 +5,6 @@ obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_hw_v1.o + hns_roce_cq.o hns_roce_alloc.o +obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o +hns-roce-hw-v1-objs := hns_roce_hw_v1.o diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index e1b433cdd5e2..71a5098579c3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -67,6 +67,7 @@ void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, { hns_roce_bitmap_free_range(bitmap, obj, 1, rr); } +EXPORT_SYMBOL_GPL(hns_roce_bitmap_free); int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, int align, unsigned long *obj) @@ -177,6 +178,7 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, kfree(buf->page_list); } } +EXPORT_SYMBOL_GPL(hns_roce_buf_free); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index b94dcd823ad1..c981d9db5942 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -269,6 +269,7 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, in_modifier, op_modifier, op, timeout); } +EXPORT_SYMBOL_GPL(hns_roce_cmd_mbox); int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) { @@ -356,6 +357,7 @@ struct hns_roce_cmd_mailbox return mailbox; } +EXPORT_SYMBOL_GPL(hns_roce_alloc_cmd_mailbox); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox) @@ -366,3 +368,4 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } +EXPORT_SYMBOL_GPL(hns_roce_free_cmd_mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index b89fd711019e..1d57df7a8967 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -205,6 +205,7 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } +EXPORT_SYMBOL_GPL(hns_roce_free_cq); static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, struct ib_ucontext *context, @@ -385,6 +386,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, kfree(hr_cq); return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(hns_roce_ib_create_cq); int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) { @@ -410,6 +412,7 @@ int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) return ret; } +EXPORT_SYMBOL_GPL(hns_roce_ib_destroy_cq); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) { diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e493a61e14e1..fc2b82d5de70 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -571,7 +571,7 @@ struct hns_roce_dev { int loop_idc; dma_addr_t 
tptr_dma_addr; /*only for hw v1*/ u32 tptr_size; /*only for hw v1*/ - struct hns_roce_hw *hw; + const struct hns_roce_hw *hw; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -749,7 +749,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); - -extern struct hns_roce_hw hns_roce_hw_v1; +int hns_roce_init(struct hns_roce_dev *hr_dev); +void hns_roce_exit(struct hns_roce_dev *hr_dev); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index c5104e0b2916..06aed4a93d68 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -314,6 +314,7 @@ void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj, mutex_unlock(&table->mutex); return page ? lowmem_page_address(page) + offset : NULL; } +EXPORT_SYMBOL_GPL(hns_roce_table_find); int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 747efd1ae5a6..917d00347e2d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "hns_roce_common.h" #include "hns_roce_device.h" @@ -3843,7 +3844,7 @@ int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) struct hns_roce_v1_priv hr_v1_priv; -struct hns_roce_hw hns_roce_hw_v1 = { +static const struct hns_roce_hw hns_roce_hw_v1 = { .reset = hns_roce_v1_reset, .hw_profile = hns_roce_v1_profile, .hw_init = hns_roce_v1_init, @@ -3865,3 +3866,244 @@ struct hns_roce_hw hns_roce_hw_v1 = { .destroy_cq = hns_roce_v1_destroy_cq, .priv = &hr_v1_priv, }; + +static const struct of_device_id hns_roce_of_match[] = { + { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, }, + {}, +}; +MODULE_DEVICE_TABLE(of, hns_roce_of_match); + +static const struct acpi_device_id hns_roce_acpi_match[] = { + { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match); + +static int hns_roce_node_match(struct device *dev, void *fwnode) +{ + return dev->fwnode == fwnode; +} + +static struct +platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode) +{ + struct device *dev; + + /* get the 'device' corresponding to the matching 'fwnode' */ + dev = bus_find_device(&platform_bus_type, NULL, + fwnode, hns_roce_node_match); + /* get the platform device */ + return dev ? 
to_platform_device(dev) : NULL; +} + +static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct platform_device *pdev = NULL; + struct net_device *netdev = NULL; + struct device_node *net_node; + struct resource *res; + int port_cnt = 0; + u8 phy_port; + int ret; + int i; + + /* check if we are compatible with the underlying SoC */ + if (dev_of_node(dev)) { + const struct of_device_id *of_id; + + of_id = of_match_node(hns_roce_of_match, dev->of_node); + if (!of_id) { + dev_err(dev, "device is not compatible!\n"); + return -ENXIO; + } + hr_dev->hw = (const struct hns_roce_hw *)of_id->data; + if (!hr_dev->hw) { + dev_err(dev, "couldn't get H/W specific DT data!\n"); + return -ENXIO; + } + } else if (is_acpi_device_node(dev->fwnode)) { + const struct acpi_device_id *acpi_id; + + acpi_id = acpi_match_device(hns_roce_acpi_match, dev); + if (!acpi_id) { + dev_err(dev, "device is not compatible!\n"); + return -ENXIO; + } + hr_dev->hw = (const struct hns_roce_hw *) acpi_id->driver_data; + if (!hr_dev->hw) { + dev_err(dev, "couldn't get H/W specific ACPI data!\n"); + return -ENXIO; + } + } else { + dev_err(dev, "can't read compatibility data from DT or ACPI\n"); + return -ENXIO; + } + + /* get the mapped register base address */ + res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "memory resource not found!\n"); + return -EINVAL; + } + hr_dev->reg_base = devm_ioremap_resource(dev, res); + if (IS_ERR(hr_dev->reg_base)) + return PTR_ERR(hr_dev->reg_base); + + /* read the node_guid of IB device from the DT or ACPI */ + ret = device_property_read_u8_array(dev, "node-guid", + (u8 *)&hr_dev->ib_dev.node_guid, + GUID_LEN); + if (ret) { + dev_err(dev, "couldn't get node_guid from DT or ACPI!\n"); + return ret; + } + + /* get the RoCE associated ethernet ports or netdevices */ + for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) { + if (dev_of_node(dev)) { + net_node = of_parse_phandle(dev->of_node, "eth-handle", + i); + if (!net_node) + continue; + pdev = of_find_device_by_node(net_node); + } else if (is_acpi_device_node(dev->fwnode)) { + struct acpi_reference_args args; + struct fwnode_handle *fwnode; + + ret = acpi_node_get_property_reference(dev->fwnode, + "eth-handle", + i, &args); + if (ret) + continue; + fwnode = acpi_fwnode_handle(args.adev); + pdev = hns_roce_find_pdev(fwnode); + } else { + dev_err(dev, "cannot read data from DT or ACPI\n"); + return -ENXIO; + } + + if (pdev) { + netdev = platform_get_drvdata(pdev); + phy_port = (u8)i; + if (netdev) { + hr_dev->iboe.netdevs[port_cnt] = netdev; + hr_dev->iboe.phy_port[port_cnt] = phy_port; + } else { + dev_err(dev, "no netdev found with pdev %s\n", + pdev->name); + return -ENODEV; + } + port_cnt++; + } + } + + if (port_cnt == 0) { + dev_err(dev, "unable to get eth-handle for available ports!\n"); + return -EINVAL; + } + + hr_dev->caps.num_ports = port_cnt; + + /* cmd issue mode: 0 is poll, 1 is event */ + hr_dev->cmd_mod = 1; + hr_dev->loop_idc = 0; + + /* read the interrupt names from the DT or ACPI */ + ret = device_property_read_string_array(dev, "interrupt-names", + hr_dev->irq_names, + HNS_ROCE_MAX_IRQ_NUM); + if (ret < 0) { + dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n"); + return ret; + } + + /* fetch the interrupt numbers */ + for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) { + hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); + if (hr_dev->irq[i] <= 0) { + dev_err(dev, "platform get of irq[=%d] failed!\n", i); + return -EINVAL; + } + } + + 
return 0; +} + +/** + * hns_roce_probe - RoCE driver entrance + * @pdev: pointer to platform device + * Return : int + * + */ +static int hns_roce_probe(struct platform_device *pdev) +{ + int ret; + struct hns_roce_dev *hr_dev; + struct device *dev = &pdev->dev; + + hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + if (!hr_dev) + return -ENOMEM; + + hr_dev->pdev = pdev; + platform_set_drvdata(pdev, hr_dev); + + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) && + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) { + dev_err(dev, "Not usable DMA addressing mode\n"); + ret = -EIO; + goto error_failed_get_cfg; + } + + ret = hns_roce_get_cfg(hr_dev); + if (ret) { + dev_err(dev, "Get Configuration failed!\n"); + goto error_failed_get_cfg; + } + + ret = hns_roce_init(hr_dev); + if (ret) { + dev_err(dev, "RoCE engine init failed!\n"); + goto error_failed_get_cfg; + } + + return 0; + +error_failed_get_cfg: + ib_dealloc_device(&hr_dev->ib_dev); + + return ret; +} + +/** + * hns_roce_remove - remove RoCE device + * @pdev: pointer to platform device + */ +static int hns_roce_remove(struct platform_device *pdev) +{ + struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); + + hns_roce_exit(hr_dev); + ib_dealloc_device(&hr_dev->ib_dev); + + return 0; +} + +static struct platform_driver hns_roce_driver = { + .probe = hns_roce_probe, + .remove = hns_roce_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = hns_roce_of_match, + .acpi_match_table = ACPI_PTR(hns_roce_acpi_match), + }, +}; + +module_platform_driver(hns_roce_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Wei Hu "); +MODULE_AUTHOR("Nenglong Zhao "); +MODULE_AUTHOR("Lijun Ou "); +MODULE_DESCRIPTION("Hisilicon Hip06 Family RoCE Driver"); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d9777b662eba..7afa0c8a6b64 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -57,6 +57,7 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) { return gid_index * hr_dev->caps.num_ports + port; } +EXPORT_SYMBOL_GPL(hns_get_gid_index); static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { @@ -174,8 +175,9 @@ static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) u8 i; for (i = 0; i < hr_dev->caps.num_ports; i++) { - hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i], - hr_dev->caps.max_mtu); + if (hr_dev->hw->set_mtu) + hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i], + hr_dev->caps.max_mtu); hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr); } @@ -531,169 +533,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) return ret; } -static const struct of_device_id hns_roce_of_match[] = { - { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, }, - {}, -}; -MODULE_DEVICE_TABLE(of, hns_roce_of_match); - -static const struct acpi_device_id hns_roce_acpi_match[] = { - { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 }, - {}, -}; -MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match); - -static int hns_roce_node_match(struct device *dev, void *fwnode) -{ - return dev->fwnode == fwnode; -} - -static struct -platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode) -{ - struct device *dev; - - /* get the 'device'corresponding to matching 'fwnode' */ - dev = bus_find_device(&platform_bus_type, NULL, - fwnode, hns_roce_node_match); - /* get the platform device */ - return dev ? 
to_platform_device(dev) : NULL; -} - -static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) -{ - int i; - int ret; - u8 phy_port; - int port_cnt = 0; - struct device *dev = &hr_dev->pdev->dev; - struct device_node *net_node; - struct net_device *netdev = NULL; - struct platform_device *pdev = NULL; - struct resource *res; - - /* check if we are compatible with the underlying SoC */ - if (dev_of_node(dev)) { - const struct of_device_id *of_id; - - of_id = of_match_node(hns_roce_of_match, dev->of_node); - if (!of_id) { - dev_err(dev, "device is not compatible!\n"); - return -ENXIO; - } - hr_dev->hw = (struct hns_roce_hw *)of_id->data; - if (!hr_dev->hw) { - dev_err(dev, "couldn't get H/W specific DT data!\n"); - return -ENXIO; - } - } else if (is_acpi_device_node(dev->fwnode)) { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(hns_roce_acpi_match, dev); - if (!acpi_id) { - dev_err(dev, "device is not compatible!\n"); - return -ENXIO; - } - hr_dev->hw = (struct hns_roce_hw *) acpi_id->driver_data; - if (!hr_dev->hw) { - dev_err(dev, "couldn't get H/W specific ACPI data!\n"); - return -ENXIO; - } - } else { - dev_err(dev, "can't read compatibility data from DT or ACPI\n"); - return -ENXIO; - } - - /* get the mapped register base address */ - res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "memory resource not found!\n"); - return -EINVAL; - } - hr_dev->reg_base = devm_ioremap_resource(dev, res); - if (IS_ERR(hr_dev->reg_base)) - return PTR_ERR(hr_dev->reg_base); - - /* read the node_guid of IB device from the DT or ACPI */ - ret = device_property_read_u8_array(dev, "node-guid", - (u8 *)&hr_dev->ib_dev.node_guid, - GUID_LEN); - if (ret) { - dev_err(dev, "couldn't get node_guid from DT or ACPI!\n"); - return ret; - } - - /* get the RoCE associated ethernet ports or netdevices */ - for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) { - if (dev_of_node(dev)) { - net_node = of_parse_phandle(dev->of_node, "eth-handle", - i); - if (!net_node) - continue; - pdev = of_find_device_by_node(net_node); - } else if (is_acpi_device_node(dev->fwnode)) { - struct acpi_reference_args args; - struct fwnode_handle *fwnode; - - ret = acpi_node_get_property_reference(dev->fwnode, - "eth-handle", - i, &args); - if (ret) - continue; - fwnode = acpi_fwnode_handle(args.adev); - pdev = hns_roce_find_pdev(fwnode); - } else { - dev_err(dev, "cannot read data from DT or ACPI\n"); - return -ENXIO; - } - - if (pdev) { - netdev = platform_get_drvdata(pdev); - phy_port = (u8)i; - if (netdev) { - hr_dev->iboe.netdevs[port_cnt] = netdev; - hr_dev->iboe.phy_port[port_cnt] = phy_port; - } else { - dev_err(dev, "no netdev found with pdev %s\n", - pdev->name); - return -ENODEV; - } - port_cnt++; - } - } - - if (port_cnt == 0) { - dev_err(dev, "unable to get eth-handle for available ports!\n"); - return -EINVAL; - } - - hr_dev->caps.num_ports = port_cnt; - - /* cmd issue mode: 0 is poll, 1 is event */ - hr_dev->cmd_mod = 1; - hr_dev->loop_idc = 0; - - /* read the interrupt names from the DT or ACPI */ - ret = device_property_read_string_array(dev, "interrupt-names", - hr_dev->irq_names, - HNS_ROCE_MAX_IRQ_NUM); - if (ret < 0) { - dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n"); - return ret; - } - - /* fetch the interrupt numbers */ - for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) { - hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); - if (hr_dev->irq[i] <= 0) { - dev_err(dev, "platform get of irq[=%d] failed!\n", i); - return -EINVAL; - } - } - - return 0; -} - 
static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) { int ret; @@ -826,42 +665,17 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) return ret; } -/** - * hns_roce_probe - RoCE driver entrance - * @pdev: pointer to platform device - * Return : int - * - */ -static int hns_roce_probe(struct platform_device *pdev) +int hns_roce_init(struct hns_roce_dev *hr_dev) { int ret; - struct hns_roce_dev *hr_dev; - struct device *dev = &pdev->dev; + struct device *dev = &hr_dev->pdev->dev; - hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); - if (!hr_dev) - return -ENOMEM; - - hr_dev->pdev = pdev; - platform_set_drvdata(pdev, hr_dev); - - if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) && - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) { - dev_err(dev, "Not usable DMA addressing mode\n"); - ret = -EIO; - goto error_failed_get_cfg; - } - - ret = hns_roce_get_cfg(hr_dev); - if (ret) { - dev_err(dev, "Get Configuration failed!\n"); - goto error_failed_get_cfg; - } - - ret = hr_dev->hw->reset(hr_dev, true); - if (ret) { - dev_err(dev, "Reset RoCE engine failed!\n"); - goto error_failed_get_cfg; + if (hr_dev->hw->reset) { + ret = hr_dev->hw->reset(hr_dev, true); + if (ret) { + dev_err(dev, "Reset RoCE engine failed!\n"); + return ret; + } } hr_dev->hw->hw_profile(hr_dev); @@ -872,10 +686,12 @@ static int hns_roce_probe(struct platform_device *pdev) goto error_failed_cmd_init; } - ret = hns_roce_init_eq_table(hr_dev); - if (ret) { - dev_err(dev, "eq init failed!\n"); - goto error_failed_eq_table; + if (hr_dev->cmd_mod) { + ret = hns_roce_init_eq_table(hr_dev); + if (ret) { + dev_err(dev, "eq init failed!\n"); + goto error_failed_eq_table; + } } if (hr_dev->cmd_mod) { @@ -898,10 +714,12 @@ static int hns_roce_probe(struct platform_device *pdev) goto error_failed_setup_hca; } - ret = hr_dev->hw->hw_init(hr_dev); - if (ret) { - dev_err(dev, "hw_init failed!\n"); - goto error_failed_engine_init; + if (hr_dev->hw->hw_init) { + ret = hr_dev->hw->hw_init(hr_dev); + if (ret) { + dev_err(dev, "hw_init failed!\n"); + goto error_failed_engine_init; + } } ret = hns_roce_register_device(hr_dev); @@ -911,7 +729,8 @@ static int hns_roce_probe(struct platform_device *pdev) return 0; error_failed_register_device: - hr_dev->hw->hw_exit(hr_dev); + if (hr_dev->hw->hw_exit) + hr_dev->hw->hw_exit(hr_dev); error_failed_engine_init: hns_roce_cleanup_bitmap(hr_dev); @@ -924,58 +743,41 @@ static int hns_roce_probe(struct platform_device *pdev) hns_roce_cmd_use_polling(hr_dev); error_failed_use_event: - hns_roce_cleanup_eq_table(hr_dev); + if (hr_dev->cmd_mod) + hns_roce_cleanup_eq_table(hr_dev); error_failed_eq_table: hns_roce_cmd_cleanup(hr_dev); error_failed_cmd_init: - ret = hr_dev->hw->reset(hr_dev, false); - if (ret) - dev_err(&hr_dev->pdev->dev, "roce_engine reset fail\n"); - -error_failed_get_cfg: - ib_dealloc_device(&hr_dev->ib_dev); + if (hr_dev->hw->reset) { + ret = hr_dev->hw->reset(hr_dev, false); + if (ret) + dev_err(dev, "Dereset RoCE engine failed!\n"); + } return ret; } +EXPORT_SYMBOL_GPL(hns_roce_init); -/** - * hns_roce_remove - remove RoCE device - * @pdev: pointer to platform device - */ -static int hns_roce_remove(struct platform_device *pdev) +void hns_roce_exit(struct hns_roce_dev *hr_dev) { - struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); - hns_roce_unregister_device(hr_dev); - hr_dev->hw->hw_exit(hr_dev); + if (hr_dev->hw->hw_exit) + hr_dev->hw->hw_exit(hr_dev); hns_roce_cleanup_bitmap(hr_dev); hns_roce_cleanup_hem(hr_dev); if (hr_dev->cmd_mod) 
hns_roce_cmd_use_polling(hr_dev); - hns_roce_cleanup_eq_table(hr_dev); + if (hr_dev->cmd_mod) + hns_roce_cleanup_eq_table(hr_dev); hns_roce_cmd_cleanup(hr_dev); - hr_dev->hw->reset(hr_dev, false); - - ib_dealloc_device(&hr_dev->ib_dev); - - return 0; + if (hr_dev->hw->reset) + hr_dev->hw->reset(hr_dev, false); } - -static struct platform_driver hns_roce_driver = { - .probe = hns_roce_probe, - .remove = hns_roce_remove, - .driver = { - .name = DRV_NAME, - .of_match_table = hns_roce_of_match, - .acpi_match_table = ACPI_PTR(hns_roce_acpi_match), - }, -}; - -module_platform_driver(hns_roce_driver); +EXPORT_SYMBOL_GPL(hns_roce_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Wei Hu "); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index e387360e3780..ed3571385d95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -47,6 +47,7 @@ unsigned long key_to_hw_index(u32 key) { return (key << 24) | (key >> 8); } +EXPORT_SYMBOL_GPL(key_to_hw_index); static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox, @@ -65,6 +66,7 @@ int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } +EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt); static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, unsigned long *seg) @@ -233,6 +235,7 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); } +EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, u64 size, u32 access, int npages, diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index a64500fa1145..ffbf249d0885 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -86,6 +86,7 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, return &pd->ibpd; } +EXPORT_SYMBOL_GPL(hns_roce_alloc_pd); int hns_roce_dealloc_pd(struct ib_pd *pd) { @@ -94,6 +95,7 @@ int hns_roce_dealloc_pd(struct ib_pd *pd) return 0; } +EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd); int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) { diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f5dd21c2d275..ad13d05da945 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -136,6 +136,7 @@ enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) return HNS_ROCE_QP_NUM_STATE; } } +EXPORT_SYMBOL_GPL(to_hns_roce_state); static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, struct hns_roce_qp *hr_qp) @@ -227,6 +228,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->qpn & (hr_dev->caps.num_qps - 1)); spin_unlock_irqrestore(&qp_table->lock, flags); } +EXPORT_SYMBOL_GPL(hns_roce_qp_remove); void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { @@ -241,6 +243,7 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); } } +EXPORT_SYMBOL_GPL(hns_roce_qp_free); void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, int cnt) @@ -252,6 +255,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev 
*hr_dev, int base_qpn, hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR); } +EXPORT_SYMBOL_GPL(hns_roce_release_range_qp); static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, int is_user, int has_srq, @@ -629,6 +633,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, return &hr_qp->ibqp; } +EXPORT_SYMBOL_GPL(hns_roce_create_qp); int to_hr_qp_type(int qp_type) { @@ -647,6 +652,7 @@ int to_hr_qp_type(int qp_type) return transport_type; } +EXPORT_SYMBOL_GPL(to_hr_qp_type); int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) @@ -745,6 +751,7 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } +EXPORT_SYMBOL_GPL(hns_roce_lock_cqs); void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) __releases(&send_cq->lock) @@ -761,6 +768,7 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, spin_unlock_irq(&recv_cq->lock); } } +EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs); __be32 send_ieth(struct ib_send_wr *wr) { @@ -774,6 +782,7 @@ __be32 send_ieth(struct ib_send_wr *wr) return 0; } } +EXPORT_SYMBOL_GPL(send_ieth); static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) { @@ -785,11 +794,13 @@ void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n) { return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); } +EXPORT_SYMBOL_GPL(get_recv_wqe); void *get_send_wqe(struct hns_roce_qp *hr_qp, int n) { return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); } +EXPORT_SYMBOL_GPL(get_send_wqe); bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, struct ib_cq *ib_cq) @@ -808,6 +819,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, return cur + nreq >= hr_wq->max_post; } +EXPORT_SYMBOL_GPL(hns_roce_wq_overflow); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) { From 016a0059d29d93aa5de4eb7bd535da672ac89ae1 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:00 +0800 Subject: [PATCH 012/296] RDMA/hns: Move priv in order to add multiple hns_roce support Move the data member called priv from hns_roce_hw to hns_roce_dev structure in order to support multiple hns_roce devices in one system at the same time. For example, there are two hip06 engines in the system. 
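The resulting access-pattern change is small but important; a sketch condensed from the hunks below shows it (the private data now hangs off each device instance instead of the shared ops table, so two hip06 engines no longer alias the same state):

	/* Before: one priv shared by every device using these ops */
	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;

	/* After: per-device private area, allocated in hns_roce_probe()
	 * and freed in hns_roce_remove()
	 */
	priv = (struct hns_roce_v1_priv *)hr_dev->priv;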
Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 61 ++++++++++++--------- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index fc2b82d5de70..aa9281e66b8b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -535,7 +535,6 @@ struct hns_roce_hw { int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); int (*destroy_cq)(struct ib_cq *ibcq); - void *priv; }; struct hns_roce_dev { @@ -572,6 +571,7 @@ struct hns_roce_dev { dma_addr_t tptr_dma_addr; /*only for hw v1*/ u32 tptr_size; /*only for hw v1*/ const struct hns_roce_hw *hw; + void *priv; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 917d00347e2d..e4d13a93ef63 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -473,7 +473,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, dma_addr_t sdb_dma_addr; u32 val; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; db = &priv->db_table; /* Configure extend SDB threshold */ @@ -512,7 +512,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, dma_addr_t odb_dma_addr; u32 val; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; db = &priv->db_table; /* Configure extend ODB threshold */ @@ -548,7 +548,7 @@ static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod, dma_addr_t odb_dma_addr; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; db = &priv->db_table; db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL); @@ -669,7 +669,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) u8 port = 0; u8 sl; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; /* Reserved cq for loop qp */ @@ -817,7 +817,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) int ret; int i; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { @@ -851,7 +851,7 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev) u32 odb_evt_mod; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; db = &priv->db_table; memset(db, 0, sizeof(*db)); @@ -907,7 +907,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) unsigned long end = msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work), @@ -983,7 +983,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) hr_dev = to_hr_dev(mr_work->ib_dev); dev = &hr_dev->pdev->dev; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; 
+ priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; mr_free_cq = free_mr->mr_free_cq; @@ -1039,7 +1039,7 @@ int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) int npages; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; if (mr->enabled) { @@ -1104,7 +1104,7 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; db = &priv->db_table; if (db->sdb_ext_mod) { @@ -1134,7 +1134,7 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) struct hns_roce_raq_table *raq; struct device *dev = &hr_dev->pdev->dev; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; raq = &priv->raq_table; raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL); @@ -1211,7 +1211,7 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv; struct hns_roce_raq_table *raq; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; raq = &priv->raq_table; dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf, @@ -1245,7 +1245,7 @@ static int hns_roce_bt_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv; int ret; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map, @@ -1287,7 +1287,7 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev) struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map); @@ -1305,7 +1305,7 @@ static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev) struct hns_roce_buf_list *tptr_buf; struct hns_roce_v1_priv *priv; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; tptr_buf = &priv->tptr_table.tptr_buf; /* @@ -1331,7 +1331,7 @@ static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev) struct hns_roce_buf_list *tptr_buf; struct hns_roce_v1_priv *priv; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; tptr_buf = &priv->tptr_table.tptr_buf; dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, @@ -1345,7 +1345,7 @@ static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); @@ -1369,7 +1369,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) struct hns_roce_free_mr *free_mr; struct hns_roce_v1_priv *priv; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; flush_workqueue(free_mr->free_mr_wq); @@ -1433,7 +1433,7 @@ static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv; struct hns_roce_des_qp *des_qp; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct 
hns_roce_v1_priv *)hr_dev->priv; des_qp = &priv->des_qp; des_qp->requeue_flag = 1; @@ -1451,7 +1451,7 @@ static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv; struct hns_roce_des_qp *des_qp; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; des_qp = &priv->des_qp; des_qp->requeue_flag = 0; @@ -1942,7 +1942,7 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, dma_addr_t tptr_dma_addr; int offset; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; tptr_buf = &priv->tptr_table.tptr_buf; cq_context = mb_buf; @@ -2290,7 +2290,7 @@ int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, void __iomem *bt_cmd; u64 bt_ba = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; switch (table->type) { case HEM_TYPE_QPC: @@ -3665,7 +3665,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) qp_work_entry = container_of(work, struct hns_roce_qp_work, work); hr_dev = to_hr_dev(qp_work_entry->ib_dev); dev = &hr_dev->pdev->dev; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; hr_qp = qp_work_entry->qp; qpn = hr_qp->qpn; @@ -3782,7 +3782,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt; qp_work->sche_cnt = qp_work_entry.sche_cnt; - priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + priv = (struct hns_roce_v1_priv *)hr_dev->priv; queue_work(priv->des_qp.qp_wq, &qp_work->work); dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn); } @@ -3842,8 +3842,6 @@ int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) return ret; } -struct hns_roce_v1_priv hr_v1_priv; - static const struct hns_roce_hw hns_roce_hw_v1 = { .reset = hns_roce_v1_reset, .hw_profile = hns_roce_v1_profile, @@ -3864,7 +3862,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .poll_cq = hns_roce_v1_poll_cq, .dereg_mr = hns_roce_v1_dereg_mr, .destroy_cq = hns_roce_v1_destroy_cq, - .priv = &hr_v1_priv, }; static const struct of_device_id hns_roce_of_match[] = { @@ -4046,6 +4043,12 @@ static int hns_roce_probe(struct platform_device *pdev) if (!hr_dev) return -ENOMEM; + hr_dev->priv = kzalloc(sizeof(struct hns_roce_v1_priv), GFP_KERNEL); + if (!hr_dev->priv) { + ret = -ENOMEM; + goto error_failed_kzalloc; + } + hr_dev->pdev = pdev; platform_set_drvdata(pdev, hr_dev); @@ -4071,6 +4074,9 @@ static int hns_roce_probe(struct platform_device *pdev) return 0; error_failed_get_cfg: + kfree(hr_dev->priv); + +error_failed_kzalloc: ib_dealloc_device(&hr_dev->ib_dev); return ret; @@ -4085,6 +4091,7 @@ static int hns_roce_remove(struct platform_device *pdev) struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); hns_roce_exit(hr_dev); + kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); return 0; From dd74282df573192e201ee716c8b621eb322c4fab Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:01 +0800 Subject: [PATCH 013/296] RDMA/hns: Initialize the PCI device for hip08 RoCE The hip08 RoCE engine is a PCI device. This patch mainly obtains some information of PCI device for hip08 RoCE from hns3 NIC driver in the initialization. 
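As the new hns_roce_hw_v2.c below shows, the RoCE driver does not probe the PCI function itself: everything it needs is handed over through the hnae3 handle owned by the hns3 NIC driver. A condensed sketch of that hand-off (field names taken from the hunks below):

	/* Condensed from hns_roce_hw_v2_get_cfg()/_init_instance() */
	hr_dev->pci_dev         = handle->pdev;               /* PCI function */
	hr_dev->dev             = &handle->pdev->dev;         /* generic device */
	hr_dev->reg_base        = handle->rinfo.roce_io_base; /* mapped registers */
	hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;       /* companion netdev */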
Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/Kconfig | 10 ++ drivers/infiniband/hw/hns/Makefile | 4 + drivers/infiniband/hw/hns/hns_roce_device.h | 2 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 151 ++++++++++++++++++++ 4 files changed, 167 insertions(+) create mode 100644 drivers/infiniband/hw/hns/hns_roce_hw_v2.c diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index 37e33e522c19..fddb5fdf92de 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -19,3 +19,13 @@ config INFINIBAND_HNS_HIP06 To compile this driver as a module, choose M here: the module will be called hns-roce-hw-v1. + +config INFINIBAND_HNS_HIP08 + tristate "Hisilicon Hip08 Family RoCE support" + depends on INFINIBAND_HNS && PCI && HNS3 + ---help--- + RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. + The RoCE engine is a PCI device. + + To compile this driver as a module, choose M here: the module + will be called hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 806ac4e678bd..ff426a625e13 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -2,9 +2,13 @@ # Makefile for the Hisilicon RoCE drivers. # +ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 + obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o +obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o +hns-roce-hw-v2-objs := hns_roce_hw_v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index aa9281e66b8b..3132b68e032f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -540,6 +540,8 @@ struct hns_roce_hw { struct hns_roce_dev { struct ib_device ib_dev; struct platform_device *pdev; + struct pci_dev *pci_dev; + struct device *dev; struct hns_roce_uar priv_uar; const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; spinlock_t sm_lock; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c new file mode 100644 index 000000000000..b35c72d352d8 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2016-2017 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "hnae3.h" +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" + +static const struct hns_roce_hw hns_roce_hw_v2; + +static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + /* required last entry */ + {0, } +}; + +static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, + struct hnae3_handle *handle) +{ + const struct pci_device_id *id; + + id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev); + if (!id) { + dev_err(hr_dev->dev, "device is not compatible!\n"); + return -ENXIO; + } + + hr_dev->hw = &hns_roce_hw_v2; + + /* Get info from NIC driver. */ + hr_dev->reg_base = handle->rinfo.roce_io_base; + hr_dev->caps.num_ports = 1; + hr_dev->iboe.netdevs[0] = handle->rinfo.netdev; + hr_dev->iboe.phy_port[0] = 0; + + /* cmd issue mode: 0 is poll, 1 is event */ + hr_dev->cmd_mod = 0; + hr_dev->loop_idc = 0; + + return 0; +} + +static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +{ + struct hns_roce_dev *hr_dev; + int ret; + + hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + if (!hr_dev) + return -ENOMEM; + + hr_dev->pci_dev = handle->pdev; + hr_dev->dev = &handle->pdev->dev; + handle->priv = hr_dev; + + ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); + if (ret) { + dev_err(hr_dev->dev, "Get Configuration failed!\n"); + goto error_failed_get_cfg; + } + + ret = hns_roce_init(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "RoCE Engine init failed!\n"); + goto error_failed_get_cfg; + } + + return 0; + +error_failed_get_cfg: + ib_dealloc_device(&hr_dev->ib_dev); + + return ret; +} + +static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, + bool reset) +{ + struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + + hns_roce_exit(hr_dev); + ib_dealloc_device(&hr_dev->ib_dev); +} + +static const struct hnae3_client_ops hns_roce_hw_v2_ops = { + .init_instance = hns_roce_hw_v2_init_instance, + .uninit_instance = hns_roce_hw_v2_uninit_instance, +}; + +static struct hnae3_client hns_roce_hw_v2_client = { + .name = "hns_roce_hw_v2", + .type = HNAE3_CLIENT_ROCE, + .ops = &hns_roce_hw_v2_ops, +}; + +static int __init hns_roce_hw_v2_init(void) +{ + return hnae3_register_client(&hns_roce_hw_v2_client); +} + +static void __exit hns_roce_hw_v2_exit(void) +{ + hnae3_unregister_client(&hns_roce_hw_v2_client); +} + +module_init(hns_roce_hw_v2_init); +module_exit(hns_roce_hw_v2_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Wei Hu "); +MODULE_AUTHOR("Lijun Ou "); +MODULE_AUTHOR("Shaobo Xu "); +MODULE_DESCRIPTION("Hisilicon Hip08 Family RoCE Driver"); From 13ca970e3692e498e1544d0c5141f20da9a8e89d Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:02 +0800 Subject: [PATCH 014/296] RDMA/hns: Modify assignment device 
variable to support both PCI device and platform device In order to support the scalability of the hardware version, the features irrelevant to the hardware will be located in hns-roce.ko, and the hardware-relevant operations will be located in hns_roce_hw_v1.ko or hns_roce_hw_v2.ko, depending on the chip series. The hip08 RoCE engine is a PCI device, while the hip06 RoCE engine is a platform device. In order to support both platform devices and PCI devices, we replace &hr_dev->pdev->dev with hr_dev->dev in hns-roce.ko as below: Before modification: struct device *dev = &hr_dev->pdev->dev; After modification: struct device *dev = hr_dev->dev; The related structure: struct hns_roce_dev { ... struct platform_device *pdev; struct pci_dev *pci_dev; struct device *dev; ... } Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_ah.c | 2 +- drivers/infiniband/hw/hns/hns_roce_alloc.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_cmd.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_cq.c | 12 ++++++------ drivers/infiniband/hw/hns/hns_roce_hem.c | 12 ++++++------ drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 12 ++++++------ drivers/infiniband/hw/hns/hns_roce_mr.c | 8 ++++---- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 20 ++++++++++---------- 10 files changed, 41 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d545302b8ef8..b30bc8830643 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -44,7 +44,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 71a5098579c3..8c9a33f4e1c1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -161,7 +161,7 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, struct hns_roce_buf *buf) { int i; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; u32 bits_per_long = BITS_PER_LONG; if (buf->nbufs == 1) { @@ -172,7 +172,7 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) - dma_free_coherent(&hr_dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(dev, PAGE_SIZE, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); @@ -186,7 +186,7 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int i = 0; dma_addr_t t; struct page **pages; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; u32 bits_per_long = BITS_PER_LONG; /* SQ/RQ buf lease than one page, SQ + RQ = 8K */ diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index c981d9db5942..67ad3e9f0e48 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -125,7 +125,7 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u8 op_modifier, u16 op, unsigned long timeout) { - struct device *dev = &hr_dev->pdev->dev; + struct device
*dev = hr_dev->dev; u8 __iomem *hcr = hr_dev->cmd.hcr; unsigned long end = 0; u32 status = 0; @@ -196,8 +196,8 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, unsigned long timeout) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; - struct device *dev = &hr_dev->pdev->dev; struct hns_roce_cmd_context *context; + struct device *dev = hr_dev->dev; int ret = 0; spin_lock(&cmd->context_lock); @@ -273,7 +273,7 @@ EXPORT_SYMBOL_GPL(hns_roce_cmd_mbox); int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; mutex_init(&hr_dev->cmd.hcr_mutex); sema_init(&hr_dev->cmd.poll_sem, 1); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 1d57df7a8967..5f512429f634 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -58,7 +58,7 @@ static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { - dev_err(&hr_dev->pdev->dev, + dev_err(hr_dev->dev, "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n", event_type, hr_cq->cqn); return; @@ -87,7 +87,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, { struct hns_roce_cmd_mailbox *mailbox = NULL; struct hns_roce_cq_table *cq_table = NULL; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; dma_addr_t dma_handle; u64 *mtts = NULL; int ret = 0; @@ -182,7 +182,7 @@ static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; int ret; ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn); @@ -282,7 +282,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_ib_create_cq ucmd; struct hns_roce_cq *hr_cq = NULL; struct hns_roce_uar *uar = NULL; @@ -416,7 +416,7 @@ EXPORT_SYMBOL_GPL(hns_roce_ib_destroy_cq); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_cq *cq; cq = radix_tree_lookup(&hr_dev->cq_table.tree, @@ -432,7 +432,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_cq *cq; cq = radix_tree_lookup(&cq_table->tree, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 06aed4a93d68..0ab49122e8c9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -84,7 +84,7 @@ struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, * memory, directly return fail. 
*/ mem = &chunk->mem[chunk->npages]; - buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order, + buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order, &sg_dma_address(mem), gfp_mask); if (!buf) goto fail; @@ -115,7 +115,7 @@ void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) - dma_free_coherent(&hr_dev->pdev->dev, + dma_free_coherent(hr_dev->dev, chunk->mem[i].length, lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); @@ -128,8 +128,8 @@ void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { - struct device *dev = &hr_dev->pdev->dev; spinlock_t *lock = &hr_dev->bt_cmd_lock; + struct device *dev = hr_dev->dev; unsigned long end = 0; unsigned long flags; struct hns_roce_hem_iter iter; @@ -212,7 +212,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; int ret = 0; unsigned long i; @@ -251,7 +251,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, void hns_roce_table_put(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; unsigned long i; i = (obj & (table->num_obj - 1)) / @@ -380,7 +380,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; unsigned long i; for (i = 0; i < table->num_hem; ++i) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index e4d13a93ef63..22cc3fb6c631 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -4050,6 +4050,7 @@ static int hns_roce_probe(struct platform_device *pdev) } hr_dev->pdev = pdev; + hr_dev->dev = dev; platform_set_drvdata(pdev, hr_dev); if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) && diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 7afa0c8a6b64..b07d437278a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -117,7 +117,7 @@ static int hns_roce_del_gid(struct ib_device *device, u8 port_num, static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, unsigned long event) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct net_device *netdev; netdev = hr_dev->iboe.netdevs[port]; @@ -240,7 +240,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, struct ib_port_attr *props) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct net_device *net_dev; unsigned long flags; enum ib_mtu mtu; @@ -428,7 +428,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) int ret; struct hns_roce_ib_iboe *iboe = NULL; struct ib_device *ib_dev = NULL; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; iboe = &hr_dev->iboe; spin_lock_init(&iboe->lock); @@ -536,7 +536,7 @@ static int 
hns_roce_register_device(struct hns_roce_dev *hr_dev) static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) { int ret; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table, HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz, @@ -605,7 +605,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { int ret; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; spin_lock_init(&hr_dev->sm_lock); spin_lock_init(&hr_dev->bt_cmd_lock); @@ -668,7 +668,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) int hns_roce_init(struct hns_roce_dev *hr_dev) { int ret; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; if (hr_dev->hw->reset) { ret = hr_dev->hw->reset(hr_dev, true); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index ed3571385d95..923d2b40e9c5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -241,9 +241,9 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, u64 size, u32 access, int npages, struct hns_roce_mr *mr) { + struct device *dev = hr_dev->dev; unsigned long index = 0; int ret = 0; - struct device *dev = &hr_dev->pdev->dev; /* Allocate a key for mr from mr_table */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); @@ -276,7 +276,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; int npages = 0; int ret; @@ -302,7 +302,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, { int ret; unsigned long mtpt_idx = key_to_hw_index(mr->key); - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; @@ -552,7 +552,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_mr *mr = NULL; int ret = 0; int n = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index ffbf249d0885..079bb10237cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -60,7 +60,7 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_pd *pd; int ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index ad13d05da945..b512ecc6df22 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -44,7 +44,7 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_qp *qp; spin_lock(&qp_table->lock); @@ -154,7 +154,7 @@ static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, hr_qp->qpn & (hr_dev->caps.num_qps - 1), 
hr_qp); spin_unlock_irq(&qp_table->lock); if (ret) { - dev_err(&hr_dev->pdev->dev, "QPC radix_tree_insert failed\n"); + dev_err(hr_dev->dev, "QPC radix_tree_insert failed\n"); goto err_put_irrl; } @@ -172,7 +172,7 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, struct hns_roce_qp *hr_qp) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; int ret; if (!qpn) @@ -261,8 +261,8 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, int is_user, int has_srq, struct hns_roce_qp *hr_qp) { + struct device *dev = hr_dev->dev; u32 max_cnt; - struct device *dev = &hr_dev->pdev->dev; /* Check the validity of QP support capacity */ if (cap->max_recv_wr > hr_dev->caps.max_wqes || @@ -319,7 +319,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - dev_err(&hr_dev->pdev->dev, "check SQ size error!\n"); + dev_err(hr_dev->dev, "check SQ size error!\n"); return -EINVAL; } @@ -343,7 +343,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; u32 max_cnt; if (cap->max_send_wr > hr_dev->caps.max_wqes || @@ -395,7 +395,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_udata *udata, unsigned long sqpn, struct hns_roce_qp *hr_qp) { - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_ib_create_qp ucmd; unsigned long qpn = 0; int ret = 0; @@ -575,7 +575,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; struct hns_roce_sqp *hr_sqp; struct hns_roce_qp *hr_qp; int ret; @@ -660,7 +660,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); enum ib_qp_state cur_state, new_state; - struct device *dev = &hr_dev->pdev->dev; + struct device *dev = hr_dev->dev; int ret = -EINVAL; int p; enum ib_mtu active_mtu; @@ -835,7 +835,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) hr_dev->caps.num_qps - 1, SQP_NUM, reserved_from_top); if (ret) { - dev_err(&hr_dev->pdev->dev, "qp bitmap init failed!error=%d\n", + dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", ret); return ret; } From a04ff739f2a93d0564a5c71bfb3f459a3c06dbb8 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:03 +0800 Subject: [PATCH 015/296] RDMA/hns: Add command queue support for hip08 RoCE driver The command queue is the configuration queue: software configures the hardware by filling commands into it. It consists of a command send queue and a command receive queue. In the hip08 RoCE engine, registers can be configured and queried through the command queue.
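The ring accounting follows the usual one-slot-reserved convention, so that next_to_use == next_to_clean always means "empty" rather than "full". As a stand-alone illustration (the cmq_space() helper below mirrors hns_roce_cmq_space() from this patch; the main() driver is hypothetical and not part of the kernel code):

    #include <stdio.h>

    /* Same occupancy arithmetic as hns_roce_cmq_space(): of desc_num slots,
     * (ntu - ntc + desc_num) % desc_num are in flight, and one slot stays
     * reserved so a full ring is never mistaken for an empty one.
     */
    static int cmq_space(int ntu, int ntc, int desc_num)
    {
    	int used = (ntu - ntc + desc_num) % desc_num;

    	return desc_num - used - 1;
    }

    int main(void)
    {
    	/* 1024 descriptors per ring, as hns_roce_v2_cmq_init() configures */
    	printf("%d\n", cmq_space(0, 0, 1024));    /* empty ring: 1023 free */
    	printf("%d\n", cmq_space(1023, 0, 1024)); /* full ring: 0 free */
    	printf("%d\n", cmq_space(5, 1020, 1024)); /* after wrap: 1014 free */
    	return 0;
    }

Reserving that extra slot is what lets hns_roce_cmq_send() reject an oversized batch with -EBUSY from the two indices alone, without keeping a separate occupancy counter.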
Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_common.h | 13 + drivers/infiniband/hw/hns/hns_roce_device.h | 2 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 281 +++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 111 ++++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 14 + 5 files changed, 420 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/hns/hns_roce_hw_v2.h diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 4af403e1348c..94381c2f581a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -362,4 +362,17 @@ #define ROCEE_ECC_UCERR_ALM0_REG 0xB34 #define ROCEE_ECC_CERR_ALM0_REG 0xB40 +/* V2 ROCEE REG */ +#define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000 +#define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004 +#define ROCEE_TX_CMQ_DEPTH_REG 0x07008 +#define ROCEE_TX_CMQ_TAIL_REG 0x07010 +#define ROCEE_TX_CMQ_HEAD_REG 0x07014 + +#define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018 +#define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c +#define ROCEE_RX_CMQ_DEPTH_REG 0x07020 +#define ROCEE_RX_CMQ_TAIL_REG 0x07024 +#define ROCEE_RX_CMQ_HEAD_REG 0x07028 + #endif /* _HNS_ROCE_COMMON_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 3132b68e032f..87c2e072aad8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -506,6 +506,8 @@ struct hns_roce_caps { struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); + int (*cmq_init)(struct hns_roce_dev *hr_dev); + void (*cmq_exit)(struct hns_roce_dev *hr_dev); void (*hw_profile)(struct hns_roce_dev *hr_dev); int (*hw_init)(struct hns_roce_dev *hr_dev); void (*hw_exit)(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b35c72d352d8..b610f3a4b8e4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -41,8 +41,277 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" +#include "hns_roce_hw_v2.h" -static const struct hns_roce_hw hns_roce_hw_v2; +static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) +{ + int ntu = ring->next_to_use; + int ntc = ring->next_to_clean; + int used = (ntu - ntc + ring->desc_num) % ring->desc_num; + + return ring->desc_num - used - 1; +} + +static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_cmq_ring *ring) +{ + int size = ring->desc_num * sizeof(struct hns_roce_cmq_desc); + + ring->desc = kzalloc(size, GFP_KERNEL); + if (!ring->desc) + return -ENOMEM; + + ring->desc_dma_addr = dma_map_single(hr_dev->dev, ring->desc, size, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(hr_dev->dev, ring->desc_dma_addr)) { + ring->desc_dma_addr = 0; + kfree(ring->desc); + ring->desc = NULL; + return -ENOMEM; + } + + return 0; +} + +static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_cmq_ring *ring) +{ + dma_unmap_single(hr_dev->dev, ring->desc_dma_addr, + ring->desc_num * sizeof(struct hns_roce_cmq_desc), + DMA_BIDIRECTIONAL); + kfree(ring->desc); +} + +static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_cmq_ring *ring = (ring_type == 
TYPE_CSQ) ? + &priv->cmq.csq : &priv->cmq.crq; + + ring->flag = ring_type; + ring->next_to_clean = 0; + ring->next_to_use = 0; + + return hns_roce_alloc_cmq_desc(hr_dev, ring); +} + +static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_cmq_ring *ring = (ring_type == TYPE_CSQ) ? + &priv->cmq.csq : &priv->cmq.crq; + dma_addr_t dma = ring->desc_dma_addr; + + if (ring_type == TYPE_CSQ) { + roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_L_REG, (u32)dma); + roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, + upper_32_bits(dma)); + roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, + (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) | + HNS_ROCE_CMQ_ENABLE); + roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); + roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); + } else { + roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_L_REG, (u32)dma); + roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, + upper_32_bits(dma)); + roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, + (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) | + HNS_ROCE_CMQ_ENABLE); + roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); + roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0); + } +} + +static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + int ret; + + /* Setup the queue entries for command queue */ + priv->cmq.csq.desc_num = 1024; + priv->cmq.crq.desc_num = 1024; + + /* Setup the lock for command queue */ + spin_lock_init(&priv->cmq.csq.lock); + spin_lock_init(&priv->cmq.crq.lock); + + /* Setup Tx write back timeout */ + priv->cmq.tx_timeout = HNS_ROCE_CMQ_TX_TIMEOUT; + + /* Init CSQ */ + ret = hns_roce_init_cmq_ring(hr_dev, TYPE_CSQ); + if (ret) { + dev_err(hr_dev->dev, "Init CSQ error, ret = %d.\n", ret); + return ret; + } + + /* Init CRQ */ + ret = hns_roce_init_cmq_ring(hr_dev, TYPE_CRQ); + if (ret) { + dev_err(hr_dev->dev, "Init CRQ error, ret = %d.\n", ret); + goto err_crq; + } + + /* Init CSQ REG */ + hns_roce_cmq_init_regs(hr_dev, TYPE_CSQ); + + /* Init CRQ REG */ + hns_roce_cmq_init_regs(hr_dev, TYPE_CRQ); + + return 0; + +err_crq: + hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq); + + return ret; +} + +static void hns_roce_v2_cmq_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + + hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq); + hns_roce_free_cmq_desc(hr_dev, &priv->cmq.crq); +} + +void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, + enum hns_roce_opcode_type opcode, + bool is_read) +{ + memset((void *)desc, 0, sizeof(struct hns_roce_cmq_desc)); + desc->opcode = cpu_to_le16(opcode); + desc->flag = + cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN); + if (is_read) + desc->flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_WR); + else + desc->flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); +} + +static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); + + return head == priv->cmq.csq.next_to_use; +} + +static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; + struct hns_roce_cmq_desc *desc; + u16 ntc = csq->next_to_clean; + u32 head; + int clean = 0; + + desc = &csq->desc[ntc]; + head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); 
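+ /* Walk from the driver's next_to_clean up to the head the hardware
+ * reported: every descriptor in that range has been consumed, so it is
+ * zeroed for reuse and counted in 'clean'.
+ */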
+ while (head != ntc) { + memset(desc, 0, sizeof(*desc)); + ntc++; + if (ntc == csq->desc_num) + ntc = 0; + desc = &csq->desc[ntc]; + clean++; + } + csq->next_to_clean = ntc; + + return clean; +} + +int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; + struct hns_roce_cmq_desc *desc_to_use; + bool complete = false; + u32 timeout = 0; + int handle = 0; + u16 desc_ret; + int ret = 0; + int ntc; + + spin_lock_bh(&csq->lock); + + if (num > hns_roce_cmq_space(csq)) { + spin_unlock_bh(&csq->lock); + return -EBUSY; + } + + /* + * Record the location of desc in the cmq for this time + * which will be use for hardware to write back + */ + ntc = csq->next_to_use; + + while (handle < num) { + desc_to_use = &csq->desc[csq->next_to_use]; + *desc_to_use = desc[handle]; + dev_dbg(hr_dev->dev, "set cmq desc:\n"); + csq->next_to_use++; + if (csq->next_to_use == csq->desc_num) + csq->next_to_use = 0; + handle++; + } + + /* Write to hardware */ + roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use); + + /* + * If the command is sync, wait for the firmware to write back, + * if multi descriptors to be sent, use the first one to check + */ + if ((desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) { + do { + if (hns_roce_cmq_csq_done(hr_dev)) + break; + usleep_range(1000, 2000); + timeout++; + } while (timeout < priv->cmq.tx_timeout); + } + + if (hns_roce_cmq_csq_done(hr_dev)) { + complete = true; + handle = 0; + while (handle < num) { + /* get the result of hardware write back */ + desc_to_use = &csq->desc[ntc]; + desc[handle] = *desc_to_use; + dev_dbg(hr_dev->dev, "Get cmq desc:\n"); + desc_ret = desc[handle].retval; + if (desc_ret == CMD_EXEC_SUCCESS) + ret = 0; + else + ret = -EIO; + priv->cmq.last_status = desc_ret; + ntc++; + handle++; + if (ntc == csq->desc_num) + ntc = 0; + } + } + + if (!complete) + ret = -EAGAIN; + + /* clean the command send queue */ + handle = hns_roce_cmq_csq_clean(hr_dev); + if (handle != num) + dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n", + handle, num); + + spin_unlock_bh(&csq->lock); + + return ret; +} + +static const struct hns_roce_hw hns_roce_hw_v2 = { + .cmq_init = hns_roce_v2_cmq_init, + .cmq_exit = hns_roce_v2_cmq_exit, +}; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, @@ -87,6 +356,12 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) if (!hr_dev) return -ENOMEM; + hr_dev->priv = kzalloc(sizeof(struct hns_roce_v2_priv), GFP_KERNEL); + if (!hr_dev->priv) { + ret = -ENOMEM; + goto error_failed_kzalloc; + } + hr_dev->pci_dev = handle->pdev; hr_dev->dev = &handle->pdev->dev; handle->priv = hr_dev; @@ -106,6 +381,9 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) return 0; error_failed_get_cfg: + kfree(hr_dev->priv); + +error_failed_kzalloc: ib_dealloc_device(&hr_dev->ib_dev); return ret; @@ -117,6 +395,7 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; hns_roce_exit(hr_dev); + kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h new file mode 100644 index 000000000000..781ae74a3eda --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-2017 
Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_HW_V2_H +#define _HNS_ROCE_HW_V2_H + +#define HNS_ROCE_CMQ_TX_TIMEOUT 200 + +#define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0 +#define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1 +#define HNS_ROCE_CMD_FLAG_NEXT_SHIFT 2 +#define HNS_ROCE_CMD_FLAG_WR_OR_RD_SHIFT 3 +#define HNS_ROCE_CMD_FLAG_NO_INTR_SHIFT 4 +#define HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT 5 + +#define HNS_ROCE_CMD_FLAG_IN BIT(HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT) +#define HNS_ROCE_CMD_FLAG_OUT BIT(HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT) +#define HNS_ROCE_CMD_FLAG_NEXT BIT(HNS_ROCE_CMD_FLAG_NEXT_SHIFT) +#define HNS_ROCE_CMD_FLAG_WR BIT(HNS_ROCE_CMD_FLAG_WR_OR_RD_SHIFT) +#define HNS_ROCE_CMD_FLAG_NO_INTR BIT(HNS_ROCE_CMD_FLAG_NO_INTR_SHIFT) +#define HNS_ROCE_CMD_FLAG_ERR_INTR BIT(HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT) + +#define HNS_ROCE_CMQ_DESC_NUM_S 3 +#define HNS_ROCE_CMQ_EN_B 16 +#define HNS_ROCE_CMQ_ENABLE BIT(HNS_ROCE_CMQ_EN_B) + +/* CMQ command */ +enum hns_roce_opcode_type { + HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, + HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, + HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, + HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, + HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, + HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, +}; + +enum { + TYPE_CRQ, + TYPE_CSQ, +}; + +enum hns_roce_cmd_return_status { + CMD_EXEC_SUCCESS = 0, + CMD_NO_AUTH = 1, + CMD_NOT_EXEC = 2, + CMD_QUEUE_FULL = 3, +}; + +struct hns_roce_cmq_desc { + u16 opcode; + u16 flag; + u16 retval; + u16 rsv; + u32 data[6]; +}; + +struct hns_roce_v2_cmq_ring { + dma_addr_t desc_dma_addr; + struct hns_roce_cmq_desc *desc; + u32 head; + u32 tail; + + u16 buf_size; + u16 desc_num; + int next_to_use; + int next_to_clean; + u8 flag; + spinlock_t lock; /* command queue lock */ +}; + +struct hns_roce_v2_cmq { + struct hns_roce_v2_cmq_ring csq; + struct hns_roce_v2_cmq_ring crq; + u16 tx_timeout; + u16 last_status; +}; + +struct hns_roce_v2_priv { + struct hns_roce_v2_cmq cmq; +}; + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b07d437278a6..4f5a6fd7219d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -678,6 +678,14 @@ int hns_roce_init(struct 
hns_roce_dev *hr_dev) } } + if (hr_dev->hw->cmq_init) { + ret = hr_dev->hw->cmq_init(hr_dev); + if (ret) { + dev_err(dev, "Init RoCE Command Queue failed!\n"); + goto error_failed_cmq_init; + } + } + hr_dev->hw->hw_profile(hr_dev); ret = hns_roce_cmd_init(hr_dev); @@ -750,6 +758,10 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) hns_roce_cmd_cleanup(hr_dev); error_failed_cmd_init: + if (hr_dev->hw->cmq_exit) + hr_dev->hw->cmq_exit(hr_dev); + +error_failed_cmq_init: if (hr_dev->hw->reset) { ret = hr_dev->hw->reset(hr_dev, false); if (ret) @@ -774,6 +786,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev) if (hr_dev->cmd_mod) hns_roce_cleanup_eq_table(hr_dev); hns_roce_cmd_cleanup(hr_dev); + if (hr_dev->hw->cmq_exit) + hr_dev->hw->cmq_exit(hr_dev); if (hr_dev->hw->reset) hr_dev->hw->reset(hr_dev, false); } From cfc85f3e4b7f9f2d66d0816e3121c4c8fcde1448 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:04 +0800 Subject: [PATCH 016/296] RDMA/hns: Add profile support for hip08 driver The profile mainly sets some specifications and obtains some hardware resources by issuing the relevant commands. Because the max sge num of the send queue differs from that of the receive queue in hip08, the calculation of props->max_sge in the query_device op is modified. Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 8 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 246 ++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 161 +++++++++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 8 +- 5 files changed, 423 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 87c2e072aad8..8ec6d6eed4dc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -477,6 +477,7 @@ struct hns_roce_caps { u32 max_wqes; /* 16k */ u32 max_sq_desc_sz; /* 64 */ u32 max_rq_desc_sz; /* 64 */ + u32 max_srq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; int num_cqs; @@ -487,6 +488,7 @@ struct hns_roce_caps { int num_other_vectors; int num_mtpts; u32 num_mtt_segs; + u32 num_cqe_segs; int reserved_mrws; int reserved_uars; int num_pds; @@ -502,13 +504,17 @@ struct hns_roce_caps { int aeqe_depth; int ceqe_depth[HNS_ROCE_COMP_VEC_NUM]; enum ib_mtu max_mtu; + u32 qpc_bt_num; + u32 srqc_bt_num; + u32 cqc_bt_num; + u32 mpt_bt_num; }; struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev *hr_dev); void (*cmq_exit)(struct hns_roce_dev *hr_dev); - void (*hw_profile)(struct hns_roce_dev *hr_dev); + int (*hw_profile)(struct hns_roce_dev *hr_dev); int (*hw_init)(struct hns_roce_dev *hr_dev); void (*hw_exit)(struct hns_roce_dev *hr_dev); void (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 22cc3fb6c631..267e400fd527 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1459,7 +1459,7 @@ static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) destroy_workqueue(des_qp->qp_wq); } -void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) +int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; @@ -1525,6 +1525,8 @@ void
hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, ROCEE_ACK_DELAY_REG)); caps->max_mtu = IB_MTU_2048; + + return 0; } int hns_roce_v1_init(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b610f3a4b8e4..af3cb30f04f3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -308,9 +308,255 @@ int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } +int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_query_version *resp; + struct hns_roce_cmq_desc desc; + int ret; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_HW_VER, true); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + return ret; + + resp = (struct hns_roce_query_version *)desc.data; + hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version); + hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id); + + return 0; +} + +static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cfg_global_param *req; + struct hns_roce_cmq_desc desc; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM, + false); + + req = (struct hns_roce_cfg_global_param *)desc.data; + memset(req, 0, sizeof(*req)); + roce_set_field(req->time_cfg_udp_port, + CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_M, + CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8); + roce_set_field(req->time_cfg_udp_port, + CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M, + CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_pf_res *res; + int ret; + int i; + + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], + HNS_ROCE_OPC_QUERY_PF_RES, true); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + } + + ret = hns_roce_cmq_send(hr_dev, desc, 2); + if (ret) + return ret; + + res = (struct hns_roce_pf_res *)desc[0].data; + + hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num, + PF_RES_DATA_1_PF_QPC_BT_NUM_M, + PF_RES_DATA_1_PF_QPC_BT_NUM_S); + hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num, + PF_RES_DATA_2_PF_SRQC_BT_NUM_M, + PF_RES_DATA_2_PF_SRQC_BT_NUM_S); + hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num, + PF_RES_DATA_3_PF_CQC_BT_NUM_M, + PF_RES_DATA_3_PF_CQC_BT_NUM_S); + hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num, + PF_RES_DATA_4_PF_MPT_BT_NUM_M, + PF_RES_DATA_4_PF_MPT_BT_NUM_S); + + return 0; +} + +static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_vf_res_a *req_a; + struct hns_roce_vf_res_b *req_b; + int i; + + req_a = (struct hns_roce_vf_res_a *)desc[0].data; + req_b = (struct hns_roce_vf_res_b *)desc[1].data; + memset(req_a, 0, sizeof(*req_a)); + memset(req_b, 0, sizeof(*req_b)); + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], + HNS_ROCE_OPC_ALLOC_VF_RES, false); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + if (i == 0) { + roce_set_field(req_a->vf_qpc_bt_idx_num, + VF_RES_A_DATA_1_VF_QPC_BT_IDX_M, + VF_RES_A_DATA_1_VF_QPC_BT_IDX_S, 0); + roce_set_field(req_a->vf_qpc_bt_idx_num, + 
VF_RES_A_DATA_1_VF_QPC_BT_NUM_M, + VF_RES_A_DATA_1_VF_QPC_BT_NUM_S, + HNS_ROCE_VF_QPC_BT_NUM); + + roce_set_field(req_a->vf_srqc_bt_idx_num, + VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M, + VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S, 0); + roce_set_field(req_a->vf_srqc_bt_idx_num, + VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M, + VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S, + HNS_ROCE_VF_SRQC_BT_NUM); + + roce_set_field(req_a->vf_cqc_bt_idx_num, + VF_RES_A_DATA_3_VF_CQC_BT_IDX_M, + VF_RES_A_DATA_3_VF_CQC_BT_IDX_S, 0); + roce_set_field(req_a->vf_cqc_bt_idx_num, + VF_RES_A_DATA_3_VF_CQC_BT_NUM_M, + VF_RES_A_DATA_3_VF_CQC_BT_NUM_S, + HNS_ROCE_VF_CQC_BT_NUM); + + roce_set_field(req_a->vf_mpt_bt_idx_num, + VF_RES_A_DATA_4_VF_MPT_BT_IDX_M, + VF_RES_A_DATA_4_VF_MPT_BT_IDX_S, 0); + roce_set_field(req_a->vf_mpt_bt_idx_num, + VF_RES_A_DATA_4_VF_MPT_BT_NUM_M, + VF_RES_A_DATA_4_VF_MPT_BT_NUM_S, + HNS_ROCE_VF_MPT_BT_NUM); + + roce_set_field(req_a->vf_eqc_bt_idx_num, + VF_RES_A_DATA_5_VF_EQC_IDX_M, + VF_RES_A_DATA_5_VF_EQC_IDX_S, 0); + roce_set_field(req_a->vf_eqc_bt_idx_num, + VF_RES_A_DATA_5_VF_EQC_NUM_M, + VF_RES_A_DATA_5_VF_EQC_NUM_S, + HNS_ROCE_VF_EQC_NUM); + } else { + roce_set_field(req_b->vf_smac_idx_num, + VF_RES_B_DATA_1_VF_SMAC_IDX_M, + VF_RES_B_DATA_1_VF_SMAC_IDX_S, 0); + roce_set_field(req_b->vf_smac_idx_num, + VF_RES_B_DATA_1_VF_SMAC_NUM_M, + VF_RES_B_DATA_1_VF_SMAC_NUM_S, + HNS_ROCE_VF_SMAC_NUM); + + roce_set_field(req_b->vf_sgid_idx_num, + VF_RES_B_DATA_2_VF_SGID_IDX_M, + VF_RES_B_DATA_2_VF_SGID_IDX_S, 0); + roce_set_field(req_b->vf_sgid_idx_num, + VF_RES_B_DATA_2_VF_SGID_NUM_M, + VF_RES_B_DATA_2_VF_SGID_NUM_S, + HNS_ROCE_VF_SGID_NUM); + + roce_set_field(req_b->vf_qid_idx_sl_num, + VF_RES_B_DATA_3_VF_QID_IDX_M, + VF_RES_B_DATA_3_VF_QID_IDX_S, 0); + roce_set_field(req_b->vf_qid_idx_sl_num, + VF_RES_B_DATA_3_VF_SL_NUM_M, + VF_RES_B_DATA_3_VF_SL_NUM_S, + HNS_ROCE_VF_SL_NUM); + } + } + + return hns_roce_cmq_send(hr_dev, desc, 2); +} + +static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_caps *caps = &hr_dev->caps; + int ret; + + ret = hns_roce_cmq_query_hw_info(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n", + ret); + return ret; + } + + ret = hns_roce_config_global_param(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n", + ret); + } + + /* Get pf resource owned by every pf */ + ret = hns_roce_query_pf_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n", + ret); + return ret; + } + + ret = hns_roce_alloc_vf_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", + ret); + return ret; + } + + hr_dev->vendor_part_id = 0; + hr_dev->sys_image_guid = 0; + + caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; + caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; + caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; + caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; + caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; + caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; + caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; + caps->num_uars = HNS_ROCE_V2_UAR_NUM; + caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; + caps->num_aeq_vectors = 1; + caps->num_comp_vectors = 63; + caps->num_other_vectors = 0; + caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; + caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; + caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; + caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; + caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; + caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; + 
caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ; + caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ; + caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; + caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; + caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; + caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; + caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; + caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; + caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; + caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; + caps->reserved_lkey = 0; + caps->reserved_pds = 0; + caps->reserved_mrws = 1; + caps->reserved_uars = 0; + caps->reserved_cqs = 0; + + caps->pkey_table_len[0] = 1; + caps->gid_table_len[0] = 2; + caps->local_ca_ack_delay = 0; + caps->max_mtu = IB_MTU_4096; + + return 0; +} + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, + .hw_profile = hns_roce_v2_profile, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 781ae74a3eda..3dcc80096d53 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -33,6 +33,43 @@ #ifndef _HNS_ROCE_HW_V2_H #define _HNS_ROCE_HW_V2_H +#include + +#define HNS_ROCE_VF_QPC_BT_NUM 256 +#define HNS_ROCE_VF_SRQC_BT_NUM 64 +#define HNS_ROCE_VF_CQC_BT_NUM 64 +#define HNS_ROCE_VF_MPT_BT_NUM 64 +#define HNS_ROCE_VF_EQC_NUM 64 +#define HNS_ROCE_VF_SMAC_NUM 32 +#define HNS_ROCE_VF_SGID_NUM 32 +#define HNS_ROCE_VF_SL_NUM 8 + +#define HNS_ROCE_V2_MAX_QP_NUM 0x2000 +#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 +#define HNS_ROCE_V2_MAX_CQ_NUM 0x8000 +#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 +#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 +#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff +#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 +#define HNS_ROCE_V2_UAR_NUM 256 +#define HNS_ROCE_V2_PHY_UAR_NUM 1 +#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000 +#define HNS_ROCE_V2_MAX_MTT_SEGS 0x100000 +#define HNS_ROCE_V2_MAX_CQE_SEGS 0x10000 +#define HNS_ROCE_V2_MAX_PD_NUM 0x400000 +#define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128 +#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 +#define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64 +#define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16 +#define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64 +#define HNS_ROCE_V2_QPC_ENTRY_SZ 256 +#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 +#define HNS_ROCE_V2_CQC_ENTRY_SZ 64 +#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 +#define HNS_ROCE_V2_MTT_ENTRY_SZ 64 +#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 +#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 +#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_CMQ_TX_TIMEOUT 200 #define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0 @@ -75,6 +112,130 @@ enum hns_roce_cmd_return_status { CMD_QUEUE_FULL = 3, }; +struct hns_roce_query_version { + __le16 rocee_vendor_id; + __le16 rocee_hw_version; + __le32 rsv[5]; +}; + +struct hns_roce_cfg_global_param { + __le32 time_cfg_udp_port; + __le32 rsv[5]; +}; + +#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S 0 +#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_M GENMASK(9, 0) + +#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16 +#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16) + +struct hns_roce_pf_res { + __le32 rsv; + __le32 qpc_bt_idx_num; + __le32 srqc_bt_idx_num; + __le32 cqc_bt_idx_num; + __le32 mpt_bt_idx_num; + __le32 eqc_bt_idx_num; +}; + +#define PF_RES_DATA_1_PF_QPC_BT_IDX_S 0 +#define PF_RES_DATA_1_PF_QPC_BT_IDX_M GENMASK(10, 0) + +#define 
PF_RES_DATA_1_PF_QPC_BT_NUM_S 16 +#define PF_RES_DATA_1_PF_QPC_BT_NUM_M GENMASK(27, 16) + +#define PF_RES_DATA_2_PF_SRQC_BT_IDX_S 0 +#define PF_RES_DATA_2_PF_SRQC_BT_IDX_M GENMASK(8, 0) + +#define PF_RES_DATA_2_PF_SRQC_BT_NUM_S 16 +#define PF_RES_DATA_2_PF_SRQC_BT_NUM_M GENMASK(25, 16) + +#define PF_RES_DATA_3_PF_CQC_BT_IDX_S 0 +#define PF_RES_DATA_3_PF_CQC_BT_IDX_M GENMASK(8, 0) + +#define PF_RES_DATA_3_PF_CQC_BT_NUM_S 16 +#define PF_RES_DATA_3_PF_CQC_BT_NUM_M GENMASK(25, 16) + +#define PF_RES_DATA_4_PF_MPT_BT_IDX_S 0 +#define PF_RES_DATA_4_PF_MPT_BT_IDX_M GENMASK(8, 0) + +#define PF_RES_DATA_4_PF_MPT_BT_NUM_S 16 +#define PF_RES_DATA_4_PF_MPT_BT_NUM_M GENMASK(25, 16) + +#define PF_RES_DATA_5_PF_EQC_BT_IDX_S 0 +#define PF_RES_DATA_5_PF_EQC_BT_IDX_M GENMASK(8, 0) + +#define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16 +#define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16) + +struct hns_roce_vf_res_a { + u32 vf_id; + u32 vf_qpc_bt_idx_num; + u32 vf_srqc_bt_idx_num; + u32 vf_cqc_bt_idx_num; + u32 vf_mpt_bt_idx_num; + u32 vf_eqc_bt_idx_num; +}; + +#define VF_RES_A_DATA_1_VF_QPC_BT_IDX_S 0 +#define VF_RES_A_DATA_1_VF_QPC_BT_IDX_M GENMASK(10, 0) + +#define VF_RES_A_DATA_1_VF_QPC_BT_NUM_S 16 +#define VF_RES_A_DATA_1_VF_QPC_BT_NUM_M GENMASK(27, 16) + +#define VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S 0 +#define VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M GENMASK(8, 0) + +#define VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S 16 +#define VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M GENMASK(25, 16) + +#define VF_RES_A_DATA_3_VF_CQC_BT_IDX_S 0 +#define VF_RES_A_DATA_3_VF_CQC_BT_IDX_M GENMASK(8, 0) + +#define VF_RES_A_DATA_3_VF_CQC_BT_NUM_S 16 +#define VF_RES_A_DATA_3_VF_CQC_BT_NUM_M GENMASK(25, 16) + +#define VF_RES_A_DATA_4_VF_MPT_BT_IDX_S 0 +#define VF_RES_A_DATA_4_VF_MPT_BT_IDX_M GENMASK(8, 0) + +#define VF_RES_A_DATA_4_VF_MPT_BT_NUM_S 16 +#define VF_RES_A_DATA_4_VF_MPT_BT_NUM_M GENMASK(25, 16) + +#define VF_RES_A_DATA_5_VF_EQC_IDX_S 0 +#define VF_RES_A_DATA_5_VF_EQC_IDX_M GENMASK(8, 0) + +#define VF_RES_A_DATA_5_VF_EQC_NUM_S 16 +#define VF_RES_A_DATA_5_VF_EQC_NUM_M GENMASK(25, 16) + +struct hns_roce_vf_res_b { + u32 rsv0; + u32 vf_smac_idx_num; + u32 vf_sgid_idx_num; + u32 vf_qid_idx_sl_num; + u32 rsv[2]; +}; + +#define VF_RES_B_DATA_0_VF_ID_S 0 +#define VF_RES_B_DATA_0_VF_ID_M GENMASK(7, 0) + +#define VF_RES_B_DATA_1_VF_SMAC_IDX_S 0 +#define VF_RES_B_DATA_1_VF_SMAC_IDX_M GENMASK(7, 0) + +#define VF_RES_B_DATA_1_VF_SMAC_NUM_S 8 +#define VF_RES_B_DATA_1_VF_SMAC_NUM_M GENMASK(16, 8) + +#define VF_RES_B_DATA_2_VF_SGID_IDX_S 0 +#define VF_RES_B_DATA_2_VF_SGID_IDX_M GENMASK(7, 0) + +#define VF_RES_B_DATA_2_VF_SGID_NUM_S 8 +#define VF_RES_B_DATA_2_VF_SGID_NUM_M GENMASK(16, 8) + +#define VF_RES_B_DATA_3_VF_QID_IDX_S 0 +#define VF_RES_B_DATA_3_VF_QID_IDX_M GENMASK(9, 0) + +#define VF_RES_B_DATA_3_VF_SL_NUM_S 16 +#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) + struct hns_roce_cmq_desc { u16 opcode; u16 flag; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 4f5a6fd7219d..db6593e15e02 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -202,7 +202,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_qp_wr = hr_dev->caps.max_wqes; props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_RC_RNR_NAK_GEN; - props->max_sge = hr_dev->caps.max_sq_sg; + props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg); props->max_sge_rd = 1; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; @@ 
-686,7 +686,11 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) } } - hr_dev->hw->hw_profile(hr_dev); + ret = hr_dev->hw->hw_profile(hr_dev); + if (ret) { + dev_err(dev, "Get RoCE engine profile failed!\n"); + goto error_failed_cmd_init; + } ret = hns_roce_cmd_init(hr_dev); if (ret) { From a680f2f376fe70bad85f350059be995c9dc2a802 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:05 +0800 Subject: [PATCH 017/296] RDMA/hns: Add mailbox's implementation for hip08 RoCE driver In the hip08 SoC, the hardware implementation of the mailbox command has changed from that of the hip06 SoC. As a result, this patch adjusts the architecture of the command code and implements the mailbox interfaces for the hip08 SoC. Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cmd.c | 98 ++------------------- drivers/infiniband/hw/hns/hns_roce_common.h | 1 + drivers/infiniband/hw/hns/hns_roce_device.h | 5 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 75 ++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 5 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 84 ++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 20 +++++ 7 files changed, 195 insertions(+), 93 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 67ad3e9f0e48..1085cb249bc1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -38,69 +38,7 @@ #define CMD_POLL_TOKEN 0xffff #define CMD_MAX_NUM 32 -#define STATUS_MASK 0xff #define CMD_TOKEN_MASK 0x1f -#define GO_BIT_TIMEOUT_MSECS 10000 - -enum { - HCR_TOKEN_OFFSET = 0x14, - HCR_STATUS_OFFSET = 0x18, - HCR_GO_BIT = 15, -}; - -static int cmd_pending(struct hns_roce_dev *hr_dev) -{ - u32 status = readl(hr_dev->cmd.hcr + HCR_TOKEN_OFFSET); - - return (!!(status & (1 << HCR_GO_BIT))); -} - -/* this function should be serialized with "hcr_mutex" */ -static int __hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, - u64 in_param, u64 out_param, - u32 in_modifier, u8 op_modifier, u16 op, - u16 token, int event) -{ - struct hns_roce_cmdq *cmd = &hr_dev->cmd; - struct device *dev = &hr_dev->pdev->dev; - u32 __iomem *hcr = (u32 *)cmd->hcr; - int ret = -EAGAIN; - unsigned long end; - u32 val = 0; - - end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; - while (cmd_pending(hr_dev)) { - if (time_after(jiffies, end)) { - dev_dbg(dev, "jiffies=%d end=%d\n", (int)jiffies, - (int)end); - goto out; - } - cond_resched(); - } - - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, - op); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, - ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, - ROCEE_MB6_ROCEE_MB_TOKEN_S, token); - - __raw_writeq(cpu_to_le64(in_param), hcr + 0); - __raw_writeq(cpu_to_le64(out_param), hcr + 2); - __raw_writel(cpu_to_le32(in_modifier), hcr + 4); - /* Memory barrier */ - wmb(); - - __raw_writel(cpu_to_le32(val), hcr + 5); - - mmiowb(); - ret = 0; - -out: - return ret; -} static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, @@ -108,12 +46,11 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, int event) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; - int ret = -EAGAIN; + int ret; mutex_lock(&cmd->hcr_mutex); -
ret = __hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, token, - event); + ret = hr_dev->hw->post_mbox(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, token, event); mutex_unlock(&cmd->hcr_mutex); return ret; @@ -126,9 +63,6 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, unsigned long timeout) { struct device *dev = hr_dev->dev; - u8 __iomem *hcr = hr_dev->cmd.hcr; - unsigned long end = 0; - u32 status = 0; int ret; ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, @@ -136,29 +70,10 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, CMD_POLL_TOKEN, 0); if (ret) { dev_err(dev, "[cmd_poll]hns_roce_cmd_mbox_post_hw failed\n"); - goto out; + return ret; } - end = msecs_to_jiffies(timeout) + jiffies; - while (cmd_pending(hr_dev) && time_before(jiffies, end)) - cond_resched(); - - if (cmd_pending(hr_dev)) { - dev_err(dev, "[cmd_poll]hw run cmd TIMEDOUT!\n"); - ret = -ETIMEDOUT; - goto out; - } - - status = le32_to_cpu((__force __be32) - __raw_readl(hcr + HCR_STATUS_OFFSET)); - if ((status & STATUS_MASK) != 0x1) { - dev_err(dev, "mailbox status 0x%x!\n", status); - ret = -EBUSY; - goto out; - } - -out: - return ret; + return hr_dev->hw->chk_mbox(hr_dev, timeout); } static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, @@ -198,7 +113,7 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, struct hns_roce_cmdq *cmd = &hr_dev->cmd; struct hns_roce_cmd_context *context; struct device *dev = hr_dev->dev; - int ret = 0; + int ret; spin_lock(&cmd->context_lock); WARN_ON(cmd->free_head < 0); @@ -280,7 +195,6 @@ int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) hr_dev->cmd.use_events = 0; hr_dev->cmd.toggle = 1; hr_dev->cmd.max_cmds = CMD_MAX_NUM; - hr_dev->cmd.hcr = hr_dev->reg_base + ROCEE_MB1_REG; hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev, HNS_ROCE_MAILBOX_SIZE, HNS_ROCE_MAILBOX_SIZE, 0); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 94381c2f581a..0c950f8e4249 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -341,6 +341,7 @@ #define ROCEE_BT_CMD_L_REG 0x200 #define ROCEE_MB1_REG 0x210 +#define ROCEE_MB6_REG 0x224 #define ROCEE_DB_SQ_L_0_REG 0x230 #define ROCEE_DB_OTHERS_L_0_REG 0x238 #define ROCEE_QP1C_CFG0_0_REG 0x270 diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 8ec6d6eed4dc..7c587504fa9f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -367,7 +367,6 @@ struct hns_roce_cmd_context { struct hns_roce_cmdq { struct dma_pool *pool; - u8 __iomem *hcr; struct mutex hcr_mutex; struct semaphore poll_sem; /* @@ -517,6 +516,10 @@ struct hns_roce_hw { int (*hw_profile)(struct hns_roce_dev *hr_dev); int (*hw_init)(struct hns_roce_dev *hr_dev); void (*hw_exit)(struct hns_roce_dev *hr_dev); + int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, + u16 token, int event); + int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); void (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, union ib_gid *gid); void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 
267e400fd527..2bf28169439e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1619,6 +1619,79 @@ void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) hns_roce_db_free(hr_dev); } +static int hns_roce_v1_cmd_pending(struct hns_roce_dev *hr_dev) +{ + u32 status = readl(hr_dev->reg_base + ROCEE_MB6_REG); + + return (!!(status & (1 << HCR_GO_BIT))); +} + +int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, + u16 op, u16 token, int event) +{ + u32 *hcr = (u32 *)(hr_dev->reg_base + ROCEE_MB1_REG); + unsigned long end; + u32 val = 0; + + end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; + while (hns_roce_v1_cmd_pending(hr_dev)) { + if (time_after(jiffies, end)) { + dev_err(hr_dev->dev, "jiffies=%d end=%d\n", + (int)jiffies, (int)end); + return -EAGAIN; + } + cond_resched(); + } + + roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, + op); + roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, + ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); + roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); + roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); + roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, + ROCEE_MB6_ROCEE_MB_TOKEN_S, token); + + __raw_writeq(cpu_to_le64(in_param), hcr + 0); + __raw_writeq(cpu_to_le64(out_param), hcr + 2); + __raw_writel(cpu_to_le32(in_modifier), hcr + 4); + /* Memory barrier */ + wmb(); + + __raw_writel(cpu_to_le32(val), hcr + 5); + + mmiowb(); + + return 0; +} + +static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, + unsigned long timeout) +{ + u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; + unsigned long end = 0; + u32 status = 0; + + end = msecs_to_jiffies(timeout) + jiffies; + while (hns_roce_v1_cmd_pending(hr_dev) && time_before(jiffies, end)) + cond_resched(); + + if (hns_roce_v1_cmd_pending(hr_dev)) { + dev_err(hr_dev->dev, "[cmd_poll]hw run cmd TIMEDOUT!\n"); + return -ETIMEDOUT; + } + + status = le32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_STATUS_OFFSET)); + if ((status & STATUS_MASK) != 0x1) { + dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status); + return -EBUSY; + } + + return 0; +} + void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, union ib_gid *gid) { @@ -3849,6 +3922,8 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .hw_profile = hns_roce_v1_profile, .hw_init = hns_roce_v1_init, .hw_exit = hns_roce_v1_exit, + .post_mbox = hns_roce_v1_post_mbox, + .chk_mbox = hns_roce_v1_chk_mbox, .set_gid = hns_roce_v1_set_gid, .set_mac = hns_roce_v1_set_mac, .set_mtu = hns_roce_v1_set_mtu, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index b213b5e6fef1..eb83ff3bfcad 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -948,6 +948,11 @@ struct hns_roce_qp_context { #define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S) +#define STATUS_MASK 0xff +#define GO_BIT_TIMEOUT_MSECS 10000 +#define HCR_STATUS_OFFSET 0x18 +#define HCR_GO_BIT 15 + struct hns_roce_rq_db { u32 u32_4; u32 u32_8; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index af3cb30f04f3..259af2e72695 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -553,10 +553,94 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) 
return 0; } +static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) +{ + u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + + return status >> HNS_ROCE_HW_RUN_BIT_SHIFT; +} + +static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev) +{ + u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + + return status & HNS_ROCE_HW_MB_STATUS_MASK; +} + +static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, + u16 op, u16 token, int event) +{ + struct device *dev = hr_dev->dev; + u32 *hcr = (u32 *)(hr_dev->reg_base + ROCEE_VF_MB_CFG0_REG); + unsigned long end; + u32 val0 = 0; + u32 val1 = 0; + + end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies; + while (hns_roce_v2_cmd_pending(hr_dev)) { + if (time_after(jiffies, end)) { + dev_dbg(dev, "jiffies=%d end=%d\n", (int)jiffies, + (int)end); + return -EAGAIN; + } + cond_resched(); + } + + roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK, + HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier); + roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK, + HNS_ROCE_VF_MB4_CMD_SHIFT, op); + roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK, + HNS_ROCE_VF_MB5_EVENT_SHIFT, event); + roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK, + HNS_ROCE_VF_MB5_TOKEN_SHIFT, token); + + __raw_writeq(cpu_to_le64(in_param), hcr + 0); + __raw_writeq(cpu_to_le64(out_param), hcr + 2); + + /* Memory barrier */ + wmb(); + + __raw_writel(cpu_to_le32(val0), hcr + 4); + __raw_writel(cpu_to_le32(val1), hcr + 5); + + mmiowb(); + + return 0; +} + +static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, + unsigned long timeout) +{ + struct device *dev = hr_dev->dev; + unsigned long end = 0; + u32 status; + + end = msecs_to_jiffies(timeout) + jiffies; + while (hns_roce_v2_cmd_pending(hr_dev) && time_before(jiffies, end)) + cond_resched(); + + if (hns_roce_v2_cmd_pending(hr_dev)) { + dev_err(dev, "[cmd_poll]hw run cmd TIMEDOUT!\n"); + return -ETIMEDOUT; + } + + status = hns_roce_v2_cmd_complete(hr_dev); + if (status != 0x1) { + dev_err(dev, "mailbox status 0x%x!\n", status); + return -EBUSY; + } + + return 0; +} + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, .hw_profile = hns_roce_v2_profile, + .post_mbox = hns_roce_v2_post_mbox, + .chk_mbox = hns_roce_v2_chk_mbox, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 3dcc80096d53..b420ea99e3ae 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -244,6 +244,26 @@ struct hns_roce_cmq_desc { u32 data[6]; }; +#define ROCEE_VF_MB_CFG0_REG 0x40 +#define ROCEE_VF_MB_STATUS_REG 0x58 + +#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000 + +#define HNS_ROCE_HW_RUN_BIT_SHIFT 31 +#define HNS_ROCE_HW_MB_STATUS_MASK 0xFF + +#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00 +#define HNS_ROCE_VF_MB4_TAG_SHIFT 8 + +#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF +#define HNS_ROCE_VF_MB4_CMD_SHIFT 0 + +#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000 +#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16 + +#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF +#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0 + struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; From a25d13cbe816a6f8a44382273d3fdd8276318777 Mon Sep 17 00:00:00 2001 From: Shaobo Xu Date: Wed, 30 Aug 2017 17:23:06 +0800 Subject: [PATCH 018/296] RDMA/hns: Add the interfaces 
to support multi hop addressing for the contexts in hip08 The contexts (QPC/MTPT/CQC/SRQC) in hip08 can support multi hop addressing. The address of a context can be retrieved via the BT (Base Address Table) with multi hop addressing. The first hop BT BA can be retrieved from the RAM in the chip by the bt_idx and bt_num. This patch adds the interfaces in HEM to support multi hop addressing for the contexts. Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 23 +- drivers/infiniband/hw/hns/hns_roce_hem.c | 578 +++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hem.h | 23 + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 + drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 + 6 files changed, 630 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7c587504fa9f..4574a7bb8c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -78,6 +78,8 @@ #define HNS_ROCE_MAX_GID_NUM 16 #define HNS_ROCE_GID_SIZE 16 +#define HNS_ROCE_HOP_NUM_0 0xff + #define BITMAP_NO_RR 0 #define BITMAP_RR 1 @@ -232,6 +234,10 @@ struct hns_roce_hem_table { int lowmem; struct mutex mutex; struct hns_roce_hem **hem; + u64 **bt_l1; + dma_addr_t *bt_l1_dma_addr; + u64 **bt_l0; + dma_addr_t *bt_l0_dma_addr; }; struct hns_roce_mtt { @@ -507,6 +513,18 @@ struct hns_roce_caps { u32 srqc_bt_num; u32 cqc_bt_num; u32 mpt_bt_num; + u32 qpc_ba_pg_sz; + u32 qpc_buf_pg_sz; + u32 qpc_hop_num; + u32 srqc_ba_pg_sz; + u32 srqc_buf_pg_sz; + u32 srqc_hop_num; + u32 cqc_ba_pg_sz; + u32 cqc_buf_pg_sz; + u32 cqc_hop_num; + u32 mpt_ba_pg_sz; + u32 mpt_buf_pg_sz; + u32 mpt_hop_num; }; struct hns_roce_hw { @@ -530,8 +548,11 @@ struct hns_roce_hw { void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, dma_addr_t dma_handle, int nent, u32 vector); + int (*set_hem)(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj, int step_idx); int (*clear_hem)(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj); + struct hns_roce_hem_table *table, int obj, + int step_idx); int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 0ab49122e8c9..9bc8c6bd301d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -42,8 +42,140 @@ #define DMA_ADDR_T_SHIFT 12 #define BT_BA_SHIFT 32 -struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, - gfp_t gfp_mask) +bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) +{ + if ((hr_dev->caps.qpc_hop_num && type == HEM_TYPE_QPC) || + (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) || + (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || + (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC)) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(hns_roce_check_whether_mhop); + +static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx, + u32 bt_chunk_num) +{ + int i; + + for (i = 0; i < bt_chunk_num; i++) + if (hem[start_idx + i]) + return false; + + return true; +} + +static bool
hns_roce_check_bt_null(u64 **bt, u64 start_idx, u32 bt_chunk_num) +{ + int i; + + for (i = 0; i < bt_chunk_num; i++) + if (bt[start_idx + i]) + return false; + + return true; +} + +static int hns_roce_get_bt_num(u32 table_type, u32 hop_num) +{ + if (check_whether_bt_num_3(table_type, hop_num)) + return 3; + else if (check_whether_bt_num_2(table_type, hop_num)) + return 2; + else if (check_whether_bt_num_1(table_type, hop_num)) + return 1; + else + return 0; +} + +int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long *obj, + struct hns_roce_hem_mhop *mhop) +{ + struct device *dev = hr_dev->dev; + u32 chunk_ba_num; + u32 table_idx; + u32 bt_num; + u32 chunk_size; + + switch (table->type) { + case HEM_TYPE_QPC: + mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.qpc_bt_num; + mhop->hop_num = hr_dev->caps.qpc_hop_num; + break; + case HEM_TYPE_MTPT: + mhop->buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.mpt_bt_num; + mhop->hop_num = hr_dev->caps.mpt_hop_num; + break; + case HEM_TYPE_CQC: + mhop->buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.cqc_bt_num; + mhop->hop_num = hr_dev->caps.cqc_hop_num; + break; + case HEM_TYPE_SRQC: + mhop->buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; + mhop->hop_num = hr_dev->caps.srqc_hop_num; + break; + default: + dev_err(dev, "Table %d not support multi-hop addressing!\n", + table->type); + return -EINVAL; + } + + if (!obj) + return 0; + + /* QPC/MTPT/CQC/SRQC alloc hem for buffer pages. 
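+ * The decomposition below is radix-style: each BT chunk holds
+ * chunk_ba_num = bt_chunk_size / 8 base addresses (one 8-byte BA each),
+ * so l2_idx, l1_idx and l0_idx are simply the low-to-high digits of
+ * table_idx written in base chunk_ba_num.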
*/ + bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num); + chunk_ba_num = mhop->bt_chunk_size / 8; + chunk_size = mhop->buf_chunk_size; + table_idx = (*obj & (table->num_obj - 1)) / + (chunk_size / table->obj_size); + switch (bt_num) { + case 3: + mhop->l2_idx = table_idx & (chunk_ba_num - 1); + mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1); + mhop->l0_idx = table_idx / chunk_ba_num / chunk_ba_num; + break; + case 2: + mhop->l1_idx = table_idx & (chunk_ba_num - 1); + mhop->l0_idx = table_idx / chunk_ba_num; + break; + case 1: + mhop->l0_idx = table_idx; + break; + default: + dev_err(dev, "Table %d not support hop_num = %d!\n", + table->type, mhop->hop_num); + return -EINVAL; + } + if (mhop->l0_idx >= mhop->ba_l0_num) + mhop->l0_idx %= mhop->ba_l0_num; + + return 0; +} +EXPORT_SYMBOL_GPL(hns_roce_calc_hem_mhop); + +static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, + int npages, + unsigned long hem_alloc_size, + gfp_t gfp_mask) { struct hns_roce_hem_chunk *chunk = NULL; struct hns_roce_hem *hem; @@ -61,7 +193,7 @@ struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, hem->refcount = 0; INIT_LIST_HEAD(&hem->chunk_list); - order = get_order(HNS_ROCE_HEM_ALLOC_SIZE); + order = get_order(hem_alloc_size); while (npages > 0) { if (!chunk) { @@ -209,6 +341,169 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, return ret; } +int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long obj) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_hem_mhop mhop; + struct hns_roce_hem_iter iter; + u32 buf_chunk_size; + u32 bt_chunk_size; + u32 chunk_ba_num; + u32 hop_num; + u32 size; + u32 bt_num; + u64 hem_idx; + u64 bt_l1_idx = 0; + u64 bt_l0_idx = 0; + u64 bt_ba; + unsigned long mhop_obj = obj; + int bt_l1_allocated = 0; + int bt_l0_allocated = 0; + int step_idx; + int ret; + + ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); + if (ret) + return ret; + + buf_chunk_size = mhop.buf_chunk_size; + bt_chunk_size = mhop.bt_chunk_size; + hop_num = mhop.hop_num; + chunk_ba_num = bt_chunk_size / 8; + + bt_num = hns_roce_get_bt_num(table->type, hop_num); + switch (bt_num) { + case 3: + hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num + + mhop.l1_idx * chunk_ba_num + mhop.l2_idx; + bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; + bt_l0_idx = mhop.l0_idx; + break; + case 2: + hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; + bt_l0_idx = mhop.l0_idx; + break; + case 1: + hem_idx = mhop.l0_idx; + break; + default: + dev_err(dev, "Table %d not support hop_num = %d!\n", + table->type, hop_num); + return -EINVAL; + } + + mutex_lock(&table->mutex); + + if (table->hem[hem_idx]) { + ++table->hem[hem_idx]->refcount; + goto out; + } + + /* alloc L1 BA's chunk */ + if ((check_whether_bt_num_3(table->type, hop_num) || + check_whether_bt_num_2(table->type, hop_num)) && + !table->bt_l0[bt_l0_idx]) { + table->bt_l0[bt_l0_idx] = dma_alloc_coherent(dev, bt_chunk_size, + &(table->bt_l0_dma_addr[bt_l0_idx]), + GFP_KERNEL); + if (!table->bt_l0[bt_l0_idx]) { + ret = -ENOMEM; + goto out; + } + bt_l0_allocated = 1; + + /* set base address to hardware */ + if (table->type < HEM_TYPE_MTT) { + step_idx = 0; + if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) { + ret = -ENODEV; + dev_err(dev, "set HEM base address to HW failed!\n"); + goto err_dma_alloc_l1; + } + } + } + + /* alloc L2 BA's chunk */ + if (check_whether_bt_num_3(table->type, hop_num) && + 
!table->bt_l1[bt_l1_idx]) { + table->bt_l1[bt_l1_idx] = dma_alloc_coherent(dev, bt_chunk_size, + &(table->bt_l1_dma_addr[bt_l1_idx]), + GFP_KERNEL); + if (!table->bt_l1[bt_l1_idx]) { + ret = -ENOMEM; + goto err_dma_alloc_l1; + } + bt_l1_allocated = 1; + *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = + table->bt_l1_dma_addr[bt_l1_idx]; + + /* set base address to hardware */ + step_idx = 1; + if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) { + ret = -ENODEV; + dev_err(dev, "set HEM base address to HW failed!\n"); + goto err_alloc_hem_buf; + } + } + + /* alloc buffer space chunk for QPC/MTPT/CQC/SRQC. */ + size = buf_chunk_size; + table->hem[hem_idx] = hns_roce_alloc_hem(hr_dev, + size >> PAGE_SHIFT, + size, + (table->lowmem ? GFP_KERNEL : + GFP_HIGHUSER) | __GFP_NOWARN); + if (!table->hem[hem_idx]) { + ret = -ENOMEM; + goto err_alloc_hem_buf; + } + + hns_roce_hem_first(table->hem[hem_idx], &iter); + bt_ba = hns_roce_hem_addr(&iter); + + if (table->type < HEM_TYPE_MTT) { + if (hop_num == 2) { + *(table->bt_l1[bt_l1_idx] + mhop.l2_idx) = bt_ba; + step_idx = 2; + } else if (hop_num == 1) { + *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba; + step_idx = 1; + } else if (hop_num == HNS_ROCE_HOP_NUM_0) { + step_idx = 0; + } + + /* set HEM base address to hardware */ + if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) { + ret = -ENODEV; + dev_err(dev, "set HEM base address to HW failed!\n"); + goto err_alloc_hem_buf; + } + } + + ++table->hem[hem_idx]->refcount; + goto out; + +err_alloc_hem_buf: + if (bt_l1_allocated) { + dma_free_coherent(dev, bt_chunk_size, table->bt_l1[bt_l1_idx], + table->bt_l1_dma_addr[bt_l1_idx]); + table->bt_l1[bt_l1_idx] = NULL; + } + +err_dma_alloc_l1: + if (bt_l0_allocated) { + dma_free_coherent(dev, bt_chunk_size, table->bt_l0[bt_l0_idx], + table->bt_l0_dma_addr[bt_l0_idx]); + table->bt_l0[bt_l0_idx] = NULL; + } + +out: + mutex_unlock(&table->mutex); + return ret; +} + int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { @@ -216,6 +511,9 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, int ret = 0; unsigned long i; + if (hns_roce_check_whether_mhop(hr_dev, table->type)) + return hns_roce_table_mhop_get(hr_dev, table, obj); + i = (obj & (table->num_obj - 1)) / (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); @@ -228,6 +526,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, table->hem[i] = hns_roce_alloc_hem(hr_dev, HNS_ROCE_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + HNS_ROCE_HEM_ALLOC_SIZE, (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN); if (!table->hem[i]) { @@ -248,12 +547,128 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, return ret; } +void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long obj, + int check_refcount) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_hem_mhop mhop; + unsigned long mhop_obj = obj; + u32 bt_chunk_size; + u32 chunk_ba_num; + u32 hop_num; + u32 start_idx; + u32 bt_num; + u64 hem_idx; + u64 bt_l1_idx = 0; + int ret; + + ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); + if (ret) + return; + + bt_chunk_size = mhop.bt_chunk_size; + hop_num = mhop.hop_num; + chunk_ba_num = bt_chunk_size / 8; + + bt_num = hns_roce_get_bt_num(table->type, hop_num); + switch (bt_num) { + case 3: + hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num + + mhop.l1_idx * chunk_ba_num + mhop.l2_idx; + bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; + break; + case 2: + hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; + break; + case 1: + hem_idx = mhop.l0_idx; + break; + default: + dev_err(dev, "Table %d not support hop_num = %d!\n", + table->type, hop_num); + return; + } + + mutex_lock(&table->mutex); + + if (check_refcount && (--table->hem[hem_idx]->refcount > 0)) { + mutex_unlock(&table->mutex); + return; + } + + if (table->type < HEM_TYPE_MTT && hop_num == 1) { + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) + dev_warn(dev, "Clear HEM base address failed.\n"); + } else if (table->type < HEM_TYPE_MTT && hop_num == 2) { + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 2)) + dev_warn(dev, "Clear HEM base address failed.\n"); + } else if (table->type < HEM_TYPE_MTT && + hop_num == HNS_ROCE_HOP_NUM_0) { + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) + dev_warn(dev, "Clear HEM base address failed.\n"); + } + + /* free buffer space chunk for QPC/MTPT/CQC/SRQC. 
*/ + hns_roce_free_hem(hr_dev, table->hem[hem_idx]); + table->hem[hem_idx] = NULL; + + if (check_whether_bt_num_2(table->type, hop_num)) { + start_idx = mhop.l0_idx * chunk_ba_num; + if (hns_roce_check_hem_null(table->hem, start_idx, + chunk_ba_num)) { + if (table->type < HEM_TYPE_MTT && + hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) + dev_warn(dev, "Clear HEM base address failed.\n"); + + dma_free_coherent(dev, bt_chunk_size, + table->bt_l0[mhop.l0_idx], + table->bt_l0_dma_addr[mhop.l0_idx]); + table->bt_l0[mhop.l0_idx] = NULL; + } + } else if (check_whether_bt_num_3(table->type, hop_num)) { + start_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num + + mhop.l1_idx * chunk_ba_num; + if (hns_roce_check_hem_null(table->hem, start_idx, + chunk_ba_num)) { + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) + dev_warn(dev, "Clear HEM base address failed.\n"); + + dma_free_coherent(dev, bt_chunk_size, + table->bt_l1[bt_l1_idx], + table->bt_l1_dma_addr[bt_l1_idx]); + table->bt_l1[bt_l1_idx] = NULL; + + start_idx = mhop.l0_idx * chunk_ba_num; + if (hns_roce_check_bt_null(table->bt_l1, start_idx, + chunk_ba_num)) { + if (hr_dev->hw->clear_hem(hr_dev, table, obj, + 0)) + dev_warn(dev, "Clear HEM base address failed.\n"); + + dma_free_coherent(dev, bt_chunk_size, + table->bt_l0[mhop.l0_idx], + table->bt_l0_dma_addr[mhop.l0_idx]); + table->bt_l0[mhop.l0_idx] = NULL; + } + } + } + + mutex_unlock(&table->mutex); +} + void hns_roce_table_put(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { struct device *dev = hr_dev->dev; unsigned long i; + if (hns_roce_check_whether_mhop(hr_dev, table->type)) { + hns_roce_table_mhop_put(hr_dev, table, obj, 1); + return; + } + i = (obj & (table->num_obj - 1)) / (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); @@ -261,7 +676,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, if (--table->hem[i]->refcount == 0) { /* Clear HEM base address */ - if (hr_dev->hw->clear_hem(hr_dev, table, obj)) + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) dev_warn(dev, "Clear HEM base address failed.\n"); hns_roce_free_hem(hr_dev, table->hem[i]); @@ -357,15 +772,105 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long obj_size, unsigned long nobj, int use_lowmem) { + struct device *dev = hr_dev->dev; unsigned long obj_per_chunk; unsigned long num_hem; - obj_per_chunk = HNS_ROCE_TABLE_CHUNK_SIZE / obj_size; - num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; + if (!hns_roce_check_whether_mhop(hr_dev, type)) { + obj_per_chunk = HNS_ROCE_TABLE_CHUNK_SIZE / obj_size; + num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; - table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL); - if (!table->hem) - return -ENOMEM; + table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL); + if (!table->hem) + return -ENOMEM; + } else { + unsigned long buf_chunk_size; + unsigned long bt_chunk_size; + unsigned long bt_chunk_num; + unsigned long num_bt_l0; + u32 hop_num; + + switch (type) { + case HEM_TYPE_QPC: + buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz + + PAGE_SHIFT); + bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz + + PAGE_SHIFT); + num_bt_l0 = hr_dev->caps.qpc_bt_num; + hop_num = hr_dev->caps.qpc_hop_num; + break; + case HEM_TYPE_MTPT: + buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz + + PAGE_SHIFT); + bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz + + PAGE_SHIFT); + num_bt_l0 = hr_dev->caps.mpt_bt_num; + hop_num = hr_dev->caps.mpt_hop_num; + break; + case HEM_TYPE_CQC: + buf_chunk_size = 1 << 
(hr_dev->caps.cqc_buf_pg_sz + + PAGE_SHIFT); + bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz + + PAGE_SHIFT); + num_bt_l0 = hr_dev->caps.cqc_bt_num; + hop_num = hr_dev->caps.cqc_hop_num; + break; + case HEM_TYPE_SRQC: + buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz + + PAGE_SHIFT); + bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz + + PAGE_SHIFT); + num_bt_l0 = hr_dev->caps.srqc_bt_num; + hop_num = hr_dev->caps.srqc_hop_num; + break; + default: + dev_err(dev, + "Table %d not support to init hem table here!\n", + type); + return -EINVAL; + } + obj_per_chunk = buf_chunk_size / obj_size; + num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; + bt_chunk_num = bt_chunk_size / 8; + + table->hem = kcalloc(num_hem, sizeof(*table->hem), + GFP_KERNEL); + if (!table->hem) + goto err_kcalloc_hem_buf; + + if (check_whether_bt_num_3(table->type, hop_num)) { + unsigned long num_bt_l1; + + num_bt_l1 = (num_hem + bt_chunk_num - 1) / + bt_chunk_num; + table->bt_l1 = kcalloc(num_bt_l1, + sizeof(*table->bt_l1), + GFP_KERNEL); + if (!table->bt_l1) + goto err_kcalloc_bt_l1; + + table->bt_l1_dma_addr = kcalloc(num_bt_l1, + sizeof(*table->bt_l1_dma_addr), + GFP_KERNEL); + + if (!table->bt_l1_dma_addr) + goto err_kcalloc_l1_dma; + } + + if (check_whether_bt_num_2(table->type, hop_num) || + check_whether_bt_num_3(table->type, hop_num)) { + table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0), + GFP_KERNEL); + if (!table->bt_l0) + goto err_kcalloc_bt_l0; + + table->bt_l0_dma_addr = kcalloc(num_bt_l0, + sizeof(*table->bt_l0_dma_addr), + GFP_KERNEL); + if (!table->bt_l0_dma_addr) + goto err_kcalloc_l0_dma; + } + } table->type = type; table->num_hem = num_hem; @@ -375,6 +880,54 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, mutex_init(&table->mutex); return 0; + +err_kcalloc_l0_dma: + kfree(table->bt_l0); + table->bt_l0 = NULL; + +err_kcalloc_bt_l0: + kfree(table->bt_l1_dma_addr); + table->bt_l1_dma_addr = NULL; + +err_kcalloc_l1_dma: + kfree(table->bt_l1); + table->bt_l1 = NULL; + +err_kcalloc_bt_l1: + kfree(table->hem); + table->hem = NULL; + +err_kcalloc_hem_buf: + return -ENOMEM; +} + +void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table) +{ + struct hns_roce_hem_mhop mhop; + u32 buf_chunk_size; + int i; + u64 obj; + + hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); + buf_chunk_size = mhop.buf_chunk_size; + + for (i = 0; i < table->num_hem; ++i) { + obj = i * buf_chunk_size / table->obj_size; + if (table->hem[i]) + hns_roce_table_mhop_put(hr_dev, table, obj, 0); + } + + kfree(table->hem); + table->hem = NULL; + kfree(table->bt_l1); + table->bt_l1 = NULL; + kfree(table->bt_l1_dma_addr); + table->bt_l1_dma_addr = NULL; + kfree(table->bt_l0); + table->bt_l0 = NULL; + kfree(table->bt_l0_dma_addr); + table->bt_l0_dma_addr = NULL; } void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, @@ -383,10 +936,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, struct device *dev = hr_dev->dev; unsigned long i; + if (hns_roce_check_whether_mhop(hr_dev, table->type)) { + hns_roce_cleanup_mhop_hem_table(hr_dev, table); + return; + } + for (i = 0; i < table->num_hem; ++i) if (table->hem[i]) { if (hr_dev->hw->clear_hem(hr_dev, table, - i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size)) + i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size, 0)) dev_err(dev, "Clear HEM base address failed.\n"); hns_roce_free_hem(hr_dev, table->hem[i]); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 
435748858252..b064ef5fad08 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -54,6 +54,15 @@ enum { ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ (sizeof(struct scatterlist))) +#define check_whether_bt_num_3(type, hop_num) \ + (type < HEM_TYPE_MTT && hop_num == 2) + +#define check_whether_bt_num_2(type, hop_num) \ + (type < HEM_TYPE_MTT && hop_num == 1) + +#define check_whether_bt_num_1(type, hop_num) \ + (type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) + enum { HNS_ROCE_HEM_PAGE_SHIFT = 12, HNS_ROCE_HEM_PAGE_SIZE = 1 << HNS_ROCE_HEM_PAGE_SHIFT, @@ -77,6 +86,16 @@ struct hns_roce_hem_iter { int page_idx; }; +struct hns_roce_hem_mhop { + u32 hop_num; + u32 buf_chunk_size; + u32 bt_chunk_size; + u32 ba_l0_num; + u32 l0_idx;/* level 0 base address table index */ + u32 l1_idx;/* level 1 base address table index */ + u32 l2_idx;/* level 2 base address table index */ +}; + void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem); int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj); @@ -97,6 +116,10 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table); void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev); +int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long *obj, + struct hns_roce_hem_mhop *mhop); +bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type); static inline void hns_roce_hem_first(struct hns_roce_hem *hem, struct hns_roce_hem_iter *iter) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 2bf28169439e..50dee130737e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2356,7 +2356,7 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj) + struct hns_roce_hem_table *table, int obj, int step_idx) { struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 259af2e72695..5127edcec834 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -545,6 +545,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->reserved_uars = 0; caps->reserved_cqs = 0; + caps->qpc_ba_pg_sz = 0; + caps->qpc_buf_pg_sz = 0; + caps->qpc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; + caps->srqc_ba_pg_sz = 0; + caps->srqc_buf_pg_sz = 0; + caps->srqc_hop_num = HNS_ROCE_HOP_NUM_0; + caps->cqc_ba_pg_sz = 0; + caps->cqc_buf_pg_sz = 0; + caps->cqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; + caps->mpt_ba_pg_sz = 0; + caps->mpt_buf_pg_sz = 0; + caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; + caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = 2; caps->local_ca_ack_delay = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index b420ea99e3ae..91f5161f3411 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -72,6 +72,9 @@ #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_CMQ_TX_TIMEOUT 200 +#define HNS_ROCE_CONTEXT_HOP_NUM 1 +#define HNS_ROCE_MTT_HOP_NUM 1 + #define 
HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0 #define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1 #define HNS_ROCE_CMD_FLAG_NEXT_SHIFT 2 From a81fba28136d7596bc31bf705d39d4b97cf3f0a7 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:07 +0800 Subject: [PATCH 019/296] RDMA/hns: Configure BT BA and BT attribute for the contexts in hip08 BT is used to retrieve the addresses of the contexts(QPC/MPT/CQC/SRQC) in memory. In order to support multi hop addressing for the contexts, the BT BA should be configured by mailbox, and the BT attribution will be set by command. This patch is to configure the BT BA and BT attribution for the contexts. Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 46 +++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 198 ++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 49 +++++ 3 files changed, 292 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index f5a9ee2fc53d..fc400698af1b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -36,6 +36,52 @@ #define HNS_ROCE_MAILBOX_SIZE 4096 #define HNS_ROCE_CMD_TIMEOUT_MSECS 10000 +enum { + /* QPC BT commands */ + HNS_ROCE_CMD_WRITE_QPC_BT0 = 0x0, + HNS_ROCE_CMD_WRITE_QPC_BT1 = 0x1, + HNS_ROCE_CMD_WRITE_QPC_BT2 = 0x2, + HNS_ROCE_CMD_READ_QPC_BT0 = 0x4, + HNS_ROCE_CMD_READ_QPC_BT1 = 0x5, + HNS_ROCE_CMD_READ_QPC_BT2 = 0x6, + HNS_ROCE_CMD_DESTROY_QPC_BT0 = 0x8, + HNS_ROCE_CMD_DESTROY_QPC_BT1 = 0x9, + HNS_ROCE_CMD_DESTROY_QPC_BT2 = 0xa, + + /* CQC BT commands */ + HNS_ROCE_CMD_WRITE_CQC_BT0 = 0x10, + HNS_ROCE_CMD_WRITE_CQC_BT1 = 0x11, + HNS_ROCE_CMD_WRITE_CQC_BT2 = 0x12, + HNS_ROCE_CMD_READ_CQC_BT0 = 0x14, + HNS_ROCE_CMD_READ_CQC_BT1 = 0x15, + HNS_ROCE_CMD_READ_CQC_BT2 = 0x1b, + HNS_ROCE_CMD_DESTROY_CQC_BT0 = 0x18, + HNS_ROCE_CMD_DESTROY_CQC_BT1 = 0x19, + HNS_ROCE_CMD_DESTROY_CQC_BT2 = 0x1a, + + /* MPT BT commands */ + HNS_ROCE_CMD_WRITE_MPT_BT0 = 0x20, + HNS_ROCE_CMD_WRITE_MPT_BT1 = 0x21, + HNS_ROCE_CMD_WRITE_MPT_BT2 = 0x22, + HNS_ROCE_CMD_READ_MPT_BT0 = 0x24, + HNS_ROCE_CMD_READ_MPT_BT1 = 0x25, + HNS_ROCE_CMD_READ_MPT_BT2 = 0x26, + HNS_ROCE_CMD_DESTROY_MPT_BT0 = 0x28, + HNS_ROCE_CMD_DESTROY_MPT_BT1 = 0x29, + HNS_ROCE_CMD_DESTROY_MPT_BT2 = 0x2a, + + /* SRQC BT commands */ + HNS_ROCE_CMD_WRITE_SRQC_BT0 = 0x30, + HNS_ROCE_CMD_WRITE_SRQC_BT1 = 0x31, + HNS_ROCE_CMD_WRITE_SRQC_BT2 = 0x32, + HNS_ROCE_CMD_READ_SRQC_BT0 = 0x34, + HNS_ROCE_CMD_READ_SRQC_BT1 = 0x35, + HNS_ROCE_CMD_READ_SRQC_BT2 = 0x36, + HNS_ROCE_CMD_DESTROY_SRQC_BT0 = 0x38, + HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39, + HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a, +}; + enum { /* TPT commands */ HNS_ROCE_CMD_SW2HW_MPT = 0xd, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5127edcec834..77cf476d6096 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -475,6 +475,62 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) return hns_roce_cmq_send(hr_dev, desc, 2); } +static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) +{ + u8 srqc_hop_num = hr_dev->caps.srqc_hop_num; + u8 qpc_hop_num = hr_dev->caps.qpc_hop_num; + u8 cqc_hop_num = hr_dev->caps.cqc_hop_num; + u8 mpt_hop_num = hr_dev->caps.mpt_hop_num; + struct hns_roce_cfg_bt_attr *req; + struct hns_roce_cmq_desc desc; + + 
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_BT_ATTR, false); + req = (struct hns_roce_cfg_bt_attr *)desc.data; + memset(req, 0, sizeof(*req)); + + roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_M, + CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S, + hr_dev->caps.qpc_ba_pg_sz); + roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_M, + CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_S, + hr_dev->caps.qpc_buf_pg_sz); + roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_M, + CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_S, + qpc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : qpc_hop_num); + + roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_M, + CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_S, + hr_dev->caps.srqc_ba_pg_sz); + roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_M, + CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_S, + hr_dev->caps.srqc_buf_pg_sz); + roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_M, + CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_S, + srqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : srqc_hop_num); + + roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_M, + CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_S, + hr_dev->caps.cqc_ba_pg_sz); + roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_M, + CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_S, + hr_dev->caps.cqc_buf_pg_sz); + roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_M, + CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_S, + cqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : cqc_hop_num); + + roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_M, + CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_S, + hr_dev->caps.mpt_ba_pg_sz); + roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_M, + CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_S, + hr_dev->caps.mpt_buf_pg_sz); + roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M, + CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S, + mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 
0 : mpt_hop_num); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) { struct hns_roce_caps *caps = &hr_dev->caps; @@ -563,7 +619,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; - return 0; + ret = hns_roce_v2_set_bt(hr_dev); + if (ret) + dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", + ret); + + return ret; } static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) @@ -648,12 +709,147 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj, + int step_idx) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_hem_iter iter; + struct hns_roce_hem_mhop mhop; + struct hns_roce_hem *hem; + unsigned long mhop_obj = obj; + int i, j, k; + int ret = 0; + u64 hem_idx = 0; + u64 l1_idx = 0; + u64 bt_ba = 0; + u32 chunk_ba_num; + u32 hop_num; + u16 op = 0xff; + + if (!hns_roce_check_whether_mhop(hr_dev, table->type)) + return 0; + + hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); + i = mhop.l0_idx; + j = mhop.l1_idx; + k = mhop.l2_idx; + hop_num = mhop.hop_num; + chunk_ba_num = mhop.bt_chunk_size / 8; + + if (hop_num == 2) { + hem_idx = i * chunk_ba_num * chunk_ba_num + j * chunk_ba_num + + k; + l1_idx = i * chunk_ba_num + j; + } else if (hop_num == 1) { + hem_idx = i * chunk_ba_num + j; + } else if (hop_num == HNS_ROCE_HOP_NUM_0) { + hem_idx = i; + } + + switch (table->type) { + case HEM_TYPE_QPC: + op = HNS_ROCE_CMD_WRITE_QPC_BT0; + break; + case HEM_TYPE_MTPT: + op = HNS_ROCE_CMD_WRITE_MPT_BT0; + break; + case HEM_TYPE_CQC: + op = HNS_ROCE_CMD_WRITE_CQC_BT0; + break; + case HEM_TYPE_SRQC: + op = HNS_ROCE_CMD_WRITE_SRQC_BT0; + break; + default: + dev_warn(dev, "Table %d not to be written by mailbox!\n", + table->type); + return 0; + } + op += step_idx; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (check_whether_last_step(hop_num, step_idx)) { + hem = table->hem[hem_idx]; + for (hns_roce_hem_first(hem, &iter); + !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { + bt_ba = hns_roce_hem_addr(&iter); + + /* configure the ba, tag, and op */ + ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, + obj, 0, op, + HNS_ROCE_CMD_TIMEOUT_MSECS); + } + } else { + if (step_idx == 0) + bt_ba = table->bt_l0_dma_addr[i]; + else if (step_idx == 1 && hop_num == 2) + bt_ba = table->bt_l1_dma_addr[l1_idx]; + + /* configure the ba, tag, and op */ + ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, + 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); + } + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj, + int step_idx) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_cmd_mailbox *mailbox; + int ret = 0; + u16 op = 0xff; + + if (!hns_roce_check_whether_mhop(hr_dev, table->type)) + return 0; + + switch (table->type) { + case HEM_TYPE_QPC: + op = HNS_ROCE_CMD_DESTROY_QPC_BT0; + break; + case HEM_TYPE_MTPT: + op = HNS_ROCE_CMD_DESTROY_MPT_BT0; + break; + case HEM_TYPE_CQC: + op = HNS_ROCE_CMD_DESTROY_CQC_BT0; + break; + case HEM_TYPE_SRQC: + op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; + break; + default: + dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", + table->type); + return 0; + } + op += 
step_idx; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + /* configure the tag and op */ + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, obj, 0, op, + HNS_ROCE_CMD_TIMEOUT_MSECS); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, .hw_profile = hns_roce_v2_profile, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, + .set_hem = hns_roce_v2_set_hem, + .clear_hem = hns_roce_v2_clear_hem, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 91f5161f3411..cc8c8d05d699 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -93,6 +93,11 @@ #define HNS_ROCE_CMQ_EN_B 16 #define HNS_ROCE_CMQ_ENABLE BIT(HNS_ROCE_CMQ_EN_B) +#define check_whether_last_step(hop_num, step_idx) \ + ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \ + (step_idx == 1 && hop_num == 1) || \ + (step_idx == 2 && hop_num == 2)) + /* CMQ command */ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, @@ -239,6 +244,50 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) +struct hns_roce_cfg_bt_attr { + u32 vf_qpc_cfg; + u32 vf_srqc_cfg; + u32 vf_cqc_cfg; + u32 vf_mpt_cfg; + u32 rsv[2]; +}; + +#define CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S 0 +#define CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_M GENMASK(3, 0) + +#define CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_S 4 +#define CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_M GENMASK(7, 4) + +#define CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_S 8 +#define CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_M GENMASK(9, 8) + +#define CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_S 0 +#define CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_M GENMASK(3, 0) + +#define CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_S 4 +#define CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_M GENMASK(7, 4) + +#define CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_S 8 +#define CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_M GENMASK(9, 8) + +#define CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_S 0 +#define CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_M GENMASK(3, 0) + +#define CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_S 4 +#define CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_M GENMASK(7, 4) + +#define CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_S 8 +#define CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_M GENMASK(9, 8) + +#define CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_S 0 +#define CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_M GENMASK(3, 0) + +#define CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_S 4 +#define CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_M GENMASK(7, 4) + +#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8 +#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8) + struct hns_roce_cmq_desc { u16 opcode; u16 flag; From 6a93c77afe088225363f6941a29fff415b1f7172 Mon Sep 17 00:00:00 2001 From: Shaobo Xu Date: Wed, 30 Aug 2017 17:23:08 +0800 Subject: [PATCH 020/296] RDMA/hns: Update the interfaces for MTT/CQE multi hop addressing in hip08 The MTT(SQWQE/SGE/RQWQE) and CQE in hip08 can support multi hop addressing. The address of MTT/CQE can be retrieved by the BT (Base Address Table) with multi hop addressing. This patch is to update the interfaces in HEM to support multi hop addressing for the MTT/CQE. 
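As a quick illustration, the hop-index arithmetic reused here can be sketched in a few lines of user-space C. The chunk sizes, the object size, and the demo main() below are assumed example values, not the driver's configuration; only the index decomposition mirrors hns_roce_calc_hem_mhop().

/*
 * Minimal sketch of the three-hop index split, assuming 4 KB BT and
 * buffer pages and 64-byte objects.  The num_obj wrap mask applied by
 * the driver is omitted for brevity.
 */
#include <stdio.h>

#define BT_CHUNK_SIZE	4096		/* example BA page size */
#define BUF_CHUNK_SIZE	4096		/* example buffer page  */
#define OBJ_SIZE	64		/* example entry size   */

int main(void)
{
	unsigned long obj = 123456;	/* flat object index    */
	unsigned long chunk_ba_num = BT_CHUNK_SIZE / 8;	/* 8-byte BAs */
	unsigned long table_idx = obj / (BUF_CHUNK_SIZE / OBJ_SIZE);

	/* bt_num == 3 case: low bits select L2, then L1, then L0 */
	unsigned long l2 = table_idx & (chunk_ba_num - 1);
	unsigned long l1 = (table_idx / chunk_ba_num) & (chunk_ba_num - 1);
	unsigned long l0 = table_idx / chunk_ba_num / chunk_ba_num;

	printf("obj %lu -> l0 %lu, l1 %lu, l2 %lu\n", obj, l0, l1, l2);
	return 0;
}

With 4 KB BT pages, each extra hop widens the reachable range by a factor of 512 (4096 / 8), which is why one or two hops are enough even for large MTT/CQE tables.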
Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cq.c | 12 +- drivers/infiniband/hw/hns/hns_roce_device.h | 6 + drivers/infiniband/hw/hns/hns_roce_hem.c | 119 ++++++++++++++++---- drivers/infiniband/hw/hns/hns_roce_hem.h | 11 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 + drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + drivers/infiniband/hw/hns/hns_roce_mr.c | 16 ++- 8 files changed, 143 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 5f512429f634..362aeecc1a08 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -85,17 +85,19 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, struct hns_roce_uar *hr_uar, struct hns_roce_cq *hr_cq, int vector) { - struct hns_roce_cmd_mailbox *mailbox = NULL; - struct hns_roce_cq_table *cq_table = NULL; + struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_hem_table *mtt_table; + struct hns_roce_cq_table *cq_table; struct device *dev = hr_dev->dev; dma_addr_t dma_handle; - u64 *mtts = NULL; - int ret = 0; + u64 *mtts; + int ret; cq_table = &hr_dev->cq_table; /* Get the physical address of cq buf */ - mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + mtt_table = &hr_dev->mr_table.mtt_table; + mtts = hns_roce_table_find(hr_dev, mtt_table, hr_mtt->first_seg, &dma_handle); if (!mtts) { dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4574a7bb8c7f..7dd8fbb2b9f2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -525,6 +525,12 @@ struct hns_roce_caps { u32 mpt_ba_pg_sz; u32 mpt_buf_pg_sz; u32 mpt_hop_num; + u32 mtt_ba_pg_sz; + u32 mtt_buf_pg_sz; + u32 mtt_hop_num; + u32 cqe_ba_pg_sz; + u32 cqe_buf_pg_sz; + u32 cqe_hop_num; }; struct hns_roce_hw { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 9bc8c6bd301d..ac2d671ed2f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -47,7 +47,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) if ((hr_dev->caps.qpc_hop_num && type == HEM_TYPE_QPC) || (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) || (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || - (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC)) + (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) || + (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) || + (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT)) return true; return false; @@ -132,6 +134,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; + case HEM_TYPE_MTT: + mhop->buf_chunk_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.mtt_hop_num; + break; + case HEM_TYPE_CQE: + mhop->buf_chunk_size = 1 << (hr_dev->caps.cqe_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.cqe_hop_num; + break; default: 
dev_err(dev, "Table %d not support multi-hop addressing!\n", table->type); @@ -141,10 +159,14 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, if (!obj) return 0; - /* QPC/MTPT/CQC/SRQC alloc hem for buffer pages. */ + /* + * QPC/MTPT/CQC/SRQC alloc hem for buffer pages. + * MTT/CQE alloc hem for bt pages. + */ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num); chunk_ba_num = mhop->bt_chunk_size / 8; - chunk_size = mhop->buf_chunk_size; + chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : + mhop->bt_chunk_size; table_idx = (*obj & (table->num_obj - 1)) / (chunk_size / table->obj_size); switch (bt_num) { @@ -448,8 +470,11 @@ int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, } } - /* alloc buffer space chunk for QPC/MTPT/CQC/SRQC. */ - size = buf_chunk_size; + /* + * alloc buffer space chunk for QPC/MTPT/CQC/SRQC. + * alloc bt space chunk for MTT/CQE. + */ + size = table->type < HEM_TYPE_MTT ? buf_chunk_size : bt_chunk_size; table->hem[hem_idx] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT, size, @@ -480,6 +505,8 @@ int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, dev_err(dev, "set HEM base address to HW failed!\n"); goto err_alloc_hem_buf; } + } else if (hop_num == 2) { + *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba; } ++table->hem[hem_idx]->refcount; @@ -610,7 +637,10 @@ void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, dev_warn(dev, "Clear HEM base address failed.\n"); } - /* free buffer space chunk for QPC/MTPT/CQC/SRQC. */ + /* + * free buffer space chunk for QPC/MTPT/CQC/SRQC. + * free bt space chunk for MTT/CQE. + */ hns_roce_free_hem(hr_dev, table->hem[hem_idx]); table->hem[hem_idx] = NULL; @@ -686,23 +716,46 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, mutex_unlock(&table->mutex); } -void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj, - dma_addr_t *dma_handle) +void *hns_roce_table_find(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long obj, dma_addr_t *dma_handle) { struct hns_roce_hem_chunk *chunk; - unsigned long idx; - int i; - int offset, dma_offset; + struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; struct page *page = NULL; + unsigned long mhop_obj = obj; + unsigned long idx; + int offset, dma_offset; + int i, j; + u32 hem_idx = 0; if (!table->lowmem) return NULL; mutex_lock(&table->mutex); - idx = (obj & (table->num_obj - 1)) * table->obj_size; - hem = table->hem[idx / HNS_ROCE_TABLE_CHUNK_SIZE]; - dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE; + + if (!hns_roce_check_whether_mhop(hr_dev, table->type)) { + idx = (obj & (table->num_obj - 1)) * table->obj_size; + hem = table->hem[idx / HNS_ROCE_TABLE_CHUNK_SIZE]; + dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE; + } else { + hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); + /* mtt mhop */ + i = mhop.l0_idx; + j = mhop.l1_idx; + if (mhop.hop_num == 2) + hem_idx = i * (mhop.bt_chunk_size / 8) + j; + else if (mhop.hop_num == 1 || + mhop.hop_num == HNS_ROCE_HOP_NUM_0) + hem_idx = i; + + hem = table->hem[hem_idx]; + dma_offset = offset = (obj & (table->num_obj - 1)) * + table->obj_size % mhop.bt_chunk_size; + if (mhop.hop_num == 2) + dma_offset = offset = 0; + } if (!hem) goto out; @@ -735,9 +788,15 @@ int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long start, unsigned long end) { + struct hns_roce_hem_mhop mhop; unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size; - unsigned long i = 0; - int ret = 0; + 
unsigned long i; + int ret; + + if (hns_roce_check_whether_mhop(hr_dev, table->type)) { + hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); + inc = mhop.bt_chunk_size / table->obj_size; + } /* Allocate MTT entry memory according to chunk(128K) */ for (i = start; i <= end; i += inc) { @@ -760,10 +819,17 @@ void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long start, unsigned long end) { + struct hns_roce_hem_mhop mhop; + unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size; unsigned long i; + if (hns_roce_check_whether_mhop(hr_dev, table->type)) { + hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); + inc = mhop.bt_chunk_size / table->obj_size; + } + for (i = start; i <= end; - i += HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size) + i += inc) hns_roce_table_put(hr_dev, table, i); } @@ -787,7 +853,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long buf_chunk_size; unsigned long bt_chunk_size; unsigned long bt_chunk_num; - unsigned long num_bt_l0; + unsigned long num_bt_l0 = 0; u32 hop_num; switch (type) { @@ -823,6 +889,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, num_bt_l0 = hr_dev->caps.srqc_bt_num; hop_num = hr_dev->caps.srqc_hop_num; break; + case HEM_TYPE_MTT: + buf_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.mtt_hop_num; + break; + case HEM_TYPE_CQE: + buf_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.cqe_hop_num; + break; default: dev_err(dev, "Table %d not support to init hem table here!\n", @@ -832,6 +910,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, obj_per_chunk = buf_chunk_size / obj_size; num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; bt_chunk_num = bt_chunk_size / 8; + if (table->type >= HEM_TYPE_MTT) + num_bt_l0 = bt_chunk_num; table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL); @@ -910,7 +990,8 @@ void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, u64 obj; hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); - buf_chunk_size = mhop.buf_chunk_size; + buf_chunk_size = table->type < HEM_TYPE_MTT ? 
mhop.buf_chunk_size : + mhop.bt_chunk_size; for (i = 0; i < table->num_hem; ++i) { obj = i * buf_chunk_size / table->obj_size; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index b064ef5fad08..1aa54b6d583e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -47,6 +47,7 @@ enum { /* UNMAP HEM */ HEM_TYPE_MTT, + HEM_TYPE_CQE, HEM_TYPE_IRRL, }; @@ -58,10 +59,13 @@ enum { (type < HEM_TYPE_MTT && hop_num == 2) #define check_whether_bt_num_2(type, hop_num) \ - (type < HEM_TYPE_MTT && hop_num == 1) + ((type < HEM_TYPE_MTT && hop_num == 1) || \ + (type >= HEM_TYPE_MTT && hop_num == 2)) #define check_whether_bt_num_1(type, hop_num) \ - (type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) + ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \ + (type >= HEM_TYPE_MTT && hop_num == 1) || \ + (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0)) enum { HNS_ROCE_HEM_PAGE_SHIFT = 12, @@ -101,7 +105,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj); void hns_roce_table_put(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj); -void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj, +void *hns_roce_table_find(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj, dma_addr_t *dma_handle); int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 50dee130737e..0a634c58dfcb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2524,7 +2524,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return -ENOMEM; /* Search QP buf's MTTs */ - mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, hr_qp->mtt.first_seg, &dma_handle); if (!mtts) { dev_err(dev, "qp buf pa find failed\n"); @@ -2671,7 +2671,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return -ENOMEM; /* Search qp buf's mtts */ - mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, hr_qp->mtt.first_seg, &dma_handle); if (mtts == NULL) { dev_err(dev, "qp buf pa find failed\n"); @@ -2679,8 +2679,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, } /* Search IRRL's mtts */ - mtts_2 = hns_roce_table_find(&hr_dev->qp_table.irrl_table, hr_qp->qpn, - &dma_handle_2); + mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, + hr_qp->qpn, &dma_handle_2); if (mtts_2 == NULL) { dev_err(dev, "qp irrl_table find failed\n"); goto out; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 77cf476d6096..c59d15ec6d5e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -613,6 +613,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->mpt_ba_pg_sz = 0; caps->mpt_buf_pg_sz = 0; caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; + caps->mtt_ba_pg_sz = 0; + caps->mtt_buf_pg_sz = 0; + caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM; + caps->cqe_ba_pg_sz = 0; + caps->cqe_buf_pg_sz = 0; + caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; caps->pkey_table_len[0] = 1; 
caps->gid_table_len[0] = 2; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index cc8c8d05d699..212d51135f9d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -74,6 +74,7 @@ #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 +#define HNS_ROCE_CQE_HOP_NUM 1 #define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0 #define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1 diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 923d2b40e9c5..d8f7b41148d1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -348,10 +348,11 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, u32 start_index, u32 npages, u64 *page_list) { - u32 i = 0; - __le64 *mtts = NULL; + struct hns_roce_hem_table *table; dma_addr_t dma_handle; + __le64 *mtts; u32 s = start_index * sizeof(u64); + u32 i; /* All MTTs must fit in the same page */ if (start_index / (PAGE_SIZE / sizeof(u64)) != @@ -361,15 +362,20 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) return -EINVAL; - mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + table = &hr_dev->mr_table.mtt_table; + mtts = hns_roce_table_find(hr_dev, table, mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, &dma_handle); if (!mtts) return -ENOMEM; /* Save page addr, low 12 bits : 0 */ - for (i = 0; i < npages; ++i) - mtts[i] = (cpu_to_le64(page_list[i])) >> PAGE_ADDR_SHIFT; + for (i = 0; i < npages; ++i) { + if (!hr_dev->caps.mtt_hop_num) + mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT); + else + mtts[i] = cpu_to_le64(page_list[i]); + } return 0; } From 9766edc34ea17a8264b76696367aeb88a52ab108 Mon Sep 17 00:00:00 2001 From: Shaobo Xu Date: Wed, 30 Aug 2017 17:23:09 +0800 Subject: [PATCH 021/296] RDMA/hns: Split CQE from MTT in hip08 In hip08, the SQWQE/SGE/RQWQE and CQE have different hop num and page size, so we need to manage the base address table of the SQWQE/SGE/RQWQE and CQE separately. This patch is to split CQE from MTT(SQWQE/SGE/RQWQE). 
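The mechanical core of the split is a type tag carried by each MTT, from which every caller resolves the matching buddy/table pair. A minimal stand-alone sketch of that selection pattern follows; pick_table() and the stripped-down struct are hypothetical stand-ins for the driver's structures, not its API.

/* Sketch of the WQE/CQE table selection introduced by this patch. */
#include <stdio.h>

enum mtt_type { MTT_TYPE_WQE = 0, MTT_TYPE_CQE };

struct table { const char *name; };

static struct table mtt_table     = { "mtt_table (SQWQE/SGE/RQWQE)" };
static struct table mtt_cqe_table = { "mtt_cqe_table (CQE)" };

/* mirrors the if/else now used in hns_roce_write_mtt_chunk() */
static struct table *pick_table(enum mtt_type type)
{
	return type == MTT_TYPE_WQE ? &mtt_table : &mtt_cqe_table;
}

int main(void)
{
	printf("%s\n", pick_table(MTT_TYPE_CQE)->name);
	return 0;
}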
Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cq.c | 15 ++++- drivers/infiniband/hw/hns/hns_roce_device.h | 14 ++++- drivers/infiniband/hw/hns/hns_roce_hem.c | 3 + drivers/infiniband/hw/hns/hns_roce_main.c | 17 ++++++ drivers/infiniband/hw/hns/hns_roce_mr.c | 66 ++++++++++++++++----- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 + 6 files changed, 98 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 362aeecc1a08..9cf213c53f4c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -96,7 +96,11 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, cq_table = &hr_dev->cq_table; /* Get the physical address of cq buf */ - mtt_table = &hr_dev->mr_table.mtt_table; + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + mtt_table = &hr_dev->mr_table.mtt_cqe_table; + else + mtt_table = &hr_dev->mr_table.mtt_table; + mtts = hns_roce_table_find(hr_dev, mtt_table, hr_mtt->first_seg, &dma_handle); if (!mtts) { @@ -221,6 +225,10 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, if (IS_ERR(*umem)) return PTR_ERR(*umem); + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + else + buf->hr_mtt.mtt_type = MTT_TYPE_WQE; ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), (*umem)->page_shift, &buf->hr_mtt); if (ret) @@ -250,6 +258,11 @@ static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, if (ret) goto out; + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + else + buf->hr_mtt.mtt_type = MTT_TYPE_WQE; + ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, buf->hr_buf.page_shift, &buf->hr_mtt); if (ret) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7dd8fbb2b9f2..319c7054905d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -170,6 +170,11 @@ enum { HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, }; +enum hns_roce_mtt_type { + MTT_TYPE_WQE = 0, + MTT_TYPE_CQE, +}; + #define HNS_ROCE_CMD_SUCCESS 1 #define HNS_ROCE_PORT_DOWN 0 @@ -241,9 +246,10 @@ struct hns_roce_hem_table { }; struct hns_roce_mtt { - unsigned long first_seg; - int order; - int page_shift; + unsigned long first_seg; + int order; + int page_shift; + enum hns_roce_mtt_type mtt_type; }; /* Only support 4K page size for mr register */ @@ -268,6 +274,8 @@ struct hns_roce_mr_table { struct hns_roce_buddy mtt_buddy; struct hns_roce_hem_table mtt_table; struct hns_roce_hem_table mtpt_table; + struct hns_roce_buddy mtt_cqe_buddy; + struct hns_roce_hem_table mtt_cqe_table; }; struct hns_roce_wq { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index ac2d671ed2f9..8a3e174f5b5f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1041,4 +1041,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_cqe_table); } diff --git 
a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index db6593e15e02..a110f968987f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -546,6 +546,17 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) return ret; } + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_cqe_table, + HEM_TYPE_CQE, hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_cqe_segs, 1); + if (ret) { + dev_err(dev, "Failed to init MTT CQE context memory, aborting.\n"); + goto err_unmap_cqe; + } + } + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, hr_dev->caps.num_mtpts, 1); @@ -593,6 +604,12 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) err_unmap_mtt: hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_cqe_table); + +err_unmap_cqe: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index d8f7b41148d1..88b436fb92ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -177,18 +177,28 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) } static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, - unsigned long *seg) + unsigned long *seg, u32 mtt_type) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - int ret = 0; + struct hns_roce_hem_table *table; + struct hns_roce_buddy *buddy; + int ret; - ret = hns_roce_buddy_alloc(&mr_table->mtt_buddy, order, seg); + if (mtt_type == MTT_TYPE_WQE) { + buddy = &mr_table->mtt_buddy; + table = &mr_table->mtt_table; + } else { + buddy = &mr_table->mtt_cqe_buddy; + table = &mr_table->mtt_cqe_table; + } + + ret = hns_roce_buddy_alloc(buddy, order, seg); if (ret == -1) return -1; - if (hns_roce_table_get_range(hr_dev, &mr_table->mtt_table, *seg, + if (hns_roce_table_get_range(hr_dev, table, *seg, *seg + (1 << order) - 1)) { - hns_roce_buddy_free(&mr_table->mtt_buddy, *seg, order); + hns_roce_buddy_free(buddy, *seg, order); return -1; } @@ -198,7 +208,7 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, struct hns_roce_mtt *mtt) { - int ret = 0; + int ret; int i; /* Page num is zero, correspond to DMA memory register */ @@ -217,7 +227,8 @@ int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, ++mtt->order; /* Allocate MTT entry */ - ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg); + ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg, + mtt->mtt_type); if (ret == -1) return -ENOMEM; @@ -231,9 +242,19 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) if (mtt->order < 0) return; - hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); + if (mtt->mtt_type == MTT_TYPE_WQE) { + hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + } else { + 
hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + } } EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); @@ -362,7 +383,11 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) return -EINVAL; - table = &hr_dev->mr_table.mtt_table; + if (mtt->mtt_type == MTT_TYPE_WQE) + table = &hr_dev->mr_table.mtt_table; + else + table = &hr_dev->mr_table.mtt_cqe_table; + mtts = hns_roce_table_find(hr_dev, table, mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, &dma_handle); @@ -409,9 +434,9 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct hns_roce_buf *buf) { - u32 i = 0; - int ret = 0; - u64 *page_list = NULL; + u64 *page_list; + int ret; + u32 i; page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL); if (!page_list) @@ -434,7 +459,7 @@ int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - int ret = 0; + int ret; ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap, hr_dev->caps.num_mtpts, @@ -448,8 +473,17 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) if (ret) goto err_buddy; + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { + ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy, + ilog2(hr_dev->caps.num_cqe_segs)); + if (ret) + goto err_buddy_cqe; + } return 0; +err_buddy_cqe: + hns_roce_buddy_cleanup(&mr_table->mtt_buddy); + err_buddy: hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); return ret; @@ -460,6 +494,8 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; hns_roce_buddy_cleanup(&mr_table->mtt_buddy); + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index b512ecc6df22..881ea67e6bac 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -440,6 +440,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_out; } + hr_qp->mtt.mtt_type = MTT_TYPE_WQE; ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem), hr_qp->umem->page_shift, &hr_qp->mtt); if (ret) { @@ -490,6 +491,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_out; } + hr_qp->mtt.mtt_type = MTT_TYPE_WQE; /* Write MTT */ ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages, hr_qp->hr_buf.page_shift, &hr_qp->mtt); From ff795f71787c945d6abb6eabe95475bf9cbd6083 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:10 +0800 Subject: [PATCH 022/296] RDMA/hns: Support multi hop addressing for PBL in hip08 The block base address in the MR can be retrieved by the block number which is calculated with the VA in the SGE and MTPT. In hip08, the PBL supports multi hop addressing to retrieve the block base address by the block number. This patch is to add the interfaces in the MR to support multi hop addressing for the PBL. 
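For the two-hop PBL this patch configures (HNS_ROCE_PBL_HOP_NUM is set to 2 below), the block-number lookup reduces to one divide and one modulo over the L1 tables hung off L0. The following user-space sketch assumes a 4 KB BT page and a hypothetical lookup_block() helper; it illustrates the layout, it is not driver code.

#include <stdint.h>
#include <stdio.h>

#define PBL_BT_SZ	4096
#define PER_CHUNK	(PBL_BT_SZ / 8)	/* 8-byte entries per BT page */

/* block n lives in L1 chunk n / PER_CHUNK at slot n % PER_CHUNK */
static uint64_t lookup_block(uint64_t **pbl_bt_l1, uint64_t n)
{
	return pbl_bt_l1[n / PER_CHUNK][n % PER_CHUNK];
}

int main(void)
{
	static uint64_t chunk0[PER_CHUNK], chunk1[PER_CHUNK];
	uint64_t *l1[] = { chunk0, chunk1 };

	chunk1[7] = 0xabcd000;		/* pretend DMA block address */
	printf("block %d -> 0x%llx\n", PER_CHUNK + 7,
	       (unsigned long long)lookup_block(l1, PER_CHUNK + 7));
	return 0;
}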
Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 16 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 + drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + drivers/infiniband/hw/hns/hns_roce_mr.c | 422 ++++++++++++++++++-- 4 files changed, 418 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 319c7054905d..7815e4eb9939 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -267,6 +267,19 @@ struct hns_roce_mr { int type; /* MR's register type */ u64 *pbl_buf;/* MR's PBL space */ dma_addr_t pbl_dma_addr; /* MR's PBL space PA */ + u32 pbl_size;/* PA number in the PBL */ + u64 pbl_ba;/* page table address */ + u32 l0_chunk_last_num;/* L0 last number */ + u32 l1_chunk_last_num;/* L1 last number */ + u64 **pbl_bt_l2;/* PBL BT L2 */ + u64 **pbl_bt_l1;/* PBL BT L1 */ + u64 *pbl_bt_l0;/* PBL BT L0 */ + dma_addr_t *pbl_l2_dma_addr;/* PBL BT L2 dma addr */ + dma_addr_t *pbl_l1_dma_addr;/* PBL BT L1 dma addr */ + dma_addr_t pbl_l0_dma_addr;/* PBL BT L0 dma addr */ + u32 pbl_ba_pg_sz;/* BT chunk page size */ + u32 pbl_buf_pg_sz;/* buf chunk page size */ + u32 pbl_hop_num;/* multi-hop number */ }; struct hns_roce_mr_table { @@ -514,6 +527,9 @@ struct hns_roce_caps { int qpc_entry_sz; int irrl_entry_sz; int cqc_entry_sz; + u32 pbl_ba_pg_sz; + u32 pbl_buf_pg_sz; + u32 pbl_hop_num; int aeqe_depth; int ceqe_depth[HNS_ROCE_COMP_VEC_NUM]; enum ib_mtu max_mtu; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c59d15ec6d5e..542540db45be 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -613,6 +613,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->mpt_ba_pg_sz = 0; caps->mpt_buf_pg_sz = 0; caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; + caps->pbl_ba_pg_sz = 0; + caps->pbl_buf_pg_sz = 0; + caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; caps->mtt_ba_pg_sz = 0; caps->mtt_buf_pg_sz = 0; caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 212d51135f9d..593d886943f1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -75,6 +75,7 @@ #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 +#define HNS_ROCE_PBL_HOP_NUM 2 #define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0 #define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1 diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 88b436fb92ee..7e6ce76b32df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -258,6 +258,239 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) } EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); +static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr, int err_loop_index, + int loop_i, int loop_j) +{ + struct device *dev = hr_dev->dev; + u32 mhop_num; + u32 pbl_bt_sz; + u64 bt_idx; + int i, j; + + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); + mhop_num = hr_dev->caps.pbl_hop_num; + + i = loop_i; + j = loop_j; + if (mhop_num == 3 && err_loop_index == 2) { + for (; i >= 0; i--) { + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + 
mr->pbl_l1_dma_addr[i]); + + for (j = 0; j < pbl_bt_sz / 8; j++) { + if (i == loop_i && j >= loop_j) + break; + + bt_idx = i * pbl_bt_sz / 8 + j; + dma_free_coherent(dev, pbl_bt_sz, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + } + } + } else if (mhop_num == 3 && err_loop_index == 1) { + for (i -= 1; i >= 0; i--) { + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + + for (j = 0; j < pbl_bt_sz / 8; j++) { + bt_idx = i * pbl_bt_sz / 8 + j; + dma_free_coherent(dev, pbl_bt_sz, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + } + } + } else if (mhop_num == 2 && err_loop_index == 1) { + for (i -= 1; i >= 0; i--) + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + } else { + dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.", + mhop_num, err_loop_index); + return; + } + + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr); + mr->pbl_bt_l0 = NULL; + mr->pbl_l0_dma_addr = 0; +} + +/* PBL multi hop addressing */ +static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, + struct hns_roce_mr *mr) +{ + struct device *dev = hr_dev->dev; + int mr_alloc_done = 0; + int npages_allocated; + int i = 0, j = 0; + u32 pbl_bt_sz; + u32 mhop_num; + u64 pbl_last_bt_num; + u64 pbl_bt_cnt = 0; + u64 bt_idx; + u64 size; + + mhop_num = hr_dev->caps.pbl_hop_num; + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); + pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); + + if (mhop_num == HNS_ROCE_HOP_NUM_0) + return 0; + + /* hop_num = 1 */ + if (mhop_num == 1) { + if (npages > pbl_bt_sz / 8) { + dev_err(dev, "npages %d is larger than buf_pg_sz!", + npages); + return -EINVAL; + } + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) + return -ENOMEM; + + mr->pbl_size = npages; + mr->pbl_ba = mr->pbl_dma_addr; + mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; + mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; + mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; + return 0; + } + + mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, + sizeof(*mr->pbl_l1_dma_addr), + GFP_KERNEL); + if (!mr->pbl_l1_dma_addr) + return -ENOMEM; + + mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), + GFP_KERNEL); + if (!mr->pbl_bt_l1) + goto err_kcalloc_bt_l1; + + if (mhop_num == 3) { + mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, + sizeof(*mr->pbl_l2_dma_addr), + GFP_KERNEL); + if (!mr->pbl_l2_dma_addr) + goto err_kcalloc_l2_dma; + + mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, + sizeof(*mr->pbl_bt_l2), + GFP_KERNEL); + if (!mr->pbl_bt_l2) + goto err_kcalloc_bt_l2; + } + + /* alloc L0 BT */ + mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, + &(mr->pbl_l0_dma_addr), + GFP_KERNEL); + if (!mr->pbl_bt_l0) + goto err_dma_alloc_l0; + + if (mhop_num == 2) { + /* alloc L1 BT */ + for (i = 0; i < pbl_bt_sz / 8; i++) { + if (pbl_bt_cnt + 1 < pbl_last_bt_num) { + size = pbl_bt_sz; + } else { + npages_allocated = i * (pbl_bt_sz / 8); + size = (npages - npages_allocated) * 8; + } + mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, + &(mr->pbl_l1_dma_addr[i]), + GFP_KERNEL); + if (!mr->pbl_bt_l1[i]) { + hns_roce_loop_free(hr_dev, mr, 1, i, 0); + goto err_dma_alloc_l0; + } + + *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + + pbl_bt_cnt++; + if (pbl_bt_cnt >= pbl_last_bt_num) + break; + } + } else if (mhop_num == 3) { + /* alloc L1, L2 BT */ + for (i = 0; i < pbl_bt_sz / 8; i++) { + mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, + 
&(mr->pbl_l1_dma_addr[i]), + GFP_KERNEL); + if (!mr->pbl_bt_l1[i]) { + hns_roce_loop_free(hr_dev, mr, 1, i, 0); + goto err_dma_alloc_l0; + } + + *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + + for (j = 0; j < pbl_bt_sz / 8; j++) { + bt_idx = i * pbl_bt_sz / 8 + j; + + if (pbl_bt_cnt + 1 < pbl_last_bt_num) { + size = pbl_bt_sz; + } else { + npages_allocated = bt_idx * + (pbl_bt_sz / 8); + size = (npages - npages_allocated) * 8; + } + mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( + dev, size, + &(mr->pbl_l2_dma_addr[bt_idx]), + GFP_KERNEL); + if (!mr->pbl_bt_l2[bt_idx]) { + hns_roce_loop_free(hr_dev, mr, 2, i, j); + goto err_dma_alloc_l0; + } + + *(mr->pbl_bt_l1[i] + j) = + mr->pbl_l2_dma_addr[bt_idx]; + + pbl_bt_cnt++; + if (pbl_bt_cnt >= pbl_last_bt_num) { + mr_alloc_done = 1; + break; + } + } + + if (mr_alloc_done) + break; + } + } + + mr->l0_chunk_last_num = i + 1; + if (mhop_num == 3) + mr->l1_chunk_last_num = j + 1; + + mr->pbl_size = npages; + mr->pbl_ba = mr->pbl_l0_dma_addr; + mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; + mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; + mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; + + return 0; + +err_dma_alloc_l0: + kfree(mr->pbl_bt_l2); + mr->pbl_bt_l2 = NULL; + +err_kcalloc_bt_l2: + kfree(mr->pbl_l2_dma_addr); + mr->pbl_l2_dma_addr = NULL; + +err_kcalloc_l2_dma: + kfree(mr->pbl_bt_l1); + mr->pbl_bt_l1 = NULL; + +err_kcalloc_bt_l1: + kfree(mr->pbl_l1_dma_addr); + mr->pbl_l1_dma_addr = NULL; + + return -ENOMEM; +} + static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, u64 size, u32 access, int npages, struct hns_roce_mr *mr) @@ -282,16 +515,111 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, mr->type = MR_TYPE_DMA; mr->pbl_buf = NULL; mr->pbl_dma_addr = 0; + /* PBL multi-hop addressing parameters */ + mr->pbl_bt_l2 = NULL; + mr->pbl_bt_l1 = NULL; + mr->pbl_bt_l0 = NULL; + mr->pbl_l2_dma_addr = NULL; + mr->pbl_l1_dma_addr = NULL; + mr->pbl_l0_dma_addr = 0; } else { mr->type = MR_TYPE_MR; - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) - return -ENOMEM; + if (!hr_dev->caps.pbl_hop_num) { + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) + return -ENOMEM; + } else { + ret = hns_roce_mhop_alloc(hr_dev, npages, mr); + } } - return 0; + return ret; +} + +static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr) +{ + struct device *dev = hr_dev->dev; + int npages_allocated; + int npages; + int i, j; + u32 pbl_bt_sz; + u32 mhop_num; + u64 bt_idx; + + npages = ib_umem_page_count(mr->umem); + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); + mhop_num = hr_dev->caps.pbl_hop_num; + + if (mhop_num == HNS_ROCE_HOP_NUM_0) + return; + + /* hop_num = 1 */ + if (mhop_num == 1) { + dma_free_coherent(dev, (unsigned int)(npages * 8), + mr->pbl_buf, mr->pbl_dma_addr); + return; + } + + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, + mr->pbl_l0_dma_addr); + + if (mhop_num == 2) { + for (i = 0; i < mr->l0_chunk_last_num; i++) { + if (i == mr->l0_chunk_last_num - 1) { + npages_allocated = i * (pbl_bt_sz / 8); + + dma_free_coherent(dev, + (npages - npages_allocated) * 8, + mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + + break; + } + + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + } + } else if (mhop_num == 3) { + for (i = 0; i < mr->l0_chunk_last_num; i++) { + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + 
mr->pbl_l1_dma_addr[i]); + + for (j = 0; j < pbl_bt_sz / 8; j++) { + bt_idx = i * (pbl_bt_sz / 8) + j; + + if ((i == mr->l0_chunk_last_num - 1) + && j == mr->l1_chunk_last_num - 1) { + npages_allocated = bt_idx * + (pbl_bt_sz / 8); + + dma_free_coherent(dev, + (npages - npages_allocated) * 8, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + + break; + } + + dma_free_coherent(dev, pbl_bt_sz, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + } + } + } + + kfree(mr->pbl_bt_l1); + kfree(mr->pbl_l1_dma_addr); + mr->pbl_bt_l1 = NULL; + mr->pbl_l1_dma_addr = NULL; + if (mhop_num == 3) { + kfree(mr->pbl_bt_l2); + kfree(mr->pbl_l2_dma_addr); + mr->pbl_bt_l2 = NULL; + mr->pbl_l2_dma_addr = NULL; + } } static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, @@ -310,10 +638,18 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, if (mr->size != ~0ULL) { npages = ib_umem_page_count(mr->umem); - dma_free_coherent(dev, (unsigned int)(npages * 8), mr->pbl_buf, - mr->pbl_dma_addr); + + if (!hr_dev->caps.pbl_hop_num) + dma_free_coherent(dev, (unsigned int)(npages * 8), + mr->pbl_buf, mr->pbl_dma_addr); + else + hns_roce_mhop_free(hr_dev, mr); } + if (mr->enabled) + hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, + key_to_hw_index(mr->key)); + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, key_to_hw_index(mr->key), BITMAP_NO_RR); } @@ -501,8 +837,8 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) { - int ret = 0; - struct hns_roce_mr *mr = NULL; + struct hns_roce_mr *mr; + int ret; mr = kmalloc(sizeof(*mr), GFP_KERNEL); if (mr == NULL) @@ -571,16 +907,36 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr, +static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr, struct ib_umem *umem) { - int i = 0; - int entry; struct scatterlist *sg; + int i = 0, j = 0; + int entry; + + if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0) + return 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12; - i++; + if (!hr_dev->caps.pbl_hop_num) { + mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12; + i++; + } else if (hr_dev->caps.pbl_hop_num == 1) { + mr->pbl_buf[i] = sg_dma_address(sg); + i++; + } else { + if (hr_dev->caps.pbl_hop_num == 2) + mr->pbl_bt_l1[i][j] = sg_dma_address(sg); + else if (hr_dev->caps.pbl_hop_num == 3) + mr->pbl_bt_l2[i][j] = sg_dma_address(sg); + + j++; + if (j >= (PAGE_SIZE / 8)) { + i++; + j = 0; + } + } } /* Memory barrier */ @@ -595,9 +951,11 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; - struct hns_roce_mr *mr = NULL; - int ret = 0; - int n = 0; + struct hns_roce_mr *mr; + int bt_size; + int ret; + int n; + int i; mr = kmalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -618,11 +976,27 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_umem; } - if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { - dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n", - length); - ret = -EINVAL; - goto err_umem; + if (!hr_dev->caps.pbl_hop_num) { + if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { + dev_err(dev, + " MR len %lld err. 
MR is limited to 4G at most!\n", + length); + ret = -EINVAL; + goto err_umem; + } + } else { + int pbl_size = 1; + + bt_size = (1 << PAGE_SHIFT) / 8; + for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) + pbl_size *= bt_size; + if (n > pbl_size) { + dev_err(dev, + " MR len %lld err. MR page num is limited to %d!\n", + length, pbl_size); + ret = -EINVAL; + goto err_umem; + } } ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, @@ -630,7 +1004,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (ret) goto err_umem; - ret = hns_roce_ib_umem_write_mr(mr, mr->umem); + ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); if (ret) goto err_mr; From 7afddafa8403842a2dd50cd6c696a80b1f74a22f Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:11 +0800 Subject: [PATCH 023/296] RDMA/hns: Configure mac&gid and user access region for hip08 RoCE driver In hip08, the user access region(UAR) pfn is calculated from pci device memory resource. This patch mainly sets mac and gid table by configuring the relevant registers and updates the uar pfn for hip08 SoC. Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_common.h | 9 ++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 49 +++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 7 +++ drivers/infiniband/hw/hns/hns_roce_pd.c | 16 ++++--- 4 files changed, 76 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 0c950f8e4249..7ecb7a4147a8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -376,4 +376,13 @@ #define ROCEE_RX_CMQ_TAIL_REG 0x07024 #define ROCEE_RX_CMQ_HEAD_REG 0x07028 +#define ROCEE_VF_SMAC_CFG0_REG 0x12000 +#define ROCEE_VF_SMAC_CFG1_REG 0x12004 + +#define ROCEE_VF_SGID_CFG0_REG 0x10000 +#define ROCEE_VF_SGID_CFG1_REG 0x10004 +#define ROCEE_VF_SGID_CFG2_REG 0x10008 +#define ROCEE_VF_SGID_CFG3_REG 0x1000c +#define ROCEE_VF_SGID_CFG4_REG 0x10010 + #endif /* _HNS_ROCE_COMMON_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 542540db45be..e7aeb531b7a0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -718,6 +718,53 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } +static void hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, + int gid_index, union ib_gid *gid) +{ + u32 *p; + u32 val; + + p = (u32 *)&gid->raw[0]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG + + 0x20 * gid_index); + + p = (u32 *)&gid->raw[4]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG + + 0x20 * gid_index); + + p = (u32 *)&gid->raw[8]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG + + 0x20 * gid_index); + + p = (u32 *)&gid->raw[0xc]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG + + 0x20 * gid_index); + + val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index); + roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M, + ROCEE_VF_SGID_CFG4_SGID_TYPE_S, 0); + + roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val); +} + +static void hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, + u8 *addr) +{ + u16 reg_smac_h; + u32 reg_smac_l; + u32 val; + + reg_smac_l = *(u32 *)(&addr[0]); + roce_raw_write(reg_smac_l, 
hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG + + 0x08 * phy_port); + val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port); + + reg_smac_h = *(u16 *)(&addr[4]); + roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M, + ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h); + roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); +} + static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx) @@ -857,6 +904,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .hw_profile = hns_roce_v2_profile, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, + .set_gid = hns_roce_v2_set_gid, + .set_mac = hns_roce_v2_set_mac, .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 593d886943f1..aee8f34be1a1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -246,6 +246,13 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) +/* Reg field definition */ +#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0 +#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0) + +#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0 +#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0) + struct hns_roce_cfg_bt_attr { u32 vf_qpc_cfg; u32 vf_srqc_cfg; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 079bb10237cf..bdab2188c04a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -31,6 +31,7 @@ */ #include +#include #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) @@ -111,12 +112,17 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) uar->index = (uar->index - 1) % (hr_dev->caps.phy_num_uars - 1) + 1; - res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&hr_dev->pdev->dev, "memory resource not found!\n"); - return -EINVAL; + if (!dev_is_pci(hr_dev->dev)) { + res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&hr_dev->pdev->dev, "memory resource not found!\n"); + return -EINVAL; + } + uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index; + } else { + uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) + >> PAGE_SHIFT); } - uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index; return 0; } From 93aa21871b91ff1fed9e4b2ce444d0d978deddfc Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:12 +0800 Subject: [PATCH 024/296] RDMA/hns: Add CQ operations support for hip08 RoCE driver This patch adds CQ relevant operations for hip08 RoCE driver, such as create CQ, destroy CQ, poll CQ and Request Completion Notification(req_notify_cq). 
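For orientation (not part of the patch): in the kernel, ib_poll_cq() and ib_req_notify_cq() dispatch to the .poll_cq and .req_notify_cq hooks this patch installs in hns_roce_hw_v2. The sketch below shows the equivalent consumer loop written against libibverbs in user space; `cq` is assumed to come from an earlier ibv_create_cq(), and error handling is trimmed to the essentials.

#include <infiniband/verbs.h>

/* Drain all available completions, then re-arm the CQ for notification. */
static int drain_and_rearm(struct ibv_cq *cq)
{
	struct ibv_wc wc[16];
	int n, i;

	while ((n = ibv_poll_cq(cq, 16, wc)) > 0) {
		for (i = 0; i < n; i++) {
			if (wc[i].status != IBV_WC_SUCCESS)
				return -1;	/* completion error */
		}
	}
	if (n < 0)
		return -1;	/* poll itself failed */

	/* 0 = request an event for any completion, not only solicited ones */
	return ibv_req_notify_cq(cq, 0);
}

The solicited-only flag in the last call corresponds to the V2_CQ_DB_REQ_NOT_SOL / V2_CQ_DB_REQ_NOT doorbell commands distinguished in hns_roce_v2_req_notify_cq() below.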
Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cq.c | 26 +- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 376 ++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 222 ++++++++++++ 5 files changed, 613 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 9cf213c53f4c..fa17dbc78467 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -195,14 +195,15 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) if (ret) dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); + if (hr_dev->eq_table.eq) { + /* Waiting interrupt process procedure carried out */ + synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); - /* Waiting interrupt process procedure carried out */ - synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); - - /* wait for all interrupt processed */ - if (atomic_dec_and_test(&hr_cq->refcount)) - complete(&hr_cq->free); - wait_for_completion(&hr_cq->free); + /* wait for all interrupt processed */ + if (atomic_dec_and_test(&hr_cq->refcount)) + complete(&hr_cq->free); + wait_for_completion(&hr_cq->free); + } spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, hr_cq->cqn); @@ -303,7 +304,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, struct hns_roce_uar *uar = NULL; int vector = attr->comp_vector; int cq_entries = attr->cqe; - int ret = 0; + int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", @@ -311,13 +312,12 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, return ERR_PTR(-EINVAL); } - hr_cq = kmalloc(sizeof(*hr_cq), GFP_KERNEL); + hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL); if (!hr_cq) return ERR_PTR(-ENOMEM); - /* In v1 engine, parameter verification */ - if (cq_entries < HNS_ROCE_MIN_CQE_NUM) - cq_entries = HNS_ROCE_MIN_CQE_NUM; + if (hr_dev->caps.min_cqes) + cq_entries = max(cq_entries, hr_dev->caps.min_cqes); cq_entries = roundup_pow_of_two((unsigned int)cq_entries); hr_cq->ib_cq.cqe = cq_entries - 1; @@ -369,7 +369,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, * problems if tptr is set to zero here, so we initialze it in user * space. 
*/ - if (!context) + if (!context && hr_cq->tptr_addr) *hr_cq->tptr_addr = 0; /* Get created cq handler and carry out event */ diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7815e4eb9939..bee20886c889 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -508,6 +508,7 @@ struct hns_roce_caps { int max_qp_dest_rdma; int num_cqs; int max_cqes; + int min_cqes; int reserved_cqs; int num_aeq_vectors; /* 1 */ int num_comp_vectors; /* 32 ceq */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 0a634c58dfcb..1aa2be634e16 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1476,6 +1476,7 @@ int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM; caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM; + caps->min_cqes = HNS_ROCE_MIN_CQE_NUM; caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM; caps->max_sq_sg = HNS_ROCE_V1_SG_NUM; caps->max_rq_sg = HNS_ROCE_V1_SG_NUM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e7aeb531b7a0..94339af99aec 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -765,6 +765,379 @@ static void hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); } +static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) +{ + return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, + n * HNS_ROCE_V2_CQE_ENTRY_SIZE); +} + +static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) +{ + struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe); + + /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ + return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^ + !!(n & (hr_cq->ib_cq.cqe + 1))) ? 
cqe : NULL; +} + +static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) +{ + return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); +} + +static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) +{ + struct hns_roce_v2_cq_db cq_db; + + cq_db.byte_4 = 0; + cq_db.parameter = 0; + + roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_TAG_M, + V2_CQ_DB_BYTE_4_TAG_S, hr_cq->cqn); + roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_CMD_M, + V2_CQ_DB_BYTE_4_CMD_S, HNS_ROCE_V2_CQ_DB_PTR); + + roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CONS_IDX_M, + V2_CQ_DB_PARAMETER_CONS_IDX_S, + cons_index & ((hr_cq->cq_depth << 1) - 1)); + roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CMD_SN_M, + V2_CQ_DB_PARAMETER_CMD_SN_S, 1); + + hns_roce_write64_k((__be32 *)&cq_db, hr_cq->cq_db_l); + +} + +static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, void *mb_buf, + u64 *mtts, dma_addr_t dma_handle, int nent, + u32 vector) +{ + struct hns_roce_v2_cq_context *cq_context; + + cq_context = mb_buf; + memset(cq_context, 0, sizeof(*cq_context)); + + roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CQ_ST_M, + V2_CQC_BYTE_4_CQ_ST_S, V2_CQ_STATE_VALID); + roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M, + V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent)); + roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, + V2_CQC_BYTE_4_CEQN_S, vector); + cq_context->byte_4_pg_ceqn = cpu_to_le32(cq_context->byte_4_pg_ceqn); + + roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, + V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); + + cq_context->cqe_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); + cq_context->cqe_cur_blk_addr = + cpu_to_le32(cq_context->cqe_cur_blk_addr); + + roce_set_field(cq_context->byte_16_hop_addr, + V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M, + V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S, + cpu_to_le32((mtts[0]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(cq_context->byte_16_hop_addr, + V2_CQC_BYTE_16_CQE_HOP_NUM_M, + V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num == + HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num); + + cq_context->cqe_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT); + roce_set_field(cq_context->byte_24_pgsz_addr, + V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M, + V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S, + cpu_to_le32((mtts[1]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(cq_context->byte_24_pgsz_addr, + V2_CQC_BYTE_24_CQE_BA_PG_SZ_M, + V2_CQC_BYTE_24_CQE_BA_PG_SZ_S, + hr_dev->caps.cqe_ba_pg_sz); + roce_set_field(cq_context->byte_24_pgsz_addr, + V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M, + V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S, + hr_dev->caps.cqe_buf_pg_sz); + + cq_context->cqe_ba = (u32)(dma_handle >> 3); + + roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M, + V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3))); +} + +static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, + enum ib_cq_notify_flags flags) +{ + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + u32 notification_flag; + u32 doorbell[2]; + + doorbell[0] = 0; + doorbell[1] = 0; + + notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 
+ V2_CQ_DB_REQ_NOT : V2_CQ_DB_REQ_NOT_SOL; + /* + * flags = 0; Notification Flag = 1, next + * flags = 1; Notification Flag = 0, solocited + */ + roce_set_field(doorbell[0], V2_CQ_DB_BYTE_4_TAG_M, V2_DB_BYTE_4_TAG_S, + hr_cq->cqn); + roce_set_field(doorbell[0], V2_CQ_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, + HNS_ROCE_V2_CQ_DB_NTR); + roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CONS_IDX_M, + V2_CQ_DB_PARAMETER_CONS_IDX_S, + hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); + roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CMD_SN_M, + V2_CQ_DB_PARAMETER_CMD_SN_S, 1); + roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S, + notification_flag); + + hns_roce_write64_k(doorbell, hr_cq->cq_db_l); + + return 0; +} + +static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, + struct hns_roce_qp **cur_qp, struct ib_wc *wc) +{ + struct hns_roce_dev *hr_dev; + struct hns_roce_v2_cqe *cqe; + struct hns_roce_qp *hr_qp; + struct hns_roce_wq *wq; + int is_send; + u16 wqe_ctr; + u32 opcode; + u32 status; + int qpn; + + /* Find cqe according to consumer index */ + cqe = next_cqe_sw_v2(hr_cq); + if (!cqe) + return -EAGAIN; + + ++hr_cq->cons_index; + /* Memory barrier */ + rmb(); + + /* 0->SQ, 1->RQ */ + is_send = !roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S); + + qpn = roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M, + V2_CQE_BYTE_16_LCL_QPN_S); + + if (!*cur_qp || (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->qpn) { + hr_dev = to_hr_dev(hr_cq->ib_cq.device); + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (unlikely(!hr_qp)) { + dev_err(hr_dev->dev, "CQ %06lx with entry for unknown QPN %06x\n", + hr_cq->cqn, (qpn & HNS_ROCE_V2_CQE_QPN_MASK)); + return -EINVAL; + } + *cur_qp = hr_qp; + } + + wc->qp = &(*cur_qp)->ibqp; + wc->vendor_err = 0; + + status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, + V2_CQE_BYTE_4_STATUS_S); + switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) { + case HNS_ROCE_CQE_V2_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR: + wc->status = IB_WC_LOC_QP_OP_ERR; + break; + case HNS_ROCE_CQE_V2_LOCAL_PROT_ERR: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case HNS_ROCE_CQE_V2_WR_FLUSH_ERR: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + case HNS_ROCE_CQE_V2_MW_BIND_ERR: + wc->status = IB_WC_MW_BIND_ERR; + break; + case HNS_ROCE_CQE_V2_BAD_RESP_ERR: + wc->status = IB_WC_BAD_RESP_ERR; + break; + case HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WC_REM_INV_REQ_ERR; + break; + case HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR: + wc->status = IB_WC_REM_ACCESS_ERR; + break; + case HNS_ROCE_CQE_V2_REMOTE_OP_ERR: + wc->status = IB_WC_REM_OP_ERR; + break; + case HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WC_RETRY_EXC_ERR; + break; + case HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR: + wc->status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR: + wc->status = IB_WC_REM_ABORT_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + /* CQE status error, directly return */ + if (wc->status != IB_WC_SUCCESS) + return 0; + + if (is_send) { + wc->wc_flags = 0; + /* SQ corresponding to CQE */ + switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M, + V2_CQE_BYTE_4_OPCODE_S) & 0x1f) { + case HNS_ROCE_SQ_OPCODE_SEND: + wc->opcode = IB_WC_SEND; + break; + case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV: + wc->opcode = IB_WC_SEND; + break; + 
case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM: + wc->opcode = IB_WC_SEND; + wc->wc_flags |= IB_WC_WITH_IMM; + break; + case HNS_ROCE_SQ_OPCODE_RDMA_READ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = le32_to_cpu(cqe->byte_cnt); + break; + case HNS_ROCE_SQ_OPCODE_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM: + wc->opcode = IB_WC_RDMA_WRITE; + wc->wc_flags |= IB_WC_WITH_IMM; + break; + case HNS_ROCE_SQ_OPCODE_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + break; + case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP: + wc->opcode = IB_WC_COMP_SWAP; + wc->byte_len = 8; + break; + case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD: + wc->opcode = IB_WC_FETCH_ADD; + wc->byte_len = 8; + break; + case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; + case HNS_ROCE_SQ_OPCODE_FAST_REG_WR: + wc->opcode = IB_WC_REG_MR; + break; + case HNS_ROCE_SQ_OPCODE_BIND_MW: + wc->opcode = IB_WC_REG_MR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + wq = &(*cur_qp)->sq; + if ((*cur_qp)->sq_signal_bits) { + /* + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ + wqe_ctr = (u16)roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + wq->tail += (wqe_ctr - (u16)wq->tail) & + (wq->wqe_cnt - 1); + } + + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else { + /* RQ correspond to CQE */ + wc->byte_len = le32_to_cpu(cqe->byte_cnt); + + opcode = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M, + V2_CQE_BYTE_4_OPCODE_S); + switch (opcode & 0x1f) { + case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata); + break; + case HNS_ROCE_V2_OPCODE_SEND: + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + break; + case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata); + break; + case HNS_ROCE_V2_OPCODE_SEND_WITH_INV: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = cqe->rkey_immtdata; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + /* Update tail pointer, record wr_id */ + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + + wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M, + V2_CQE_BYTE_32_SL_S); + wc->src_qp = (u8)roce_get_field(cqe->byte_32, + V2_CQE_BYTE_32_RMT_QPN_M, + V2_CQE_BYTE_32_RMT_QPN_S); + wc->wc_flags |= (roce_get_bit(cqe->byte_32, + V2_CQE_BYTE_32_GRH_S) ? 
+ IB_WC_GRH : 0); + } + + return 0; +} + +static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *wc) +{ + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + struct hns_roce_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + + spin_lock_irqsave(&hr_cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled)) + break; + } + + if (npolled) { + /* Memory barrier */ + wmb(); + hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); + } + + spin_unlock_irqrestore(&hr_cq->lock, flags); + + return npolled; +} + static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx) @@ -906,8 +1279,11 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .chk_mbox = hns_roce_v2_chk_mbox, .set_gid = hns_roce_v2_set_gid, .set_mac = hns_roce_v2_set_mac, + .write_cqc = hns_roce_v2_write_cqc, .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, + .req_notify_cq = hns_roce_v2_req_notify_cq, + .poll_cq = hns_roce_v2_poll_cq, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index aee8f34be1a1..8b69875534c0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -100,6 +100,61 @@ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define V2_CQ_DB_REQ_NOT_SOL 0 +#define V2_CQ_DB_REQ_NOT 1 + +#define V2_CQ_STATE_VALID 1 + +#define HNS_ROCE_V2_CQE_QPN_MASK 0x3ffff + +enum { + HNS_ROCE_SQ_OPCODE_SEND = 0x0, + HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1, + HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2, + HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3, + HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4, + HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5, + HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6, + HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7, + HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8, + HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9, + HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa, + HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb, + HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc, +}; + +enum { + /* rq operations */ + HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0, + HNS_ROCE_V2_OPCODE_SEND = 0x1, + HNS_ROCE_V2_OPCODE_SEND_WITH_IMM = 0x2, + HNS_ROCE_V2_OPCODE_SEND_WITH_INV = 0x3, +}; + +enum { + HNS_ROCE_V2_CQ_DB_PTR = 0x3, + HNS_ROCE_V2_CQ_DB_NTR = 0x4, +}; + +enum { + HNS_ROCE_CQE_V2_SUCCESS = 0x00, + HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR = 0x01, + HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR = 0x02, + HNS_ROCE_CQE_V2_LOCAL_PROT_ERR = 0x04, + HNS_ROCE_CQE_V2_WR_FLUSH_ERR = 0x05, + HNS_ROCE_CQE_V2_MW_BIND_ERR = 0x06, + HNS_ROCE_CQE_V2_BAD_RESP_ERR = 0x10, + HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR = 0x11, + HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR = 0x12, + HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR = 0x13, + HNS_ROCE_CQE_V2_REMOTE_OP_ERR = 0x14, + HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15, + HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16, + HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22, + + HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, +}; + /* CMQ command */ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, @@ -122,6 +177,173 @@ enum hns_roce_cmd_return_status { CMD_QUEUE_FULL = 3, }; +struct hns_roce_v2_cq_context { + u32 byte_4_pg_ceqn; + u32 byte_8_cqn; + u32 cqe_cur_blk_addr; + u32 byte_16_hop_addr; + u32 cqe_nxt_blk_addr; + u32 byte_24_pgsz_addr; + u32 byte_28_cq_pi; + u32 byte_32_cq_ci; + u32 cqe_ba; + u32 byte_40_cqe_ba; + u32 byte_44_db_record; + u32 
db_record_addr; + u32 byte_52_cqe_cnt; + u32 byte_56_cqe_period_maxcnt; + u32 cqe_report_timer; + u32 byte_64_se_cqe_idx; +}; +#define V2_CQC_BYTE_4_CQ_ST_S 0 +#define V2_CQC_BYTE_4_CQ_ST_M GENMASK(1, 0) + +#define V2_CQC_BYTE_4_POLL_S 2 + +#define V2_CQC_BYTE_4_SE_S 3 + +#define V2_CQC_BYTE_4_OVER_IGNORE_S 4 + +#define V2_CQC_BYTE_4_COALESCE_S 5 + +#define V2_CQC_BYTE_4_ARM_ST_S 6 +#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6) + +#define V2_CQC_BYTE_4_SHIFT_S 8 +#define V2_CQC_BYTE_4_SHIFT_M GENMASK(12, 8) + +#define V2_CQC_BYTE_4_CMD_SN_S 13 +#define V2_CQC_BYTE_4_CMD_SN_M GENMASK(14, 13) + +#define V2_CQC_BYTE_4_CEQN_S 15 +#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15) + +#define V2_CQC_BYTE_4_PAGE_OFFSET_S 24 +#define V2_CQC_BYTE_4_PAGE_OFFSET_M GENMASK(31, 24) + +#define V2_CQC_BYTE_8_CQN_S 0 +#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0) + +#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0 +#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30 +#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30) + +#define V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S 0 +#define V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M GENMASK(19, 0) + +#define V2_CQC_BYTE_24_CQE_BA_PG_SZ_S 24 +#define V2_CQC_BYTE_24_CQE_BA_PG_SZ_M GENMASK(27, 24) + +#define V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S 28 +#define V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M GENMASK(31, 28) + +#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0 +#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0) + +#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0 +#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0) + +#define V2_CQC_BYTE_40_CQE_BA_S 0 +#define V2_CQC_BYTE_40_CQE_BA_M GENMASK(28, 0) + +#define V2_CQC_BYTE_44_DB_RECORD_EN_S 0 + +#define V2_CQC_BYTE_52_CQE_CNT_S 0 +#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0) + +#define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0 +#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0) + +#define V2_CQC_BYTE_56_CQ_PERIOD_S 16 +#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16) + +#define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 +#define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) + +struct hns_roce_v2_cqe { + u32 byte_4; + u32 rkey_immtdata; + u32 byte_12; + u32 byte_16; + u32 byte_cnt; + u32 smac; + u32 byte_28; + u32 byte_32; +}; + +#define V2_CQE_BYTE_4_OPCODE_S 0 +#define V2_CQE_BYTE_4_OPCODE_M GENMASK(4, 0) + +#define V2_CQE_BYTE_4_RQ_INLINE_S 5 + +#define V2_CQE_BYTE_4_S_R_S 6 + +#define V2_CQE_BYTE_4_OWNER_S 7 + +#define V2_CQE_BYTE_4_STATUS_S 8 +#define V2_CQE_BYTE_4_STATUS_M GENMASK(15, 8) + +#define V2_CQE_BYTE_4_WQE_INDX_S 16 +#define V2_CQE_BYTE_4_WQE_INDX_M GENMASK(31, 16) + +#define V2_CQE_BYTE_12_XRC_SRQN_S 0 +#define V2_CQE_BYTE_12_XRC_SRQN_M GENMASK(23, 0) + +#define V2_CQE_BYTE_16_LCL_QPN_S 0 +#define V2_CQE_BYTE_16_LCL_QPN_M GENMASK(23, 0) + +#define V2_CQE_BYTE_16_SUB_STATUS_S 24 +#define V2_CQE_BYTE_16_SUB_STATUS_M GENMASK(31, 24) + +#define V2_CQE_BYTE_28_SMAC_4_S 0 +#define V2_CQE_BYTE_28_SMAC_4_M GENMASK(7, 0) + +#define V2_CQE_BYTE_28_SMAC_5_S 8 +#define V2_CQE_BYTE_28_SMAC_5_M GENMASK(15, 8) + +#define V2_CQE_BYTE_28_PORT_TYPE_S 16 +#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16) + +#define V2_CQE_BYTE_32_RMT_QPN_S 0 +#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0) + +#define V2_CQE_BYTE_32_SL_S 24 +#define V2_CQE_BYTE_32_SL_M GENMASK(26, 24) + +#define V2_CQE_BYTE_32_PORTN_S 27 +#define V2_CQE_BYTE_32_PORTN_M GENMASK(29, 27) + +#define V2_CQE_BYTE_32_GRH_S 30 + +#define V2_CQE_BYTE_32_LPK_S 31 + +#define V2_DB_BYTE_4_TAG_S 0 +#define V2_DB_BYTE_4_TAG_M GENMASK(23, 0) + +#define V2_DB_BYTE_4_CMD_S 24 +#define 
V2_DB_BYTE_4_CMD_M GENMASK(27, 24) + +struct hns_roce_v2_cq_db { + u32 byte_4; + u32 parameter; +}; + +#define V2_CQ_DB_BYTE_4_TAG_S 0 +#define V2_CQ_DB_BYTE_4_TAG_M GENMASK(23, 0) + +#define V2_CQ_DB_BYTE_4_CMD_S 24 +#define V2_CQ_DB_BYTE_4_CMD_M GENMASK(27, 24) + +#define V2_CQ_DB_PARAMETER_CONS_IDX_S 0 +#define V2_CQ_DB_PARAMETER_CONS_IDX_M GENMASK(23, 0) + +#define V2_CQ_DB_PARAMETER_CMD_SN_S 25 +#define V2_CQ_DB_PARAMETER_CMD_SN_M GENMASK(26, 25) + +#define V2_CQ_DB_PARAMETER_NOTIFY_S 24 + struct hns_roce_query_version { __le16 rocee_vendor_id; __le16 rocee_hw_version; From 926a01dc000d76df3f5b110dddcebfb517b8f57b Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:13 +0800 Subject: [PATCH 025/296] RDMA/hns: Add QP operations support for hip08 SoC This patch implements QP operations for hip08 RoCE driver and fixes some checkpatch warning about print message in QP function. The QP operations includes create QP, query QP, modify QP and destroy QP. Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 4 + drivers/infiniband/hw/hns/hns_roce_device.h | 11 + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1306 +++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 450 +++++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 140 +- 6 files changed, 1883 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index fc400698af1b..77299ae51d82 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -48,6 +48,10 @@ enum { HNS_ROCE_CMD_DESTROY_QPC_BT1 = 0x9, HNS_ROCE_CMD_DESTROY_QPC_BT2 = 0xa, + /* QPC operation */ + HNS_ROCE_CMD_MODIFY_QPC = 0x41, + HNS_ROCE_CMD_QUERY_QPC = 0x42, + /* CQC BT commands */ HNS_ROCE_CMD_WRITE_CQC_BT0 = 0x10, HNS_ROCE_CMD_WRITE_CQC_BT1 = 0x11, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index bee20886c889..22f20795cef5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -304,6 +304,12 @@ struct hns_roce_wq { void __iomem *db_reg_l; }; +struct hns_roce_sge { + int sge_cnt; /* SGE num */ + int offset; + int sge_shift;/* SGE size */ +}; + struct hns_roce_buf_list { void *buf; dma_addr_t map; @@ -455,6 +461,9 @@ struct hns_roce_qp { atomic_t refcount; struct completion free; + + struct hns_roce_sge sge; + u32 next_sge; }; struct hns_roce_sqp { @@ -509,6 +518,7 @@ struct hns_roce_caps { int num_cqs; int max_cqes; int min_cqes; + u32 min_wqes; int reserved_cqs; int num_aeq_vectors; /* 1 */ int num_comp_vectors; /* 32 ceq */ @@ -788,6 +798,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n); void *get_send_wqe(struct hns_roce_qp *hr_qp, int n); +void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n); bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, struct ib_cq *ib_cq); enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 1aa2be634e16..8ed053809cea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1475,6 +1475,7 @@ int hns_roce_v1_profile(struct hns_roce_dev 
*hr_dev) caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM; + caps->min_wqes = HNS_ROCE_MIN_WQE_NUM; caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM; caps->min_cqes = HNS_ROCE_MIN_CQE_NUM; caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 94339af99aec..f2425eecf7d1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -807,6 +807,61 @@ static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) } +static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, + struct hns_roce_srq *srq) +{ + struct hns_roce_v2_cqe *cqe, *dest; + u32 prod_index; + int nfreed = 0; + u8 owner_bit; + + for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index); + ++prod_index) { + if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe) + break; + } + + /* + * Now backwards through the CQ, removing CQ entries + * that match our QP by overwriting them with next entries. + */ + while ((int) --prod_index - (int) hr_cq->cons_index >= 0) { + cqe = get_cqe_v2(hr_cq, prod_index & hr_cq->ib_cq.cqe); + if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M, + V2_CQE_BYTE_16_LCL_QPN_S) & + HNS_ROCE_V2_CQE_QPN_MASK) == qpn) { + /* In v1 engine, not support SRQ */ + ++nfreed; + } else if (nfreed) { + dest = get_cqe_v2(hr_cq, (prod_index + nfreed) & + hr_cq->ib_cq.cqe); + owner_bit = roce_get_bit(dest->byte_4, + V2_CQE_BYTE_4_OWNER_S); + memcpy(dest, cqe, sizeof(*cqe)); + roce_set_bit(dest->byte_4, V2_CQE_BYTE_4_OWNER_S, + owner_bit); + } + } + + if (nfreed) { + hr_cq->cons_index += nfreed; + /* + * Make sure update of buffer contents is done before + * updating consumer index. + */ + wmb(); + hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); + } +} + +static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, + struct hns_roce_srq *srq) +{ + spin_lock_irq(&hr_cq->lock); + __hns_roce_v2_cq_clean(hr_cq, qpn, srq); + spin_unlock_irq(&hr_cq->lock); +} + static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, dma_addr_t dma_handle, int nent, @@ -1271,6 +1326,1254 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, return ret; } +static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, + enum ib_qp_state cur_state, + enum ib_qp_state new_state, + struct hns_roce_v2_qp_context *context, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, context, sizeof(*context) * 2); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, + HNS_ROCE_CMD_MODIFY_QPC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + +static void modify_qp_reset_to_init(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + + /* + * In v2 engine, software pass context and context mask to hardware + * when modifying qp. If software need modify some fields in context, + * we should set all bits of the relevant fields in context mask to + * 0 at the same time, else set them to 0x1. 
+ */ + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, + V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type)); + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, + V2_QPC_BYTE_4_TST_S, 0); + + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ? + ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, 0); + + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, + V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, + V2_QPC_BYTE_4_SQPN_S, 0); + + roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, + V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); + roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, + V2_QPC_BYTE_16_PD_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M, + V2_QPC_BYTE_20_RQWS_S, ilog2(hr_qp->rq.max_gs)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M, + V2_QPC_BYTE_20_RQWS_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); + + /* No VLAN need to set 0xFFF */ + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, + V2_QPC_BYTE_24_VLAN_IDX_S, 0xfff); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, + V2_QPC_BYTE_24_VLAN_IDX_S, 0); + + /* + * Set some fields in context to zero, Because the default values + * of all fields in context are zero, we need not set them to 0 again. + * but we should set the relevant fields of context mask to 0. 
+ */ + roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_TX_ERR_S, 0); + roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_RX_ERR_S, 0); + roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0); + roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0); + + roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M, + V2_QPC_BYTE_60_MAPID_S, 0); + + roce_set_bit(qpc_mask->byte_60_qpst_mapid, + V2_QPC_BYTE_60_INNER_MAP_IND_S, 0); + roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S, + 0); + roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S, + 0); + roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S, + 0); + roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S, + 0); + roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S, + 0); + roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); + roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); + + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 0); + + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 0); + + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); + + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0); + + roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, + V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn); + roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, + V2_QPC_BYTE_80_RX_CQN_S, 0); + if (ibqp->srq) { + roce_set_field(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, + to_hr_srq(ibqp->srq)->srqn); + roce_set_field(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0); + roce_set_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQ_EN_S, 1); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQ_EN_S, 0); + } + + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M, + V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S, 0); + + roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_SRQ_INFO_M, + V2_QPC_BYTE_92_SRQ_INFO_S, 0); + + roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, + V2_QPC_BYTE_96_RX_REQ_MSN_S, 0); + + roce_set_field(qpc_mask->byte_104_rq_sge, + V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_M, + V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_S, 0); + + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); + roce_set_field(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M, + V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_RNR_S, 0); + + qpc_mask->rq_rnr_timer = 0; + qpc_mask->rx_msg_len = 0; + qpc_mask->rx_rkey_pkt_info = 0; + qpc_mask->rx_va = 0; + + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M, + V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0); + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, + 
V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); + + roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0); + roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M, + V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0); + roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M, + V2_QPC_BYTE_140_RAQ_TRRL_TAIL_S, 0); + + roce_set_field(qpc_mask->byte_144_raq, + V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M, + V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0); + roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S, + 0); + roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M, + V2_QPC_BYTE_144_RAQ_CREDIT_S, 0); + roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0); + + roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RQ_MSN_M, + V2_QPC_BYTE_148_RQ_MSN_S, 0); + roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RAQ_SYNDROME_M, + V2_QPC_BYTE_148_RAQ_SYNDROME_S, 0); + + roce_set_field(qpc_mask->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, + V2_QPC_BYTE_152_RAQ_PSN_S, 0); + roce_set_field(qpc_mask->byte_152_raq, + V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M, + V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S, 0); + + roce_set_field(qpc_mask->byte_156_raq, V2_QPC_BYTE_156_RAQ_USE_PKTN_M, + V2_QPC_BYTE_156_RAQ_USE_PKTN_S, 0); + + roce_set_field(qpc_mask->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); + roce_set_field(qpc_mask->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M, + V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0); + + roce_set_field(context->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0); + + roce_set_bit(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0); + roce_set_field(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_IRRL_IDX_LSB_M, + V2_QPC_BYTE_168_IRRL_IDX_LSB_S, 0); + + roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4); + roce_set_field(qpc_mask->byte_172_sq_psn, + V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0); + + roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S, + 0); + + roce_set_field(qpc_mask->byte_176_msg_pktn, + V2_QPC_BYTE_176_MSG_USE_PKTN_M, + V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0); + roce_set_field(qpc_mask->byte_176_msg_pktn, + V2_QPC_BYTE_176_IRRL_HEAD_PRE_M, + V2_QPC_BYTE_176_IRRL_HEAD_PRE_S, 0); + + roce_set_field(qpc_mask->byte_184_irrl_idx, + V2_QPC_BYTE_184_IRRL_IDX_MSB_M, + V2_QPC_BYTE_184_IRRL_IDX_MSB_S, 0); + + qpc_mask->cur_sge_offset = 0; + + roce_set_field(qpc_mask->byte_192_ext_sge, + V2_QPC_BYTE_192_CUR_SGE_IDX_M, + V2_QPC_BYTE_192_CUR_SGE_IDX_S, 0); + roce_set_field(qpc_mask->byte_192_ext_sge, + V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_M, + V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_S, 0); + + roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M, + V2_QPC_BYTE_196_IRRL_HEAD_S, 0); + + roce_set_field(qpc_mask->byte_200_sq_max, V2_QPC_BYTE_200_SQ_MAX_IDX_M, + V2_QPC_BYTE_200_SQ_MAX_IDX_S, 0); + roce_set_field(qpc_mask->byte_200_sq_max, + V2_QPC_BYTE_200_LCL_OPERATED_CNT_M, + V2_QPC_BYTE_200_LCL_OPERATED_CNT_S, 0); + + roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RNR_FLG_S, 0); + roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RTY_FLG_S, 0); + + roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M, + V2_QPC_BYTE_212_CHECK_FLG_S, 
0); + + qpc_mask->sq_timer = 0; + + roce_set_field(qpc_mask->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_MSN_M, + V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0); + roce_set_field(qpc_mask->byte_232_irrl_sge, + V2_QPC_BYTE_232_IRRL_SGE_IDX_M, + V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0); + + qpc_mask->irrl_cur_sge_offset = 0; + + roce_set_field(qpc_mask->byte_240_irrl_tail, + V2_QPC_BYTE_240_IRRL_TAIL_REAL_M, + V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0); + roce_set_field(qpc_mask->byte_240_irrl_tail, + V2_QPC_BYTE_240_IRRL_TAIL_RD_M, + V2_QPC_BYTE_240_IRRL_TAIL_RD_S, 0); + roce_set_field(qpc_mask->byte_240_irrl_tail, + V2_QPC_BYTE_240_RX_ACK_MSN_M, + V2_QPC_BYTE_240_RX_ACK_MSN_S, 0); + + roce_set_field(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_M, + V2_QPC_BYTE_248_IRRL_PSN_S, 0); + roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_ACK_PSN_ERR_S, + 0); + roce_set_field(qpc_mask->byte_248_ack_psn, + V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M, + V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0); + roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_VLD_S, + 0); + roce_set_bit(qpc_mask->byte_248_ack_psn, + V2_QPC_BYTE_248_RNR_RETRY_FLAG_S, 0); + roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_CQ_ERR_IND_S, + 0); + + hr_qp->access_flags = attr->qp_access_flags; + hr_qp->pkey_index = attr->pkey_index; + roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, + V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); + roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, + V2_QPC_BYTE_252_TX_CQN_S, 0); + + roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_ERR_TYPE_M, + V2_QPC_BYTE_252_ERR_TYPE_S, 0); + + roce_set_field(qpc_mask->byte_256_sqflush_rqcqe, + V2_QPC_BYTE_256_RQ_CQE_IDX_M, + V2_QPC_BYTE_256_RQ_CQE_IDX_S, 0); + roce_set_field(qpc_mask->byte_256_sqflush_rqcqe, + V2_QPC_BYTE_256_SQ_FLUSH_IDX_M, + V2_QPC_BYTE_256_SQ_FLUSH_IDX_S, 0); +} + +static void modify_qp_init_to_init(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + + /* + * In v2 engine, software pass context and context mask to hardware + * when modifying qp. If software need modify some fields in context, + * we should set all bits of the relevant fields in context mask to + * 0 at the same time, else set them to 0x1. + */ + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, + V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type)); + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, + V2_QPC_BYTE_4_TST_S, 0); + + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ? 
+ ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, 0); + + if (attr_mask & IB_QP_ACCESS_FLAGS) { + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, + 0); + + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, + 0); + + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, + 0); + } else { + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, + 0); + + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, + 0); + + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, + 0); + } + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); + + roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, + V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); + roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, + V2_QPC_BYTE_16_PD_S, 0); + + roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, + V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn); + roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, + V2_QPC_BYTE_80_RX_CQN_S, 0); + + roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, + V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn); + roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, + V2_QPC_BYTE_252_TX_CQN_S, 0); + + if (ibqp->srq) { + roce_set_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQ_EN_S, 1); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQ_EN_S, 0); + roce_set_field(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, + to_hr_srq(ibqp->srq)->srqn); + roce_set_field(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0); + } + + if (attr_mask & IB_QP_PKEY_INDEX) + context->qkey_xrcd = attr->pkey_index; + else + context->qkey_xrcd = hr_qp->pkey_index; + + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, + V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, + V2_QPC_BYTE_4_SQPN_S, 0); + + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn); + roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, 0); + 
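/*
 * A minimal standalone sketch of the context/mask convention used
 * throughout these modify_qp helpers, with simplified stand-ins for
 * GENMASK() and roce_set_field(). The apply step is an assumption about
 * how the hardware consumes the (context, qpc_mask) pair, inferred from
 * the "set all bits of the relevant fields in context mask to 0" comment
 * above: a mask bit of 0 means "take the new context bit", a mask bit of
 * 1 means "keep the current QPC bit".
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_GENMASK(h, l) ((~0u << (l)) & (~0u >> (31 - (h))))

static void sketch_set_field(uint32_t *word, uint32_t mask, int shift,
			     uint32_t val)
{
	*word = (*word & ~mask) | ((val << shift) & mask);
}

/* Assumed hardware apply: keep old bits wherever the mask bit is 1. */
static uint32_t sketch_apply(uint32_t old, uint32_t ctx, uint32_t mask)
{
	return (old & mask) | (ctx & ~mask);
}

int main(void)
{
	uint32_t context = 0, qpc_mask = 0xffffffff, qpc = 0xdeadbeef;
	uint32_t pd_m = SKETCH_GENMASK(31, 8);	/* like V2_QPC_BYTE_16_PD_M */

	/* The driver pattern: write the field, zero its bits in the mask. */
	sketch_set_field(&context, pd_m, 8, 0x1234);
	sketch_set_field(&qpc_mask, pd_m, 8, 0);

	qpc = sketch_apply(qpc, context, qpc_mask);
	printf("PD field -> 0x%x, untouched low byte stays 0x%x\n",
	       (qpc & pd_m) >> 8, qpc & 0xff);
	return 0;
}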
roce_set_field(context->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, + V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0); +} + +static int modify_qp_init_to_rtr(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = hr_dev->dev; + dma_addr_t dma_handle_2; + dma_addr_t dma_handle; + u32 page_size; + u8 port_num; + u64 *mtts_2; + u64 *mtts; + u8 *dmac; + u8 *smac; + int port; + + /* Search qp buf's mtts */ + mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, + hr_qp->mtt.first_seg, &dma_handle); + if (!mtts) { + dev_err(dev, "qp buf pa find failed\n"); + return -EINVAL; + } + + /* Search IRRL's mtts */ + mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, + hr_qp->qpn, &dma_handle_2); + if (!mtts_2) { + dev_err(dev, "qp irrl_table find failed\n"); + return -EINVAL; + } + + if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || + (attr_mask & IB_QP_PKEY_INDEX) || (attr_mask & IB_QP_QKEY)) { + dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask); + return -EINVAL; + } + + dmac = (u8 *)attr->ah_attr.roce.dmac; + context->wqe_sge_ba = (u32)(dma_handle >> 3); + qpc_mask->wqe_sge_ba = 0; + + /* + * In v2 engine, software pass context and context mask to hardware + * when modifying qp. If software need modify some fields in context, + * we should set all bits of the relevant fields in context mask to + * 0 at the same time, else set them to 0x1. + */ + roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M, + V2_QPC_BYTE_12_WQE_SGE_BA_S, dma_handle >> (32 + 3)); + roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M, + V2_QPC_BYTE_12_WQE_SGE_BA_S, 0); + + roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, + V2_QPC_BYTE_12_SQ_HOP_NUM_S, + hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ? + 0 : hr_dev->caps.mtt_hop_num); + roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, + V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGE_HOP_NUM_M, + V2_QPC_BYTE_20_SGE_HOP_NUM_S, + hr_qp->sq.max_gs > 2 ? hr_dev->caps.mtt_hop_num : 0); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGE_HOP_NUM_M, + V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_HOP_NUM_M, + V2_QPC_BYTE_20_RQ_HOP_NUM_S, + hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ? 
+ 0 : hr_dev->caps.mtt_hop_num); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_HOP_NUM_M, + V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); + + roce_set_field(context->byte_16_buf_ba_pg_sz, + V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, + V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, + hr_dev->caps.mtt_ba_pg_sz); + roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, + V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, + V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); + + roce_set_field(context->byte_16_buf_ba_pg_sz, + V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, + V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, + hr_dev->caps.mtt_buf_pg_sz); + roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, + V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, + V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); + + roce_set_field(context->byte_80_rnr_rx_cqn, + V2_QPC_BYTE_80_MIN_RNR_TIME_M, + V2_QPC_BYTE_80_MIN_RNR_TIME_S, attr->min_rnr_timer); + roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, + V2_QPC_BYTE_80_MIN_RNR_TIME_M, + V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0); + + page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + context->rq_cur_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size] + >> PAGE_ADDR_SHIFT); + qpc_mask->rq_cur_blk_addr = 0; + + roce_set_field(context->byte_92_srq_info, + V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, + V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, + mtts[hr_qp->rq.offset / page_size] + >> (32 + PAGE_ADDR_SHIFT)); + roce_set_field(qpc_mask->byte_92_srq_info, + V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, + V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0); + + context->rq_nxt_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size + 1] + >> PAGE_ADDR_SHIFT); + qpc_mask->rq_nxt_blk_addr = 0; + + roce_set_field(context->byte_104_rq_sge, + V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, + V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, + mtts[hr_qp->rq.offset / page_size + 1] + >> (32 + PAGE_ADDR_SHIFT)); + roce_set_field(qpc_mask->byte_104_rq_sge, + V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, + V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); + + roce_set_field(context->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_EPSN_M, + V2_QPC_BYTE_108_RX_REQ_EPSN_S, attr->rq_psn); + roce_set_field(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_EPSN_M, + V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0); + + context->irrl_ba = (u32)dma_handle_2; + qpc_mask->irrl_ba = 0; + roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, + V2_QPC_BYTE_208_IRRL_BA_S, + (dma_handle_2 >> 32) & V2_QPC_BYTE_208_IRRL_BA_M); + roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, + V2_QPC_BYTE_208_IRRL_BA_S, 0); + + roce_set_bit(context->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 1); + roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 0); + + roce_set_bit(context->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S, + hr_qp->sq_signal_bits); + roce_set_bit(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S, + 0); + + port = (attr_mask & IB_QP_PORT) ? 
(attr->port_num - 1) : hr_qp->port; + + smac = (u8 *)hr_dev->dev_addr[port]; + /* when dmac equals smac or loop_idc is 1, it should loopback */ + if (ether_addr_equal_unaligned(dmac, smac) || + hr_dev->loop_idc == 0x1) { + roce_set_bit(context->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 1); + roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0); + } + + roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, + V2_QPC_BYTE_140_RR_MAX_S, + ilog2((unsigned int)attr->max_dest_rd_atomic)); + roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, + V2_QPC_BYTE_140_RR_MAX_S, 0); + + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); + roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, 0); + + /* Configure GID index */ + port_num = rdma_ah_get_port_num(&attr->ah_attr); + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, + hns_get_gid_index(hr_dev, port_num - 1, + grh->sgid_index)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, 0); + memcpy(&(context->dmac), dmac, 4); + roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, + V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); + qpc_mask->dmac = 0; + roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, + V2_QPC_BYTE_52_DMAC_S, 0); + + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, + V2_QPC_BYTE_56_LP_PKTN_INI_S, 4); + roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, + V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, 0); + + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, grh->flow_label); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, 0); + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, 0); + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, + V2_QPC_BYTE_24_MTU_S, attr->path_mtu); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, + V2_QPC_BYTE_24_MTU_S, 0); + + memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); + memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + + roce_set_field(context->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M, + V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S, 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); + roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, + V2_QPC_BYTE_96_RX_REQ_MSN_S, 0); + roce_set_field(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M, + V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0); + + context->rq_rnr_timer = 0; + qpc_mask->rq_rnr_timer = 0; + + roce_set_field(context->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, + V2_QPC_BYTE_152_RAQ_PSN_S, attr->rq_psn - 1); + roce_set_field(qpc_mask->byte_152_raq, 
V2_QPC_BYTE_152_RAQ_PSN_M, + V2_QPC_BYTE_152_RAQ_PSN_S, 0); + + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M, + V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0); + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, + V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); + + roce_set_field(context->byte_168_irrl_idx, + V2_QPC_BYTE_168_LP_SGEN_INI_M, + V2_QPC_BYTE_168_LP_SGEN_INI_S, 3); + roce_set_field(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_LP_SGEN_INI_M, + V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); + + roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, + V2_QPC_BYTE_208_SR_MAX_S, + ilog2((unsigned int)attr->max_rd_atomic)); + roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, + V2_QPC_BYTE_208_SR_MAX_S, 0); + + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, 0); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + + return 0; +} + +static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = hr_dev->dev; + dma_addr_t dma_handle; + u64 *mtts; + + /* Search qp buf's mtts */ + mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, + hr_qp->mtt.first_seg, &dma_handle); + if (!mtts) { + dev_err(dev, "qp buf pa find failed\n"); + return -EINVAL; + } + + /* If exist optional param, return error */ + if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || + (attr_mask & IB_QP_QKEY) || (attr_mask & IB_QP_PATH_MIG_STATE) || + (attr_mask & IB_QP_CUR_STATE) || + (attr_mask & IB_QP_MIN_RNR_TIMER)) { + dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); + return -EINVAL; + } + + /* + * In v2 engine, software pass context and context mask to hardware + * when modifying qp. If software need modify some fields in context, + * we should set all bits of the relevant fields in context mask to + * 0 at the same time, else set them to 0x1. + */ + roce_set_field(context->byte_60_qpst_mapid, + V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M, + V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt); + roce_set_field(qpc_mask->byte_60_qpst_mapid, + V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M, + V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0); + + context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); + roce_set_field(context->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, + V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, + mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + qpc_mask->sq_cur_blk_addr = 0; + roce_set_field(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, + V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); + + context->rx_sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); + roce_set_field(context->byte_232_irrl_sge, + V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, + V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, + mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + qpc_mask->rx_sq_cur_blk_addr = 0; + roce_set_field(qpc_mask->byte_232_irrl_sge, + V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, + V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, 0); + + /* + * Set some fields in context to zero, Because the default values + * of all fields in context are zero, we need not set them to 0 again. + * but we should set the relevant fields of context mask to 0. 
+ */ + roce_set_field(qpc_mask->byte_232_irrl_sge, + V2_QPC_BYTE_232_IRRL_SGE_IDX_M, + V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0); + + roce_set_field(qpc_mask->byte_240_irrl_tail, + V2_QPC_BYTE_240_RX_ACK_MSN_M, + V2_QPC_BYTE_240_RX_ACK_MSN_S, 0); + + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RX_ACK_EPSN_M, + V2_QPC_BYTE_244_RX_ACK_EPSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RX_ACK_EPSN_M, + V2_QPC_BYTE_244_RX_ACK_EPSN_S, 0); + + roce_set_field(qpc_mask->byte_248_ack_psn, + V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M, + V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0); + roce_set_bit(qpc_mask->byte_248_ack_psn, + V2_QPC_BYTE_248_IRRL_PSN_VLD_S, 0); + roce_set_field(qpc_mask->byte_248_ack_psn, + V2_QPC_BYTE_248_IRRL_PSN_M, + V2_QPC_BYTE_248_IRRL_PSN_S, 0); + + roce_set_field(qpc_mask->byte_240_irrl_tail, + V2_QPC_BYTE_240_IRRL_TAIL_REAL_M, + V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0); + + roce_set_field(context->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_PSN_M, + V2_QPC_BYTE_220_RETRY_MSG_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_PSN_M, + V2_QPC_BYTE_220_RETRY_MSG_PSN_S, 0); + + roce_set_field(context->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_PSN_S, attr->sq_psn >> 16); + roce_set_field(qpc_mask->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0); + + roce_set_field(context->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, 0); + + roce_set_field(qpc_mask->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_MSN_M, + V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0); + + roce_set_bit(qpc_mask->byte_248_ack_psn, + V2_QPC_BYTE_248_RNR_RETRY_FLAG_S, 0); + + roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M, + V2_QPC_BYTE_212_CHECK_FLG_S, 0); + + roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, + V2_QPC_BYTE_212_RETRY_CNT_S, attr->retry_cnt); + roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, + V2_QPC_BYTE_212_RETRY_CNT_S, 0); + + roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_RETRY_NUM_INIT_M, + V2_QPC_BYTE_212_RETRY_NUM_INIT_S, attr->retry_cnt); + roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_RETRY_NUM_INIT_M, + V2_QPC_BYTE_212_RETRY_NUM_INIT_S, 0); + + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_NUM_INIT_M, + V2_QPC_BYTE_244_RNR_NUM_INIT_S, attr->rnr_retry); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_NUM_INIT_M, + V2_QPC_BYTE_244_RNR_NUM_INIT_S, 0); + + roce_set_field(context->byte_244_rnr_rxack, V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S, attr->rnr_retry); + roce_set_field(qpc_mask->byte_244_rnr_rxack, V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S, 0); + + roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_LSN_M, + V2_QPC_BYTE_212_LSN_S, 0x100); + roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_LSN_M, + V2_QPC_BYTE_212_LSN_S, 0); + + if (attr->timeout < 0xf) + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, + V2_QPC_BYTE_28_AT_S, 0xf); + else + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, + V2_QPC_BYTE_28_AT_S, attr->timeout); + + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, + V2_QPC_BYTE_28_AT_S, 0); + + roce_set_field(context->byte_28_at_fl, 
V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, + rdma_ah_get_sl(&attr->ah_attr)); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, 0); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + + roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, + V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, + V2_QPC_BYTE_172_SQ_CUR_PSN_S, 0); + + roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M, + V2_QPC_BYTE_196_IRRL_HEAD_S, 0); + roce_set_field(context->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M, + V2_QPC_BYTE_196_SQ_MAX_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M, + V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0); + + return 0; +} + +static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_v2_qp_context *context; + struct hns_roce_v2_qp_context *qpc_mask; + struct device *dev = hr_dev->dev; + int ret = -EINVAL; + + context = kzalloc(2 * sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + qpc_mask = context + 1; + /* + * In v2 engine, software pass context and context mask to hardware + * when modifying qp. If software need modify some fields in context, + * we should set all bits of the relevant fields in context mask to + * 0 at the same time, else set them to 0x1. + */ + memset(qpc_mask, 0xff, sizeof(*qpc_mask)); + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + modify_qp_reset_to_init(ibqp, attr, context, qpc_mask); + } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { + modify_qp_init_to_init(ibqp, attr, attr_mask, context, + qpc_mask); + } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, + qpc_mask); + if (ret) + goto out; + } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { + ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context, + qpc_mask); + if (ret) + goto out; + } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) || + (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) || + (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) || + (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) || + (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) || + (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR)) { + /* Nothing */ + ; + } else { + dev_err(dev, "Illegal state for QP!\n"); + goto out; + } + + /* Every status migrate must change state */ + roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, + V2_QPC_BYTE_60_QP_ST_S, new_state); + roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, + V2_QPC_BYTE_60_QP_ST_S, 0); + + /* SW pass context to HW */ + ret = hns_roce_v2_qp_modify(hr_dev, &hr_qp->mtt, cur_state, new_state, + context, hr_qp); + if (ret) { + 
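/*
 * The if/else chain in hns_roce_v2_modify_qp() above admits a fixed set
 * of state transitions and rejects everything else as "Illegal state for
 * QP!". As a standalone illustration only (the driver itself keeps the
 * explicit chain), the same accept set can be written as a lookup table;
 * the enum order mirrors enum ib_qp_state:
 */
#include <stdbool.h>
#include <stdio.h>

enum qps { RESET, INIT, RTR, RTS, SQD, SQE, ERR, NUM_QPS };

/* Entries mirror the branches above; unset entries are invalid. */
static const bool sketch_valid[NUM_QPS][NUM_QPS] = {
	[RESET][INIT] = true,
	[INIT][INIT] = true,  [INIT][RTR] = true,
	[INIT][RESET] = true, [INIT][ERR] = true,
	[RTR][RTS] = true,    [RTR][RESET] = true, [RTR][ERR] = true,
	[RTS][RTS] = true,    [RTS][SQD] = true,
	[RTS][RESET] = true,  [RTS][ERR] = true,
	[SQD][SQD] = true,    [SQD][RTS] = true,   [SQD][ERR] = true,
	[SQE][RTS] = true,    [SQE][ERR] = true,
	[ERR][RESET] = true,
};

int main(void)
{
	printf("INIT->RTR: %d, RESET->RTS: %d\n",
	       sketch_valid[INIT][RTR], sketch_valid[RESET][RTS]);
	return 0;
}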
dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret); + goto out; + } + + hr_qp->state = new_state; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + hr_qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) { + hr_qp->port = attr->port_num - 1; + hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; + } + + if (new_state == IB_QPS_RESET && !ibqp->uobject) { + hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, + ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); + if (ibqp->send_cq != ibqp->recv_cq) + hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), + hr_qp->qpn, NULL); + + hr_qp->rq.head = 0; + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; + hr_qp->sq_next_wqe = 0; + hr_qp->next_sge = 0; + } + +out: + kfree(context); + return ret; +} + +static inline enum ib_qp_state to_ib_qp_st(enum hns_roce_v2_qp_state state) +{ + switch (state) { + case HNS_ROCE_QP_ST_RST: return IB_QPS_RESET; + case HNS_ROCE_QP_ST_INIT: return IB_QPS_INIT; + case HNS_ROCE_QP_ST_RTR: return IB_QPS_RTR; + case HNS_ROCE_QP_ST_RTS: return IB_QPS_RTS; + case HNS_ROCE_QP_ST_SQ_DRAINING: + case HNS_ROCE_QP_ST_SQD: return IB_QPS_SQD; + case HNS_ROCE_QP_ST_SQER: return IB_QPS_SQE; + case HNS_ROCE_QP_ST_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_v2_qp_context *hr_context) +{ + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, + HNS_ROCE_CMD_QUERY_QPC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + dev_err(hr_dev->dev, "QUERY QP cmd process error\n"); + goto out; + } + + memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_v2_qp_context *context; + struct device *dev = hr_dev->dev; + int tmp_qp_state; + int state; + int ret; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + mutex_lock(&hr_qp->mutex); + + if (hr_qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, context); + if (ret) { + dev_err(dev, "query qpc error\n"); + ret = -EINVAL; + goto out; + } + + state = roce_get_field(context->byte_60_qpst_mapid, + V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S); + tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); + if (tmp_qp_state == -1) { + dev_err(dev, "Illegal ib_qp_state\n"); + ret = -EINVAL; + goto out; + } + hr_qp->state = (u8)tmp_qp_state; + qp_attr->qp_state = (enum ib_qp_state)hr_qp->state; + qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_MTU_M, + V2_QPC_BYTE_24_MTU_S); + qp_attr->path_mig_state = IB_MIG_ARMED; + if (hr_qp->ibqp.qp_type == IB_QPT_UD) + qp_attr->qkey = V2_QKEY_VAL; + + qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_EPSN_M, + V2_QPC_BYTE_108_RX_REQ_EPSN_S); + qp_attr->sq_psn = (u32)roce_get_field(context->byte_172_sq_psn, + V2_QPC_BYTE_172_SQ_CUR_PSN_M, + 
V2_QPC_BYTE_172_SQ_CUR_PSN_S); + qp_attr->dest_qp_num = (u8)roce_get_field(context->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S); + qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_RRE_S)) << 2) | + ((roce_get_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_RWE_S)) << 1) | + ((roce_get_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_ATE_S)) << 3); + if (hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UC) { + struct ib_global_route *grh = + rdma_ah_retrieve_grh(&qp_attr->ah_attr); + + rdma_ah_set_sl(&qp_attr->ah_attr, + roce_get_field(context->byte_28_at_fl, + V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S)); + grh->flow_label = roce_get_field(context->byte_28_at_fl, + V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S); + grh->sgid_index = roce_get_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S); + grh->hop_limit = roce_get_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S); + grh->traffic_class = roce_get_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S); + + memcpy(grh->dgid.raw, context->dgid, sizeof(grh->dgid.raw)); + } + + qp_attr->port_num = hr_qp->port + 1; + qp_attr->sq_draining = 0; + qp_attr->max_rd_atomic = 1 << roce_get_field(context->byte_208_irrl, + V2_QPC_BYTE_208_SR_MAX_M, + V2_QPC_BYTE_208_SR_MAX_S); + qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->byte_140_raq, + V2_QPC_BYTE_140_RR_MAX_M, + V2_QPC_BYTE_140_RR_MAX_S); + qp_attr->min_rnr_timer = (u8)roce_get_field(context->byte_80_rnr_rx_cqn, + V2_QPC_BYTE_80_MIN_RNR_TIME_M, + V2_QPC_BYTE_80_MIN_RNR_TIME_S); + qp_attr->timeout = (u8)roce_get_field(context->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, + V2_QPC_BYTE_28_AT_S); + qp_attr->retry_cnt = roce_get_field(context->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_CNT_M, + V2_QPC_BYTE_212_RETRY_CNT_S); + qp_attr->rnr_retry = context->rq_rnr_timer; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = hr_qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + qp_init_attr->cap = qp_attr->cap; + +out: + mutex_unlock(&hr_qp->mutex); + kfree(context); + return ret; +} + +static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + int is_user) +{ + struct hns_roce_cq *send_cq, *recv_cq; + struct device *dev = hr_dev->dev; + int ret; + + if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { + /* Modify qp to reset before destroying qp */ + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, + hr_qp->state, IB_QPS_RESET); + if (ret) { + dev_err(dev, "modify QP %06lx to ERR failed.\n", + hr_qp->qpn); + return ret; + } + } + + send_cq = to_hr_cq(hr_qp->ibqp.send_cq); + recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); + + hns_roce_lock_cqs(send_cq, recv_cq); + + if (!is_user) { + __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? 
+ to_hr_srq(hr_qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + __hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL); + } + + hns_roce_qp_remove(hr_dev, hr_qp); + + hns_roce_unlock_cqs(send_cq, recv_cq); + + hns_roce_qp_free(hr_dev, hr_qp); + + /* Not special_QP, free their QPN */ + if ((hr_qp->ibqp.qp_type == IB_QPT_RC) || + (hr_qp->ibqp.qp_type == IB_QPT_UC) || + (hr_qp->ibqp.qp_type == IB_QPT_UD)) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + + hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); + + if (is_user) { + ib_umem_release(hr_qp->umem); + } else { + kfree(hr_qp->sq.wrid); + kfree(hr_qp->rq.wrid); + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + } + + return 0; +} + +static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + int ret; + + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + if (ret) { + dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); + return ret; + } + + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) + kfree(hr_to_hr_sqp(hr_qp)); + else + kfree(hr_qp); + + return 0; +} + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, @@ -1282,6 +2585,9 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .write_cqc = hns_roce_v2_write_cqc, .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, + .modify_qp = hns_roce_v2_modify_qp, + .query_qp = hns_roce_v2_query_qp, + .destroy_qp = hns_roce_v2_destroy_qp, .req_notify_cq = hns_roce_v2_req_notify_cq, .poll_cq = hns_roce_v2_poll_cq, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 8b69875534c0..0360df04467a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -104,6 +104,9 @@ #define V2_CQ_DB_REQ_NOT 1 #define V2_CQ_STATE_VALID 1 +#define V2_QKEY_VAL 0x80010000 + +#define GID_LEN_V2 16 #define HNS_ROCE_V2_CQE_QPN_MASK 0x3ffff @@ -262,6 +265,453 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +enum{ + V2_MPT_ST_VALID = 0x1, +}; + +enum hns_roce_v2_qp_state { + HNS_ROCE_QP_ST_RST, + HNS_ROCE_QP_ST_INIT, + HNS_ROCE_QP_ST_RTR, + HNS_ROCE_QP_ST_RTS, + HNS_ROCE_QP_ST_SQER, + HNS_ROCE_QP_ST_SQD, + HNS_ROCE_QP_ST_ERR, + HNS_ROCE_QP_ST_SQ_DRAINING, + HNS_ROCE_QP_NUM_ST +}; + +struct hns_roce_v2_qp_context { + u32 byte_4_sqpn_tst; + u32 wqe_sge_ba; + u32 byte_12_sq_hop; + u32 byte_16_buf_ba_pg_sz; + u32 byte_20_smac_sgid_idx; + u32 byte_24_mtu_tc; + u32 byte_28_at_fl; + u8 dgid[GID_LEN_V2]; + u32 dmac; + u32 byte_52_udpspn_dmac; + u32 byte_56_dqpn_err; + u32 byte_60_qpst_mapid; + u32 qkey_xrcd; + u32 byte_68_rq_db; + u32 rq_db_record_addr; + u32 byte_76_srqn_op_en; + u32 byte_80_rnr_rx_cqn; + u32 byte_84_rq_ci_pi; + u32 rq_cur_blk_addr; + u32 byte_92_srq_info; + u32 byte_96_rx_reqmsn; + u32 rq_nxt_blk_addr; + u32 byte_104_rq_sge; + u32 byte_108_rx_reqepsn; + u32 rq_rnr_timer; + u32 rx_msg_len; + u32 rx_rkey_pkt_info; + u64 rx_va; + u32 byte_132_trrl; + u32 trrl_ba; + u32 byte_140_raq; + u32 byte_144_raq; + u32 byte_148_raq; + u32 byte_152_raq; + u32 byte_156_raq; + u32 byte_160_sq_ci_pi; + u32 sq_cur_blk_addr; + u32 byte_168_irrl_idx; + u32 byte_172_sq_psn; + u32 byte_176_msg_pktn; + u32 sq_cur_sqe_blk_addr; + u32 byte_184_irrl_idx; + u32 cur_sge_offset; + u32 byte_192_ext_sge; + u32 byte_196_sq_psn; + u32 byte_200_sq_max; + u32 
irrl_ba; + u32 byte_208_irrl; + u32 byte_212_lsn; + u32 sq_timer; + u32 byte_220_retry_psn_msn; + u32 byte_224_retry_msg; + u32 rx_sq_cur_blk_addr; + u32 byte_232_irrl_sge; + u32 irrl_cur_sge_offset; + u32 byte_240_irrl_tail; + u32 byte_244_rnr_rxack; + u32 byte_248_ack_psn; + u32 byte_252_err_txcqn; + u32 byte_256_sqflush_rqcqe; +}; + +#define V2_QPC_BYTE_4_TST_S 0 +#define V2_QPC_BYTE_4_TST_M GENMASK(2, 0) + +#define V2_QPC_BYTE_4_SGE_SHIFT_S 3 +#define V2_QPC_BYTE_4_SGE_SHIFT_M GENMASK(7, 3) + +#define V2_QPC_BYTE_4_SQPN_S 8 +#define V2_QPC_BYTE_4_SQPN_M GENMASK(31, 8) + +#define V2_QPC_BYTE_12_WQE_SGE_BA_S 0 +#define V2_QPC_BYTE_12_WQE_SGE_BA_M GENMASK(28, 0) + +#define V2_QPC_BYTE_12_SQ_HOP_NUM_S 29 +#define V2_QPC_BYTE_12_SQ_HOP_NUM_M GENMASK(30, 29) + +#define V2_QPC_BYTE_12_RSVD_LKEY_EN_S 31 + +#define V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S 0 +#define V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M GENMASK(3, 0) + +#define V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S 4 +#define V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M GENMASK(7, 4) + +#define V2_QPC_BYTE_16_PD_S 8 +#define V2_QPC_BYTE_16_PD_M GENMASK(31, 8) + +#define V2_QPC_BYTE_20_RQ_HOP_NUM_S 0 +#define V2_QPC_BYTE_20_RQ_HOP_NUM_M GENMASK(1, 0) + +#define V2_QPC_BYTE_20_SGE_HOP_NUM_S 2 +#define V2_QPC_BYTE_20_SGE_HOP_NUM_M GENMASK(3, 2) + +#define V2_QPC_BYTE_20_RQWS_S 4 +#define V2_QPC_BYTE_20_RQWS_M GENMASK(7, 4) + +#define V2_QPC_BYTE_20_SQ_SHIFT_S 8 +#define V2_QPC_BYTE_20_SQ_SHIFT_M GENMASK(11, 8) + +#define V2_QPC_BYTE_20_RQ_SHIFT_S 12 +#define V2_QPC_BYTE_20_RQ_SHIFT_M GENMASK(15, 12) + +#define V2_QPC_BYTE_20_SGID_IDX_S 16 +#define V2_QPC_BYTE_20_SGID_IDX_M GENMASK(23, 16) + +#define V2_QPC_BYTE_20_SMAC_IDX_S 24 +#define V2_QPC_BYTE_20_SMAC_IDX_M GENMASK(31, 24) + +#define V2_QPC_BYTE_24_HOP_LIMIT_S 0 +#define V2_QPC_BYTE_24_HOP_LIMIT_M GENMASK(7, 0) + +#define V2_QPC_BYTE_24_TC_S 8 +#define V2_QPC_BYTE_24_TC_M GENMASK(15, 8) + +#define V2_QPC_BYTE_24_VLAN_IDX_S 16 +#define V2_QPC_BYTE_24_VLAN_IDX_M GENMASK(27, 16) + +#define V2_QPC_BYTE_24_MTU_S 28 +#define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28) + +#define V2_QPC_BYTE_28_FL_S 0 +#define V2_QPC_BYTE_28_FL_M GENMASK(19, 0) + +#define V2_QPC_BYTE_28_SL_S 20 +#define V2_QPC_BYTE_28_SL_M GENMASK(23, 20) + +#define V2_QPC_BYTE_28_CNP_TX_FLAG_S 24 + +#define V2_QPC_BYTE_28_CE_FLAG_S 25 + +#define V2_QPC_BYTE_28_LBI_S 26 + +#define V2_QPC_BYTE_28_AT_S 27 +#define V2_QPC_BYTE_28_AT_M GENMASK(31, 27) + +#define V2_QPC_BYTE_52_DMAC_S 0 +#define V2_QPC_BYTE_52_DMAC_M GENMASK(15, 0) + +#define V2_QPC_BYTE_52_UDPSPN_S 16 +#define V2_QPC_BYTE_52_UDPSPN_M GENMASK(31, 16) + +#define V2_QPC_BYTE_56_DQPN_S 0 +#define V2_QPC_BYTE_56_DQPN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_56_SQ_TX_ERR_S 24 +#define V2_QPC_BYTE_56_SQ_RX_ERR_S 25 +#define V2_QPC_BYTE_56_RQ_TX_ERR_S 26 +#define V2_QPC_BYTE_56_RQ_RX_ERR_S 27 + +#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28 +#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28) + +#define V2_QPC_BYTE_60_MAPID_S 0 +#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0) + +#define V2_QPC_BYTE_60_INNER_MAP_IND_S 13 + +#define V2_QPC_BYTE_60_SQ_MAP_IND_S 14 + +#define V2_QPC_BYTE_60_RQ_MAP_IND_S 15 + +#define V2_QPC_BYTE_60_TEMPID_S 16 +#define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16) + +#define V2_QPC_BYTE_60_EXT_MAP_IND_S 23 + +#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24 +#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24) + +#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27 + +#define V2_QPC_BYTE_60_SQ_EXT_IND_S 28 + +#define V2_QPC_BYTE_60_QP_ST_S 29 +#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29) + +#define 
V2_QPC_BYTE_68_RQ_RECORD_EN_S 0 + +#define V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S 1 +#define V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M GENMASK(31, 1) + +#define V2_QPC_BYTE_76_SRQN_S 0 +#define V2_QPC_BYTE_76_SRQN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_76_SRQ_EN_S 24 + +#define V2_QPC_BYTE_76_RRE_S 25 + +#define V2_QPC_BYTE_76_RWE_S 26 + +#define V2_QPC_BYTE_76_ATE_S 27 + +#define V2_QPC_BYTE_76_RQIE_S 28 + +#define V2_QPC_BYTE_80_RX_CQN_S 0 +#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_80_MIN_RNR_TIME_S 27 +#define V2_QPC_BYTE_80_MIN_RNR_TIME_M GENMASK(31, 27) + +#define V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S 0 +#define V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M GENMASK(15, 0) + +#define V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S 16 +#define V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M GENMASK(31, 16) + +#define V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S 0 +#define V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define V2_QPC_BYTE_92_SRQ_INFO_S 20 +#define V2_QPC_BYTE_92_SRQ_INFO_M GENMASK(31, 20) + +#define V2_QPC_BYTE_96_RX_REQ_MSN_S 0 +#define V2_QPC_BYTE_96_RX_REQ_MSN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S 0 +#define V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M GENMASK(19, 0) + +#define V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_S 24 +#define V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_M GENMASK(31, 24) + +#define V2_QPC_BYTE_108_INV_CREDIT_S 0 + +#define V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S 3 + +#define V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S 4 +#define V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M GENMASK(6, 4) + +#define V2_QPC_BYTE_108_RX_REQ_RNR_S 7 + +#define V2_QPC_BYTE_108_RX_REQ_EPSN_S 8 +#define V2_QPC_BYTE_108_RX_REQ_EPSN_M GENMASK(31, 8) + +#define V2_QPC_BYTE_132_TRRL_HEAD_MAX_S 0 +#define V2_QPC_BYTE_132_TRRL_HEAD_MAX_M GENMASK(7, 0) + +#define V2_QPC_BYTE_132_TRRL_TAIL_MAX_S 8 +#define V2_QPC_BYTE_132_TRRL_TAIL_MAX_M GENMASK(15, 8) + +#define V2_QPC_BYTE_132_TRRL_BA_S 16 +#define V2_QPC_BYTE_132_TRRL_BA_M GENMASK(31, 16) + +#define V2_QPC_BYTE_140_TRRL_BA_S 0 +#define V2_QPC_BYTE_140_TRRL_BA_M GENMASK(11, 0) + +#define V2_QPC_BYTE_140_RR_MAX_S 12 +#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12) + +#define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15 + +#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16 +#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16) + +#define V2_QPC_BYTE_140_RAQ_TRRL_TAIL_S 24 +#define V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M GENMASK(31, 24) + +#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0 +#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24 + +#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25 +#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25) + +#define V2_QPC_BYTE_144_RESP_RTY_FLG_S 31 + +#define V2_QPC_BYTE_148_RQ_MSN_S 0 +#define V2_QPC_BYTE_148_RQ_MSN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_148_RAQ_SYNDROME_S 24 +#define V2_QPC_BYTE_148_RAQ_SYNDROME_M GENMASK(31, 24) + +#define V2_QPC_BYTE_152_RAQ_PSN_S 8 +#define V2_QPC_BYTE_152_RAQ_PSN_M GENMASK(31, 8) + +#define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S 24 +#define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M GENMASK(31, 24) + +#define V2_QPC_BYTE_156_RAQ_USE_PKTN_S 0 +#define V2_QPC_BYTE_156_RAQ_USE_PKTN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S 0 +#define V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M GENMASK(15, 0) + +#define V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S 16 +#define V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M GENMASK(31, 16) + +#define V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S 0 +#define V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S 20 + +#define V2_QPC_BYTE_168_LP_SGEN_INI_S 21 +#define 
V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 21) + +#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24 +#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24) + +#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28 +#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28) + +#define V2_QPC_BYTE_172_ACK_REQ_FREQ_S 0 +#define V2_QPC_BYTE_172_ACK_REQ_FREQ_M GENMASK(5, 0) + +#define V2_QPC_BYTE_172_MSG_RNR_FLG_S 6 + +#define V2_QPC_BYTE_172_FRE_S 7 + +#define V2_QPC_BYTE_172_SQ_CUR_PSN_S 8 +#define V2_QPC_BYTE_172_SQ_CUR_PSN_M GENMASK(31, 8) + +#define V2_QPC_BYTE_176_MSG_USE_PKTN_S 0 +#define V2_QPC_BYTE_176_MSG_USE_PKTN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_176_IRRL_HEAD_PRE_S 24 +#define V2_QPC_BYTE_176_IRRL_HEAD_PRE_M GENMASK(31, 24) + +#define V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S 0 +#define V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M GENMASK(19, 0) + +#define V2_QPC_BYTE_184_IRRL_IDX_MSB_S 20 +#define V2_QPC_BYTE_184_IRRL_IDX_MSB_M GENMASK(31, 20) + +#define V2_QPC_BYTE_192_CUR_SGE_IDX_S 0 +#define V2_QPC_BYTE_192_CUR_SGE_IDX_M GENMASK(23, 0) + +#define V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_S 24 +#define V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_M GENMASK(31, 24) + +#define V2_QPC_BYTE_196_IRRL_HEAD_S 0 +#define V2_QPC_BYTE_196_IRRL_HEAD_M GENMASK(7, 0) + +#define V2_QPC_BYTE_196_SQ_MAX_PSN_S 8 +#define V2_QPC_BYTE_196_SQ_MAX_PSN_M GENMASK(31, 8) + +#define V2_QPC_BYTE_200_SQ_MAX_IDX_S 0 +#define V2_QPC_BYTE_200_SQ_MAX_IDX_M GENMASK(15, 0) + +#define V2_QPC_BYTE_200_LCL_OPERATED_CNT_S 16 +#define V2_QPC_BYTE_200_LCL_OPERATED_CNT_M GENMASK(31, 16) + +#define V2_QPC_BYTE_208_IRRL_BA_S 0 +#define V2_QPC_BYTE_208_IRRL_BA_M GENMASK(25, 0) + +#define V2_QPC_BYTE_208_PKT_RNR_FLG_S 26 + +#define V2_QPC_BYTE_208_PKT_RTY_FLG_S 27 + +#define V2_QPC_BYTE_208_RMT_E2E_S 28 + +#define V2_QPC_BYTE_208_SR_MAX_S 29 +#define V2_QPC_BYTE_208_SR_MAX_M GENMASK(31, 29) + +#define V2_QPC_BYTE_212_LSN_S 0 +#define V2_QPC_BYTE_212_LSN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_212_RETRY_NUM_INIT_S 24 +#define V2_QPC_BYTE_212_RETRY_NUM_INIT_M GENMASK(26, 24) + +#define V2_QPC_BYTE_212_CHECK_FLG_S 27 +#define V2_QPC_BYTE_212_CHECK_FLG_M GENMASK(28, 27) + +#define V2_QPC_BYTE_212_RETRY_CNT_S 29 +#define V2_QPC_BYTE_212_RETRY_CNT_M GENMASK(31, 29) + +#define V2_QPC_BYTE_220_RETRY_MSG_MSN_S 0 +#define V2_QPC_BYTE_220_RETRY_MSG_MSN_M GENMASK(15, 0) + +#define V2_QPC_BYTE_220_RETRY_MSG_PSN_S 16 +#define V2_QPC_BYTE_220_RETRY_MSG_PSN_M GENMASK(31, 16) + +#define V2_QPC_BYTE_224_RETRY_MSG_PSN_S 0 +#define V2_QPC_BYTE_224_RETRY_MSG_PSN_M GENMASK(7, 0) + +#define V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S 8 +#define V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M GENMASK(31, 8) + +#define V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S 0 +#define V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20 +#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20) + +#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0 +#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0) + +#define V2_QPC_BYTE_240_IRRL_TAIL_RD_S 8 +#define V2_QPC_BYTE_240_IRRL_TAIL_RD_M GENMASK(15, 8) + +#define V2_QPC_BYTE_240_RX_ACK_MSN_S 16 +#define V2_QPC_BYTE_240_RX_ACK_MSN_M GENMASK(31, 16) + +#define V2_QPC_BYTE_244_RX_ACK_EPSN_S 0 +#define V2_QPC_BYTE_244_RX_ACK_EPSN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_244_RNR_NUM_INIT_S 24 +#define V2_QPC_BYTE_244_RNR_NUM_INIT_M GENMASK(26, 24) + +#define V2_QPC_BYTE_244_RNR_CNT_S 27 +#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27) + +#define V2_QPC_BYTE_248_IRRL_PSN_S 0 +#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0) + +#define 
V2_QPC_BYTE_248_ACK_PSN_ERR_S 24 + +#define V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S 25 +#define V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M GENMASK(26, 25) + +#define V2_QPC_BYTE_248_IRRL_PSN_VLD_S 27 + +#define V2_QPC_BYTE_248_RNR_RETRY_FLAG_S 28 + +#define V2_QPC_BYTE_248_CQ_ERR_IND_S 31 + +#define V2_QPC_BYTE_252_TX_CQN_S 0 +#define V2_QPC_BYTE_252_TX_CQN_M GENMASK(23, 0) + +#define V2_QPC_BYTE_252_SIG_TYPE_S 24 + +#define V2_QPC_BYTE_252_ERR_TYPE_S 25 +#define V2_QPC_BYTE_252_ERR_TYPE_M GENMASK(31, 25) + +#define V2_QPC_BYTE_256_RQ_CQE_IDX_S 0 +#define V2_QPC_BYTE_256_RQ_CQE_IDX_M GENMASK(15, 0) + +#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 +#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) + struct hns_roce_v2_cqe { u32 byte_4; u32 rkey_immtdata; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 881ea67e6bac..c04aa81ffac9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -286,20 +286,27 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, return -EINVAL; } - /* In v1 engine, parameter verification procession */ - max_cnt = cap->max_recv_wr > HNS_ROCE_MIN_WQE_NUM ? - cap->max_recv_wr : HNS_ROCE_MIN_WQE_NUM; + if (hr_dev->caps.min_wqes) + max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); + else + max_cnt = cap->max_recv_wr; + hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "hns_roce_set_rq_size rq.wqe_cnt too large\n"); + dev_err(dev, "while setting rq size, rq.wqe_cnt too large\n"); return -EINVAL; } max_cnt = max(1U, cap->max_recv_sge); hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); - /* WQE is fixed for 64B */ - hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); + if (hr_dev->caps.max_rq_sg <= 2) + hr_qp->rq.wqe_shift = + ilog2(hr_dev->caps.max_rq_desc_sz); + else + hr_qp->rq.wqe_shift = + ilog2(hr_dev->caps.max_rq_desc_sz + * hr_qp->rq.max_gs); } cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; @@ -309,11 +316,13 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, } static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); u8 max_sq_stride = ilog2(roundup_sq_stride); + u32 max_cnt; /* Sanity check SQ size before proceeding */ if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || @@ -323,18 +332,61 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, return -EINVAL; } + if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { + dev_err(hr_dev->dev, "SQ sge error! 
max_send_sge=%d\n", + cap->max_send_sge); + return -EINVAL; + } + hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride; + max_cnt = max(1U, cap->max_send_sge); + if (hr_dev->caps.max_sq_sg <= 2) + hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); + else + hr_qp->sq.max_gs = max_cnt; + + if (hr_qp->sq.max_gs > 2) + hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * + (hr_qp->sq.max_gs - 2)); + hr_qp->sge.sge_shift = 4; + /* Get buf size, SQ and RQ are aligned to page_szie */ - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + if (hr_dev->caps.max_sq_sg <= 2) { + hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); - hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.offset = 0; + hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); + } else { + hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->rq.wqe_shift), PAGE_SIZE) + + HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + hr_qp->sge.sge_shift), PAGE_SIZE) + + HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + + hr_qp->sq.offset = 0; + if (hr_qp->sge.sge_cnt) { + hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( + (hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), + PAGE_SIZE); + hr_qp->rq.offset = hr_qp->sge.offset + + HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + hr_qp->sge.sge_shift), + PAGE_SIZE); + } else { + hr_qp->rq.offset = HNS_ROCE_ALOGN_UP( + (hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), + PAGE_SIZE); + } + } return 0; } @@ -345,11 +397,12 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; u32 max_cnt; + int size; if (cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg || cap->max_inline_data > hr_dev->caps.max_sq_inline) { - dev_err(dev, "hns_roce_set_kernel_sq_size error1\n"); + dev_err(dev, "SQ WR or sge or inline data error!\n"); return -EINVAL; } @@ -357,27 +410,45 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq_max_wqes_per_wr = 1; hr_qp->sq_spare_wqes = 0; - /* In v1 engine, parameter verification procession */ - max_cnt = cap->max_send_wr > HNS_ROCE_MIN_WQE_NUM ? 
- cap->max_send_wr : HNS_ROCE_MIN_WQE_NUM; + if (hr_dev->caps.min_wqes) + max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); + else + max_cnt = cap->max_send_wr; + hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "hns_roce_set_kernel_sq_size sq.wqe_cnt too large\n"); + dev_err(dev, "while setting kernel sq size, sq.wqe_cnt too large\n"); return -EINVAL; } /* Get data_seg numbers */ max_cnt = max(1U, cap->max_send_sge); - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); + if (hr_dev->caps.max_sq_sg <= 2) + hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); + else + hr_qp->sq.max_gs = max_cnt; - /* Get buf size, SQ and RQ are aligned to page_szie */ - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); + if (hr_qp->sq.max_gs > 2) { + hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * + (hr_qp->sq.max_gs - 2)); + hr_qp->sge.sge_shift = 4; + } + + /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); + size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, + PAGE_SIZE); + + if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { + hr_qp->sge.offset = size; + size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << + hr_qp->sge.sge_shift, PAGE_SIZE); + } + + hr_qp->rq.offset = size; + size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), + PAGE_SIZE); + hr_qp->buff_size = size; /* Get wr and sge number which send */ cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; @@ -425,7 +496,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_out; } - ret = hns_roce_set_user_sq_size(hr_dev, hr_qp, &ucmd); + ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, + &ucmd); if (ret) { dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n"); goto err_out; @@ -528,7 +600,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } } - if ((init_attr->qp_type) == IB_QPT_GSI) { + if (init_attr->qp_type == IB_QPT_GSI && + hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + /* In v1 engine, GSI QP context in RoCE engine's register */ ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp); if (ret) { dev_err(dev, "hns_roce_qp_alloc failed!\n"); @@ -700,7 +774,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, p = attr_mask & IB_QP_PORT ? 
(attr->port_num - 1) : hr_qp->port; active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu); - if (attr->path_mtu > IB_MTU_2048 || + if ((hr_dev->caps.max_mtu == IB_MTU_4096 && + attr->path_mtu > IB_MTU_4096) || + (hr_dev->caps.max_mtu == IB_MTU_2048 && + attr->path_mtu > IB_MTU_2048) || attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) { dev_err(dev, "attr path_mtu(%d)invalid while modify qp", @@ -724,9 +801,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (cur_state == new_state && cur_state == IB_QPS_RESET) { - ret = -EPERM; - dev_err(dev, "cur_state=%d new_state=%d\n", cur_state, - new_state); + ret = 0; goto out; } @@ -804,6 +879,13 @@ void *get_send_wqe(struct hns_roce_qp *hr_qp, int n) } EXPORT_SYMBOL_GPL(get_send_wqe); +void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n) +{ + return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset + + (n << hr_qp->sge.sge_shift)); +} +EXPORT_SYMBOL_GPL(get_send_extend_sge); + bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, struct ib_cq *ib_cq) { From 2d40788825acf8abc609740e1f89a4df77dc2f00 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:14 +0800 Subject: [PATCH 026/296] RDMA/hns: Add support for processing send wr and receive wr This patch is implementing for posting send request and receiving request for hip08 RoCE driver. such as post send verbs and post recv verbs. Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 +- drivers/infiniband/hw/hns/hns_roce_device.h | 2 + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 325 ++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 72 +++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 5 +- 6 files changed, 406 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index fa17dbc78467..88cdf6f67b69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -351,8 +351,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, } uar = &hr_dev->priv_uar; - hr_cq->cq_db_l = hr_dev->reg_base + ROCEE_DB_OTHERS_L_0_REG + - 0x1000 * uar->index; + hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + + DB_REG_OFFSET * uar->index; } /* Allocate cq index, fill cq_context */ diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 22f20795cef5..b45dba53e3b9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -643,6 +643,8 @@ struct hns_roce_dev { int cmd_mod; int loop_idc; + u32 sdb_offset; + u32 odb_offset; dma_addr_t tptr_dma_addr; /*only for hw v1*/ u32 tptr_size; /*only for hw v1*/ const struct hns_roce_hw *hw; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 8ed053809cea..426f55a030e6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3221,7 +3221,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, if (ibqp->uobject) { hr_qp->rq.db_reg_l = hr_dev->reg_base + - ROCEE_DB_OTHERS_L_0_REG + + hr_dev->odb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index; } @@ -4084,6 +4084,8 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) /* cmd issue mode: 0 is poll, 1 is event */ hr_dev->cmd_mod 
= 1; hr_dev->loop_idc = 0; + hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; + hr_dev->odb_offset = ROCEE_DB_OTHERS_L_0_REG; /* read the interrupt names from the DT or ACPI */ ret = device_property_read_string_array(dev, "interrupt-names", diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f2425eecf7d1..b614177f9165 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -43,6 +43,327 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v2.h" +static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->lkey = cpu_to_le32(sg->lkey); + dseg->addr = cpu_to_le64(sg->addr); + dseg->len = cpu_to_le32(sg->length); +} + +static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; + struct hns_roce_qp *qp = to_hr_qp(ibqp); + struct hns_roce_v2_wqe_data_seg *dseg; + struct device *dev = hr_dev->dev; + struct hns_roce_v2_db sq_db; + unsigned int sge_ind = 0; + unsigned int wqe_sz = 0; + unsigned long flags; + unsigned int ind; + void *wqe = NULL; + int ret = 0; + int nreq; + int i; + + if (unlikely(ibqp->qp_type != IB_QPT_RC)) { + dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); + *bad_wr = NULL; + return -EOPNOTSUPP; + } + + if (unlikely(qp->state != IB_QPS_RTS && qp->state != IB_QPS_SQD)) { + dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state); + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->sq.lock, flags); + ind = qp->sq_next_wqe; + sge_ind = qp->next_sge; + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + ret = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, qp->sq.max_gs); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = + wr->wr_id; + + + rc_sq_wqe = wqe; + memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); + for (i = 0; i < wr->num_sge; i++) + rc_sq_wqe->msg_len += wr->sg_list[i].length; + + rc_sq_wqe->inv_key_immtdata = send_ieth(wr); + + switch (wr->opcode) { + case IB_WR_RDMA_READ: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_RDMA_READ); + rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); + break; + case IB_WR_RDMA_WRITE: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_RDMA_WRITE); + rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); + break; + case IB_WR_RDMA_WRITE_WITH_IMM: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM); + rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); + break; + case IB_WR_SEND: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_SEND); + break; + case IB_WR_SEND_WITH_INV: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + 
HNS_ROCE_V2_WQE_OP_SEND_WITH_INV); + break; + case IB_WR_SEND_WITH_IMM: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM); + break; + case IB_WR_LOCAL_INV: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_LOCAL_INV); + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP); + break; + case IB_WR_ATOMIC_FETCH_AND_ADD: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD); + break; + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP); + break; + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD); + break; + default: + roce_set_field(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_MASK); + break; + } + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, 1); + + wqe += sizeof(struct hns_roce_v2_rc_send_wqe); + dseg = wqe; + if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { + if (rc_sq_wqe->msg_len > + hr_dev->caps.max_sq_inline) { + ret = -EINVAL; + *bad_wr = wr; + dev_err(dev, "inline len(1-%d)=%d, illegal", + rc_sq_wqe->msg_len, + hr_dev->caps.max_sq_inline); + goto out; + } + + for (i = 0; i < wr->num_sge; i++) { + memcpy(wqe, ((void *)wr->sg_list[i].addr), + wr->sg_list[i].length); + wqe += wr->sg_list[i].length; + wqe_sz += wr->sg_list[i].length; + } + + roce_set_bit(rc_sq_wqe->byte_4, + V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); + } else { + if (wr->num_sge <= 2) { + for (i = 0; i < wr->num_sge; i++) + set_data_seg_v2(dseg + i, + wr->sg_list + i); + } else { + roce_set_field(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, + sge_ind & (qp->sge.sge_cnt - 1)); + + for (i = 0; i < 2; i++) + set_data_seg_v2(dseg + i, + wr->sg_list + i); + + dseg = get_send_extend_sge(qp, + sge_ind & (qp->sge.sge_cnt - 1)); + + for (i = 0; i < wr->num_sge - 2; i++) { + set_data_seg_v2(dseg + i, + wr->sg_list + 2 + i); + sge_ind++; + } + } + + roce_set_field(rc_sq_wqe->byte_16, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, + wr->num_sge); + wqe_sz += wr->num_sge * + sizeof(struct hns_roce_v2_wqe_data_seg); + } + ind++; + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + /* Memory barrier */ + wmb(); + + sq_db.byte_4 = 0; + sq_db.parameter = 0; + + roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_TAG_M, + V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); + roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, + V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); + roce_set_field(sq_db.parameter, V2_DB_PARAMETER_CONS_IDX_M, + V2_DB_PARAMETER_CONS_IDX_S, + qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); + roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, + V2_DB_PARAMETER_SL_S, qp->sl); + + hns_roce_write64_k((__be32 *)&sq_db, qp->sq.db_reg_l); + + qp->sq_next_wqe = ind; + qp->next_sge = sge_ind; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return ret; +} + +static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr 
*wr, + struct ib_recv_wr **bad_wr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_v2_wqe_data_seg *dseg; + struct device *dev = hr_dev->dev; + struct hns_roce_v2_db rq_db; + unsigned long flags; + void *wqe = NULL; + int ret = 0; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&hr_qp->rq.lock, flags); + ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); + + if (hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_ERR) { + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + *bad_wr = wr; + return -EINVAL; + } + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&hr_qp->rq, nreq, + hr_qp->ibqp.recv_cq)) { + ret = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, hr_qp->rq.max_gs); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(hr_qp, ind); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + for (i = 0; i < wr->num_sge; i++) { + if (!wr->sg_list[i].length) + continue; + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + } + + if (i < hr_qp->rq.max_gs) { + dseg[i].lkey = cpu_to_be32(HNS_ROCE_INVALID_LKEY); + dseg[i].addr = 0; + } + + hr_qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + hr_qp->rq.head += nreq; + /* Memory barrier */ + wmb(); + + rq_db.byte_4 = 0; + rq_db.parameter = 0; + + roce_set_field(rq_db.byte_4, V2_DB_BYTE_4_TAG_M, + V2_DB_BYTE_4_TAG_S, hr_qp->qpn); + roce_set_field(rq_db.byte_4, V2_DB_BYTE_4_CMD_M, + V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_RQ_DB); + roce_set_field(rq_db.parameter, V2_DB_PARAMETER_CONS_IDX_M, + V2_DB_PARAMETER_CONS_IDX_S, hr_qp->rq.head); + + hns_roce_write64_k((__be32 *)&rq_db, hr_qp->rq.db_reg_l); + } + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + + return ret; +} + static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) { int ntu = ring->next_to_use; @@ -2588,6 +2909,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .modify_qp = hns_roce_v2_modify_qp, .query_qp = hns_roce_v2_query_qp, .destroy_qp = hns_roce_v2_destroy_qp, + .post_send = hns_roce_v2_post_send, + .post_recv = hns_roce_v2_post_recv, .req_notify_cq = hns_roce_v2_req_notify_cq, .poll_cq = hns_roce_v2_poll_cq, }; @@ -2612,6 +2935,8 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, } hr_dev->hw = &hns_roce_hw_v2; + hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; + hr_dev->odb_offset = hr_dev->sdb_offset; /* Get info from NIC driver. 
*/ hr_dev->reg_base = handle->rinfo.roce_io_base; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 0360df04467a..6172146ad117 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -70,6 +70,7 @@ #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 +#define HNS_ROCE_INVALID_LKEY 0x100 #define HNS_ROCE_CMQ_TX_TIMEOUT 200 #define HNS_ROCE_CONTEXT_HOP_NUM 1 @@ -110,6 +111,23 @@ #define HNS_ROCE_V2_CQE_QPN_MASK 0x3ffff +enum { + HNS_ROCE_V2_WQE_OP_SEND = 0x0, + HNS_ROCE_V2_WQE_OP_SEND_WITH_INV = 0x1, + HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM = 0x2, + HNS_ROCE_V2_WQE_OP_RDMA_WRITE = 0x3, + HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM = 0x4, + HNS_ROCE_V2_WQE_OP_RDMA_READ = 0x5, + HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP = 0x6, + HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD = 0x7, + HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8, + HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, + HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, + HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, + HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc, + HNS_ROCE_V2_WQE_OP_MASK = 0x1f, +}; + enum { HNS_ROCE_SQ_OPCODE_SEND = 0x0, HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1, @@ -135,6 +153,9 @@ enum { }; enum { + HNS_ROCE_V2_SQ_DB = 0x0, + HNS_ROCE_V2_RQ_DB = 0x1, + HNS_ROCE_V2_SRQ_DB = 0x2, HNS_ROCE_V2_CQ_DB_PTR = 0x3, HNS_ROCE_V2_CQ_DB_NTR = 0x4, }; @@ -775,6 +796,12 @@ struct hns_roce_v2_cqe { #define V2_DB_BYTE_4_CMD_S 24 #define V2_DB_BYTE_4_CMD_M GENMASK(27, 24) +#define V2_DB_PARAMETER_CONS_IDX_S 0 +#define V2_DB_PARAMETER_CONS_IDX_M GENMASK(15, 0) + +#define V2_DB_PARAMETER_SL_S 16 +#define V2_DB_PARAMETER_SL_M GENMASK(18, 16) + struct hns_roce_v2_cq_db { u32 byte_4; u32 parameter; @@ -794,6 +821,51 @@ struct hns_roce_v2_cq_db { #define V2_CQ_DB_PARAMETER_NOTIFY_S 24 +struct hns_roce_v2_rc_send_wqe { + u32 byte_4; + u32 msg_len; + u32 inv_key_immtdata; + u32 byte_16; + u32 byte_20; + u32 rkey; + u64 va; +}; + +#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 +#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) + +#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 + +#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 + +#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9 + +#define V2_RC_SEND_WQE_BYTE_4_SO_S 10 + +#define V2_RC_SEND_WQE_BYTE_4_SE_S 11 + +#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 + +#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 +#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) + +#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24 +#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) + +#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 +#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) + +struct hns_roce_v2_wqe_data_seg { + __be32 len; + __be32 lkey; + __be64 addr; +}; + +struct hns_roce_v2_db { + u32 byte_4; + u32 parameter; +}; + struct hns_roce_query_version { __le16 rocee_vendor_id; __le16 rocee_hw_version; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index c04aa81ffac9..e6d11154e6ca 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -549,10 +549,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } /* QP doorbell register address */ - hr_qp->sq.db_reg_l = hr_dev->reg_base + ROCEE_DB_SQ_L_0_REG + + hr_qp->sq.db_reg_l = hr_dev->reg_base + hr_dev->sdb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index; - hr_qp->rq.db_reg_l = hr_dev->reg_base + - ROCEE_DB_OTHERS_L_0_REG + + 
hr_qp->rq.db_reg_l = hr_dev->reg_base + hr_dev->odb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index; /* Allocate QP buf */ From 3958cc564e1c5b97ddff7e12f95811a993a936e2 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:15 +0800 Subject: [PATCH 027/296] RDMA/hns: Configure the MTPT in hip08 The MTPT records the attribute of the registered MR. The MTPT format will be updated in hip08, and the MTPT should be configured. This patch is to configure the MTPT for the registered MR in hip08. Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 94 ++++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 68 ++++++++++++++++ 2 files changed, 162 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b614177f9165..4171f73bdad5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1086,6 +1086,99 @@ static void hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); } +static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, + unsigned long mtpt_idx) +{ + struct hns_roce_v2_mpt_entry *mpt_entry; + struct scatterlist *sg; + u64 *pages; + int entry; + int i; + + mpt_entry = mb_buf; + memset(mpt_entry, 0, sizeof(*mpt_entry)); + + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, + V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, + V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num == + HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num); + roce_set_field(mpt_entry->byte_4_pd_hop_st, + V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, + V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, mr->pbl_ba_pg_sz); + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, + V2_MPT_BYTE_4_PD_S, mr->pd); + mpt_entry->byte_4_pd_hop_st = cpu_to_le32(mpt_entry->byte_4_pd_hop_st); + + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, + (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, + (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, + (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, + (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); + mpt_entry->byte_8_mw_cnt_en = cpu_to_le32(mpt_entry->byte_8_mw_cnt_en); + + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, + mr->type == MR_TYPE_MR ? 
0 : 1); + mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa); + + mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); + mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); + mpt_entry->lkey = cpu_to_le32(mr->key); + mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); + mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + + if (mr->type == MR_TYPE_DMA) + return 0; + + mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); + + mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); + roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, + V2_MPT_BYTE_48_PBL_BA_H_S, + upper_32_bits(mr->pbl_ba >> 3)); + mpt_entry->byte_48_mode_ba = cpu_to_le32(mpt_entry->byte_48_mode_ba); + + pages = (u64 *)__get_free_page(GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = 0; + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + pages[i] = ((u64)sg_dma_address(sg)) >> 6; + + /* Record the first 2 entry directly to MTPT table */ + if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) + break; + i++; + } + + mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); + roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, + V2_MPT_BYTE_56_PA0_H_S, + upper_32_bits(pages[0])); + mpt_entry->byte_56_pa0_h = cpu_to_le32(mpt_entry->byte_56_pa0_h); + + mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1])); + roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M, + V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1])); + + free_page((unsigned long)pages); + + roce_set_field(mpt_entry->byte_64_buf_pa1, + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, mr->pbl_buf_pg_sz); + mpt_entry->byte_64_buf_pa1 = cpu_to_le32(mpt_entry->byte_64_buf_pa1); + + return 0; +} + static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, @@ -2903,6 +2996,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .chk_mbox = hns_roce_v2_chk_mbox, .set_gid = hns_roce_v2_set_gid, .set_mac = hns_roce_v2_set_mac, + .write_mtpt = hns_roce_v2_write_mtpt, .write_cqc = hns_roce_v2_write_cqc, .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 6172146ad117..4fc4acd45381 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -790,6 +790,74 @@ struct hns_roce_v2_cqe { #define V2_CQE_BYTE_32_LPK_S 31 +struct hns_roce_v2_mpt_entry { + __le32 byte_4_pd_hop_st; + __le32 byte_8_mw_cnt_en; + __le32 byte_12_mw_pa; + __le32 bound_lkey; + __le32 len_l; + __le32 len_h; + __le32 lkey; + __le32 va_l; + __le32 va_h; + __le32 pbl_size; + __le32 pbl_ba_l; + __le32 byte_48_mode_ba; + __le32 pa0_l; + __le32 byte_56_pa0_h; + __le32 pa1_l; + __le32 byte_64_buf_pa1; +}; + +#define V2_MPT_BYTE_4_MPT_ST_S 0 +#define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0) + +#define V2_MPT_BYTE_4_PBL_HOP_NUM_S 2 +#define V2_MPT_BYTE_4_PBL_HOP_NUM_M GENMASK(3, 2) + +#define V2_MPT_BYTE_4_PBL_BA_PG_SZ_S 4 +#define V2_MPT_BYTE_4_PBL_BA_PG_SZ_M GENMASK(7, 4) + +#define V2_MPT_BYTE_4_PD_S 8 +#define V2_MPT_BYTE_4_PD_M GENMASK(31, 8) + +#define V2_MPT_BYTE_8_RA_EN_S 0 + +#define V2_MPT_BYTE_8_R_INV_EN_S 1 + +#define V2_MPT_BYTE_8_L_INV_EN_S 2 + +#define V2_MPT_BYTE_8_BIND_EN_S 3 + +#define V2_MPT_BYTE_8_ATOMIC_EN_S 4 + +#define V2_MPT_BYTE_8_RR_EN_S 5 + +#define V2_MPT_BYTE_8_RW_EN_S 6 + +#define V2_MPT_BYTE_8_LW_EN_S 7 + +#define V2_MPT_BYTE_12_PA_S 1 + +#define 
V2_MPT_BYTE_12_INNER_PA_VLD_S 7 + +#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8 +#define V2_MPT_BYTE_12_MW_BIND_QPN_M GENMASK(31, 8) + +#define V2_MPT_BYTE_48_PBL_BA_H_S 0 +#define V2_MPT_BYTE_48_PBL_BA_H_M GENMASK(28, 0) + +#define V2_MPT_BYTE_48_BLK_MODE_S 29 + +#define V2_MPT_BYTE_56_PA0_H_S 0 +#define V2_MPT_BYTE_56_PA0_H_M GENMASK(25, 0) + +#define V2_MPT_BYTE_64_PA1_H_S 0 +#define V2_MPT_BYTE_64_PA1_H_M GENMASK(25, 0) + +#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S 28 +#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M GENMASK(31, 28) + #define V2_DB_BYTE_4_TAG_S 0 #define V2_DB_BYTE_4_TAG_M GENMASK(23, 0) From 08eb3018b6d91e0736cffba0a9a8c9aa1d2eb743 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:16 +0800 Subject: [PATCH 028/296] RDMA/hns: Add releasing resource operation in error branch This patch adds a resource-releasing operation to the error branch of hns_roce_table_get(): when hns_roce_set_hem() fails, the newly allocated HEM is freed and the table slot is cleared. Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 8a3e174f5b5f..125e26ebf250 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -563,6 +563,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, /* Set HEM base address(128K/page, pa) to Hardware */ if (hns_roce_set_hem(hr_dev, table, obj)) { + hns_roce_free_hem(hr_dev, table->hem[i]); + table->hem[i] = NULL; ret = -ENODEV; dev_err(dev, "set HEM base address to HW failed.\n"); goto out; From 5caad67cb337a81ae6f579a04372865a4dfc0065 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:17 +0800 Subject: [PATCH 029/296] RDMA/hns: Replace condition statement using hardware version information This patch replaces a hardware-version condition statement with a check of the device capabilities the branch actually uses, reducing reliance on hardware version information in the common driver.
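[ Illustration, not part of the patch: the pattern being applied is to gate a code path on the resources it actually dereferences (here tptr_dma_addr and tptr_size, which only hw v1 populates) rather than on a hardware revision. A minimal userspace sketch of that idea; struct dev_caps is a made-up stand-in for the two hns_roce_dev fields the new condition tests: ]

#include <stdbool.h>
#include <stdio.h>

/* Made-up stand-in for the two hns_roce_dev fields the new test reads. */
struct dev_caps {
	unsigned long long tptr_dma_addr;	/* stays 0 when no TPTR buffer exists */
	unsigned int tptr_size;			/* likewise stays 0 */
};

/* Equivalent of the new condition in hns_roce_mmap(): no version check. */
static bool has_tptr(const struct dev_caps *caps)
{
	return caps->tptr_dma_addr && caps->tptr_size;
}

int main(void)
{
	struct dev_caps hw_v1 = { .tptr_dma_addr = 0x1000, .tptr_size = 4096 };
	struct dev_caps hw_v2 = { 0 };

	printf("v1: %d v2: %d\n", has_tptr(&hw_v1), has_tptr(&hw_v2));	/* "v1: 1 v2: 0" */
	return 0;
}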
Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index a110f968987f..7a0c1e8f45dd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -381,7 +381,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - } else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + } else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr && + hr_dev->tptr_size) { /* vm_pgoff: 1 -- TPTR */ if (io_remap_pfn_range(vma, vma->vm_start, hr_dev->tptr_dma_addr >> PAGE_SHIFT, From 78928a17cae238fe21a725e1dbc1d13f6c7e6e14 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 30 Aug 2017 17:23:18 +0800 Subject: [PATCH 030/296] RDMA/hns: Fix inconsistent warning This patch fixes the following smatch warning: drivers/infiniband/hw/hns/hns_roce_hem.h:136 hns_roce_hem_first() warn: inconsistent indenting Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 1aa54b6d583e..af28bbf31c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -133,7 +133,7 @@ static inline void hns_roce_hem_first(struct hns_roce_hem *hem, iter->chunk = list_empty(&hem->chunk_list) ? NULL : list_entry(hem->chunk_list.next, struct hns_roce_hem_chunk, list); - iter->page_idx = 0; + iter->page_idx = 0; } static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter) From 400d324a14acbe9a6189d0336fdb22f69c2cfd9a Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 20 Sep 2017 11:45:26 +0800 Subject: [PATCH 031/296] RDMA/hns: Delete the unnecessary initializing enum to zero This patch deletes the unnecessary explicit initialization of the first enum member to zero.
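[ Illustration, not part of the patch: C guarantees that the first enumerator is 0 when no value is given, and that each later one is the previous plus one, which is why the "= 0" can go. A standalone sketch: ]

#include <stdio.h>

/* The first enumerator defaults to 0; each subsequent one is the
 * previous value plus one, so "MTT_TYPE_WQE = 0" said nothing extra. */
enum mtt_type {
	MTT_TYPE_WQE,	/* implicitly 0 */
	MTT_TYPE_CQE,	/* implicitly 1 */
};

int main(void)
{
	printf("%d %d\n", MTT_TYPE_WQE, MTT_TYPE_CQE);	/* prints "0 1" */
	return 0;
}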
Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index b45dba53e3b9..4f43c91db01a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -171,7 +171,7 @@ enum { }; enum hns_roce_mtt_type { - MTT_TYPE_WQE = 0, + MTT_TYPE_WQE, MTT_TYPE_CQE, }; From 8f63d4b1d557b037b597ba5da295a3eb70a6df72 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 1 Sep 2017 09:22:14 +0100 Subject: [PATCH 032/296] IB/core: fix spelling mistake: "aceess" -> "access" Trivial fix to a spelling mistake in a WARN message. Signed-off-by: Colin Ian King Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_ioctl_merge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index 76ddb6564578..062485f9300d 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -376,7 +376,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me min_id) || WARN(attr_obj_with_special_access && !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY), - "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy aceess but isn't mandatory\n", + "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", min_id) || WARN(IS_ATTR_OBJECT(attr) && attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ, From 931bc0d91639f8fbf336332aa443705004ecf61d Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 6 Sep 2017 13:15:50 +0300 Subject: [PATCH 033/296] IB: Move PCI dependency from root KConfig to HW's KConfigs There is no reason for the entire InfiniBand stack to depend on PCI, so move the dependency to the Kconfig of only the drivers that actually use PCI.
Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/Kconfig | 1 - drivers/infiniband/hw/i40iw/Kconfig | 1 + drivers/infiniband/hw/qedr/Kconfig | 1 + drivers/infiniband/hw/qib/Kconfig | 1 + drivers/infiniband/sw/rdmavt/Kconfig | 1 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - drivers/staging/lustre/lnet/Kconfig | 2 +- 7 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 3726205c8704..b62b3b1e09cd 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -1,6 +1,5 @@ menuconfig INFINIBAND tristate "InfiniBand support" - depends on PCI || BROKEN depends on HAS_IOMEM depends on NET depends on INET diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig index 6e7d27a14061..f6d20ba88c03 100644 --- a/drivers/infiniband/hw/i40iw/Kconfig +++ b/drivers/infiniband/hw/i40iw/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_I40IW tristate "Intel(R) Ethernet X722 iWARP Driver" depends on INET && I40E + depends on PCI select GENERIC_ALLOCATOR ---help--- Intel(R) Ethernet X722 iWARP Driver diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig index 6c9f3923e838..3feec8dddf41 100644 --- a/drivers/infiniband/hw/qedr/Kconfig +++ b/drivers/infiniband/hw/qedr/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_QEDR tristate "QLogic RoCE driver" depends on 64BIT && QEDE + depends on PCI select QED_LL2 select QED_RDMA ---help--- diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index e0fdb9201423..cb06314a2ae2 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_QIB tristate "Intel PCIe HCA support" depends on 64BIT && INFINIBAND_RDMAVT + depends on PCI ---help--- This is a low-level driver for Intel PCIe QLE InfiniBand host channel adapters. This driver does not support the Intel diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index fdd001ce13d8..2b5513da7e83 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_RDMAVT tristate "RDMA verbs transport library" depends on 64BIT + depends on PCI select DMA_VIRT_OPS ---help--- This is a common software verbs provider for RDMA networks. 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bac95b509a9b..2b1b0f2da8fb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -51,7 +51,6 @@ #include #include #include -#include #define DRV_VERSION "1.0.0" diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig index 2b5930150cda..6bcb53d0c6f4 100644 --- a/drivers/staging/lustre/lnet/Kconfig +++ b/drivers/staging/lustre/lnet/Kconfig @@ -34,7 +34,7 @@ config LNET_SELFTEST config LNET_XPRT_IB tristate "LNET infiniband support" - depends on LNET && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS default LNET && INFINIBAND help This option allows the LNET users to use infiniband as an From be92d4891f89cb08f8a7cfc268b211c8e1253497 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 6 Sep 2017 13:52:01 +0300 Subject: [PATCH 034/296] IB/{cxgb3,cxgb4}: Remove unneeded config dependencies CHELSIO_T3 already depend on INET CHELSIO_T4 already depend on (IPV6 || IPV6=n) Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/Kconfig | 2 +- drivers/infiniband/hw/cxgb4/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig index 2b6352b85485..431be733fbbe 100644 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ b/drivers/infiniband/hw/cxgb3/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_CXGB3 tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 && INET + depends on CHELSIO_T3 select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T3 1GbE and diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index afe8b28e0878..0a671a61fc92 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_CXGB4 tristate "Chelsio T4/T5 RDMA Driver" - depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) + depends on CHELSIO_T4 && INET select CHELSIO_LIB select GENERIC_ALLOCATOR ---help--- From 12f727721eee61b3d19dedb95cb893b2baa9fe41 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Sep 2017 23:34:25 +0200 Subject: [PATCH 035/296] IB/uverbs: clean up INIT_UDATA_BUF_OR_NULL usage We get a harmless warning about the fact that we use the result of a multiplication as a condition: drivers/infiniband/core/uverbs_main.c: In function 'ib_uverbs_write': drivers/infiniband/core/uverbs_main.c:787:40: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] drivers/infiniband/core/uverbs_main.c:787:117: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] drivers/infiniband/core/uverbs_main.c:790:50: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] drivers/infiniband/core/uverbs_main.c:790:151: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] This avoids the problem by using an inline function in place of the macro. 
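[ Illustration, not part of the patch: a standalone sketch of the warning and the fix. PICK_BUF()/pick_buf() are hypothetical names standing in for INIT_UDATA_BUF_OR_NULL()/ib_uverbs_init_udata_buf_or_null(); build with gcc 7 and -Werror=int-in-bool-context to see the difference: ]

#include <stddef.h>
#include <stdio.h>

/* Macro version: the caller's "words * 8" lands directly inside the
 * ternary condition, a boolean context, which gcc 7 flags. */
#define PICK_BUF(buf, len) ((len) ? (const void *)(buf) : NULL)

/* Inline-function version: the multiplication is evaluated as an
 * ordinary size_t argument first, so it never appears in a boolean
 * context, and the parameter types are checked too. */
static inline const void *pick_buf(const void *buf, size_t len)
{
	return len ? buf : NULL;
}

int main(void)
{
	char buf[64];
	unsigned int in_words = 0;

	/* gcc 7: "'*' in boolean context, suggest '&&' instead" */
	const void *a = PICK_BUF(buf, in_words * 8);
	/* no warning here */
	const void *b = pick_buf(buf, in_words * 8);

	printf("%p %p\n", (void *)a, (void *)b);	/* both NULL when in_words == 0 */
	return 0;
}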
Fixes: a96e4e2ffe43 ("IB/uverbs: New macro to set pointers to NULL if length is 0 in INIT_UDATA()") Suggested-by: Christoph Hellwig Link: https://patchwork.kernel.org/patch/9940777/ Signed-off-by: Arnd Bergmann Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 15 ++++++++------- drivers/infiniband/core/uverbs_main.c | 22 ++++++++++------------ drivers/infiniband/core/uverbs_std_types.c | 3 ++- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 37c8903e7fd0..a8f104a4a91b 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -55,13 +55,14 @@ (udata)->outlen = (olen); \ } while (0) -#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \ - do { \ - (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \ - (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \ - (udata)->inlen = (ilen); \ - (udata)->outlen = (olen); \ - } while (0) +static inline void +ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, + const void __user *ibuf, + void __user *obuf, + size_t ilen, size_t olen) +{ + INIT_UDATA(udata, ilen ? ibuf : NULL, olen ? obuf : NULL, ilen, olen); +} /* * Our lifetime rules for these structs are the following: diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index dc2aed6fb21b..b5febfd84ee5 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -763,7 +763,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, } if (!access_ok(VERIFY_WRITE, - (void __user *) (unsigned long) ex_hdr.response, + u64_to_user_ptr(ex_hdr.response), (hdr.out_words + ex_hdr.provider_out_words) * 8)) { ret = -EFAULT; goto out; @@ -775,19 +775,17 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, } } - INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, - hdr.in_words * 8, hdr.out_words * 8); + ib_uverbs_init_udata_buf_or_null(&ucore, buf, + u64_to_user_ptr(ex_hdr.response), + hdr.in_words * 8, hdr.out_words * 8); - INIT_UDATA_BUF_OR_NULL(&uhw, - buf + ucore.inlen, - (unsigned long) ex_hdr.response + ucore.outlen, - ex_hdr.provider_in_words * 8, - ex_hdr.provider_out_words * 8); + ib_uverbs_init_udata_buf_or_null(&uhw, + buf + ucore.inlen, + u64_to_user_ptr(ex_hdr.response) + ucore.outlen, + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); - ret = uverbs_ex_cmd_table[command](file, - ib_dev, - &ucore, - &uhw); + ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); if (!ret) ret = written_count; } else { diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0a98579700ec..b095bce7f238 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -246,7 +246,8 @@ static void create_udata(struct uverbs_attr_bundle *ctx, outbuf_len = uhw_out->ptr_attr.len; } - INIT_UDATA_BUF_OR_NULL(udata, inbuf, outbuf, inbuf_len, outbuf_len); + ib_uverbs_init_udata_buf_or_null(udata, inbuf, outbuf, inbuf_len, + outbuf_len); } static int uverbs_create_cq_handler(struct ib_device *ib_dev, From 40a203396cc1c239f2e71c47c66ed03097123d2c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Sep 2017 23:34:26 +0200 Subject: [PATCH 036/296] IB/uverbs: clean up INIT_UDATA() macro usage After changing INIT_UDATA_BUF_OR_NULL() to an inline function, this does the same change to INIT_UDATA for consistency. 
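[ Illustration, not part of the patch: a loose userspace analogue of u64_to_user_ptr(), which a later paragraph of this message mentions. uAPI structures carry user pointers as u64, and the helper hides the double cast behind one name: ]

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's u64_to_user_ptr(): narrow the u64 ABI
 * field to pointer width, then convert, in one named place. */
static inline void *u64_to_ptr(uint64_t val)
{
	return (void *)(uintptr_t)val;
}

int main(void)
{
	int resp = 42;
	uint64_t response = (uintptr_t)&resp;	/* as carried in a uAPI struct */

	int *old_way = (int *)(unsigned long)response;	/* open-coded double cast */
	int *new_way = u64_to_ptr(response);		/* named helper */

	printf("%d %d\n", *old_way, *new_way);	/* prints "42 42" */
	return 0;
}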
I'm keeping it separate as this part is much larger and we wouldn't want to backport this to stable kernels if we ever want to address the gcc warnings by backporting the first patch. Again, using an inline function gives us better type safety here among other issues with macros. I'm using u64_to_user_ptr() to convert the user pointer to simplify the logic rather than adding lots of new type casts. Signed-off-by: Arnd Bergmann Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 22 +++-- drivers/infiniband/core/uverbs_cmd.c | 125 ++++++++++++--------------- 2 files changed, 67 insertions(+), 80 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a8f104a4a91b..ee2739ae4305 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -47,13 +47,17 @@ #include #include -#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ - do { \ - (udata)->inbuf = (const void __user *) (ibuf); \ - (udata)->outbuf = (void __user *) (obuf); \ - (udata)->inlen = (ilen); \ - (udata)->outlen = (olen); \ - } while (0) +static inline void +ib_uverbs_init_udata(struct ib_udata *udata, + const void __user *ibuf, + void __user *obuf, + size_t ilen, size_t olen) +{ + udata->inbuf = ibuf; + udata->outbuf = obuf; + udata->inlen = ilen; + udata->outlen = olen; +} static inline void ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, @@ -61,7 +65,9 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, void __user *obuf, size_t ilen, size_t olen) { - INIT_UDATA(udata, ilen ? ibuf : NULL, olen ? obuf : NULL, ilen, olen); + ib_uverbs_init_udata(udata, + ilen ? ibuf : NULL, olen ? obuf : NULL, + ilen, olen); } /* diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4ab30d832ac5..cf787fabd88c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -91,8 +91,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err; } - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -141,8 +141,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_fd; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { ret = -EFAULT; goto err_file; } @@ -238,8 +237,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; return in_len; @@ -295,8 +293,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.link_layer = rdma_port_get_link_layer(ib_dev, cmd.port_num); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; return in_len; @@ -320,8 +317,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + 
u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -344,8 +341,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } @@ -490,8 +486,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -556,8 +552,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, atomic_inc(&xrcd->usecnt); } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } @@ -655,8 +650,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -705,8 +700,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, resp.rkey = mr->rkey; resp.mr_handle = uobj->id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } @@ -748,8 +742,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -800,8 +794,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, resp.lkey = mr->lkey; resp.rkey = mr->rkey; - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; else ret = in_len; @@ -867,8 +860,8 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, goto err_free; } - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long)cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -889,8 +882,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, resp.rkey = mw->rkey; resp.mw_handle = uobj->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) { + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { ret = -EFAULT; goto err_copy; } @@ -956,8 +948,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, uobj_file.uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if 
(copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { uobj_alloc_abort(uobj); return -EFAULT; } @@ -1087,10 +1078,11 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp)); + ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response), + sizeof(cmd), sizeof(resp)); - INIT_UDATA(&uhw, buf + sizeof(cmd), - (unsigned long)cmd.response + sizeof(resp), + ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -1173,8 +1165,8 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -1188,8 +1180,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, resp.cqe = cq->cqe; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp.cqe)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe)) ret = -EFAULT; out: @@ -1249,7 +1240,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, return -EINVAL; /* we copy a struct ib_uverbs_poll_cq_resp to user space */ - header_ptr = (void __user *)(unsigned long) cmd.response; + header_ptr = u64_to_user_ptr(cmd.response); data_ptr = header_ptr + sizeof resp; memset(&resp, 0, sizeof resp); @@ -1343,8 +1334,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, resp.async_events_reported = obj->async_events_reported; uverbs_uobject_put(uobj); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; return in_len; @@ -1650,10 +1640,10 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), - resp_size); - INIT_UDATA(&uhw, buf + sizeof(cmd), - (unsigned long)cmd.response + resp_size, + ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response), + sizeof(cmd), resp_size); + ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + resp_size, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - resp_size); @@ -1750,8 +1740,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -1795,8 +1785,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, resp.qpn = qp->qp_num; resp.qp_handle = obj->uevent.uobject.id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { ret = -EFAULT; goto err_destroy; } @@ -1911,8 +1900,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, resp.max_inline_data = init_attr->cap.max_inline_data; resp.sq_sig_all = init_attr->sq_sig_type == 
IB_SIGNAL_ALL_WR; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) ret = -EFAULT; out: @@ -2042,7 +2030,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) return -EOPNOTSUPP; - INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL, + ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL, in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr), out_len); @@ -2126,8 +2114,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, resp.events_reported = obj->uevent.events_reported; uverbs_uobject_put(uobj); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; return in_len; @@ -2311,8 +2298,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) ret = -EFAULT; out_put: @@ -2460,8 +2446,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, } } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) ret = -EFAULT; out: @@ -2510,8 +2495,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) ret = -EFAULT; out: @@ -2548,8 +2532,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) return -EINVAL; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long)cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -2600,8 +2584,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, resp.ah_handle = uobj->id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } @@ -3627,8 +3610,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, xcmd.max_sge = cmd.max_sge; xcmd.srq_limit = cmd.srq_limit; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -3654,8 +3637,8 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), + ib_uverbs_init_udata(&udata, buf + sizeof(cmd), + u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); @@ -3680,7 +3663,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); srq = uobj_get_obj_read(srq, cmd.srq_handle, 
file->ucontext); @@ -3731,8 +3714,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, resp.max_sge = attr.max_sge; resp.srq_limit = attr.srq_limit; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; return in_len; @@ -3773,8 +3755,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, } resp.events_reported = obj->events_reported; uverbs_uobject_put(uobj); - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) return -EFAULT; return in_len; From 3e4d6f91cae689b01865aedb71c1ad88a118084f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 11 Sep 2017 12:42:31 +0100 Subject: [PATCH 037/296] RDMA/cxgb3: remove redundant first assignment of sqp sqp is initialised at its declaration and then updated a little later, making the first initialization redundant. Clean this up by initializing ptr and sqp at their declaration. Cleans up warning: "warning: Value stored to 'sqp' during its initialization is never read" Fixes: a58e58fafdff ("RDMA/cxgb3: Wrap the software send queue pointer as needed on flush") Signed-off-by: Colin Ian King Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 3eff6541bd6f..3328acc53c2a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -404,12 +404,10 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) { - __u32 ptr; + __u32 ptr = wq->sq_rptr + count; int flushed = 0; - struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); + struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - ptr = wq->sq_rptr + count; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); while (ptr != wq->sq_wptr) { sqp->signaled = 0; insert_sq_cqe(wq, cq, sqp); From e4b2d06892c7f700f3d62dfef603add35269612e Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 25 Sep 2017 05:18:00 -0700 Subject: [PATCH 038/296] IB/ipoib: Remove device when one port fails to init Call ipoib_remove_one when one of the IPoIB ports fails to initialize in order not to leave the module in an unstable state. Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2b1b0f2da8fb..1983494f2c38 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2311,7 +2311,8 @@ static void ipoib_add_one(struct ib_device *device) } if (!count) { - kfree(dev_list); + pr_err("Failed to init port, removing it\n"); + ipoib_remove_one(device, dev_list); return; } From d23a8bafa9d109fe6ea483fe8f96071ac460962c Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 26 Sep 2017 12:20:01 +0530 Subject: [PATCH 039/296] IB/mlx5: pr_err() and mlx5_ib_dbg() strings should end with newlines pr_err() and mlx5_ib_dbg() messages should be terminated with a new-line to avoid other messages being concatenated.
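[ Illustration, not part of the patch: a rough userspace analogy of why the trailing '\n' matters. Like the console log when no newline arrives, stdio keeps writing on the same line, so an unterminated message gets the next one glued onto it: ]

#include <stdio.h>

int main(void)
{
	fprintf(stderr, "cache empty for order %d", 3);	/* missing '\n' */
	fprintf(stderr, "unrelated message\n");		/* ends up appended */

	fprintf(stderr, "cache empty for order %d\n", 3);	/* terminated */
	fprintf(stderr, "unrelated message\n");		/* gets its own line */
	return 0;
}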
Signed-off-by: Arvind Yadav Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 0e2789d9bb4d..92d643a6d41c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1229,13 +1229,13 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { - mlx5_ib_dbg(dev, "cache empty for order %d", order); + mlx5_ib_dbg(dev, "cache empty for order %d\n", order); mr = NULL; } } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { if (access_flags & IB_ACCESS_ON_DEMAND) { err = -EINVAL; - pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); + pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); goto error; } use_umr = false; From 44596ebf8d55caa5dc15b21bf8b435b59988b521 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 26 Sep 2017 12:59:51 +0530 Subject: [PATCH 040/296] IB/ocrdma: pr_err() strings should end with newlines pr_err() messages should end with a new-line to avoid other messages being concatenated. Signed-off-by: Arvind Yadav Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 65b166cc7437..8e2e9bcf1916 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1093,7 +1093,7 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) rsp = &mqe->u.rsp; if (cqe_status || ext_status) { - pr_err("%s() cqe_status=0x%x, ext_status=0x%x,", + pr_err("%s() cqe_status=0x%x, ext_status=0x%x,\n", __func__, cqe_status, ext_status); if (rsp) { /* This is for embedded cmds. */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 66056f9a9700..e528d7acb7f6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -658,7 +658,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp, if (reset) { status = ocrdma_mbx_rdma_stats(dev, true); if (status) { - pr_err("Failed to reset stats = %d", status); + pr_err("Failed to reset stats = %d\n", status); goto err; } } From 548ddb19afbabf8f7af7a900c19d0f0705d6dc90 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Wed, 27 Sep 2017 13:05:49 +0530 Subject: [PATCH 041/296] iw_cxgb4: Remove __func__ parameter from pr_debug() pr_debug() can be enabled to print function names, so remove the unwanted __func__ parameters from the debug logs. Realign the function parameters.
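[ Illustration, not part of the patch: a userspace sketch of why the hand-written "%s ... __func__" pairs can go. In the kernel the function name is prepended at runtime by dynamic debug's 'f' decorator flag (e.g. echo 'module iw_cxgb4 +pf' > /sys/kernel/debug/dynamic_debug/control); the shim below models that by folding __func__ into the macro once: ]

#include <stdio.h>

/* Stand-in for pr_debug() with the 'f' flag enabled: the macro itself
 * prepends the calling function's name, so call sites need not. */
#define pr_debug(fmt, ...) \
	fprintf(stderr, "%s: " fmt, __func__, ##__VA_ARGS__)

static void demo(void)
{
	void *ep = (void *)0x1234;
	unsigned int hwtid = 7;

	/* old call sites passed __func__ by hand; now it comes for free */
	pr_debug("ep %p tid %u\n", ep, hwtid);	/* "demo: ep 0x1234 tid 7" */
}

int main(void)
{
	demo();
	return 0;
}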
Signed-off-by: Potnuri Bharat Teja Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 198 ++++++++++++------------- drivers/infiniband/hw/cxgb4/cq.c | 50 +++---- drivers/infiniband/hw/cxgb4/device.c | 16 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 13 +- drivers/infiniband/hw/cxgb4/mem.c | 26 ++-- drivers/infiniband/hw/cxgb4/provider.c | 36 ++--- drivers/infiniband/hw/cxgb4/qp.c | 55 ++++--- drivers/infiniband/hw/cxgb4/resource.c | 46 +++--- drivers/infiniband/hw/cxgb4/t4.h | 12 +- 9 files changed, 217 insertions(+), 235 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index daf7a56e5d7e..23cb4af8ce09 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -180,7 +180,7 @@ static void ref_qp(struct c4iw_ep *ep) static void start_ep_timer(struct c4iw_ep *ep) { - pr_debug("%s ep %p\n", __func__, ep); + pr_debug("ep %p\n", ep); if (timer_pending(&ep->timer)) { pr_err("%s timer already started! ep %p\n", __func__, ep); @@ -196,7 +196,7 @@ static void start_ep_timer(struct c4iw_ep *ep) static int stop_ep_timer(struct c4iw_ep *ep) { - pr_debug("%s ep %p stopping\n", __func__, ep); + pr_debug("ep %p stopping\n", ep); del_timer_sync(&ep->timer); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { c4iw_put_ep(&ep->com); @@ -265,8 +265,8 @@ static void set_emss(struct c4iw_ep *ep, u16 opt) if (ep->emss & 7) pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss, ep->emss); - pr_debug("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt), - ep->mss, ep->emss); + pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss, + ep->emss); } static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) @@ -287,7 +287,7 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) { mutex_lock(&epc->mutex); - pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]); + pr_debug("%s -> %s\n", states[epc->state], states[new]); __state_set(epc, new); mutex_unlock(&epc->mutex); return; @@ -322,7 +322,7 @@ static void *alloc_ep(int size, gfp_t gfp) mutex_init(&epc->mutex); c4iw_init_wr_wait(&epc->wr_wait); } - pr_debug("%s alloc ep %p\n", __func__, epc); + pr_debug("alloc ep %p\n", epc); return epc; } @@ -384,7 +384,7 @@ void _c4iw_free_ep(struct kref *kref) struct c4iw_ep *ep; ep = container_of(kref, struct c4iw_ep, com.kref); - pr_debug("%s ep %p state %s\n", __func__, ep, states[ep->com.state]); + pr_debug("ep %p state %s\n", ep, states[ep->com.state]); if (test_bit(QP_REFERENCED, &ep->com.flags)) deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { @@ -570,7 +570,7 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb) struct c4iw_rdev *rdev = &ep->com.dev->rdev; struct cpl_abort_req *req = cplhdr(skb); - pr_debug("%s rdev %p\n", __func__, rdev); + pr_debug("rdev %p\n", rdev); req->cmd = CPL_ABORT_NO_RST; skb_get(skb); ret = c4iw_ofld_send(rdev, skb); @@ -647,7 +647,7 @@ static int send_halfclose(struct c4iw_ep *ep) struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); if (WARN_ON(!skb)) return -ENOMEM; @@ -662,7 +662,7 @@ static int send_abort(struct c4iw_ep *ep) u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); - 
pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); if (WARN_ON(!req_skb)) return -ENOMEM; @@ -725,7 +725,7 @@ static int send_connect(struct c4iw_ep *ep) roundup(sizev4, 16) : roundup(sizev6, 16); - pr_debug("%s ep %p atid %u\n", __func__, ep, ep->atid); + pr_debug("ep %p atid %u\n", ep, ep->atid); skb = get_skb(NULL, wrlen, GFP_KERNEL); if (!skb) { @@ -824,13 +824,13 @@ static int send_connect(struct c4iw_ep *ep) t5req->params = cpu_to_be64(FILTER_TUPLE_V(params)); t5req->rsvd = cpu_to_be32(isn); - pr_debug("%s snd_isn %u\n", __func__, t5req->rsvd); + pr_debug("snd_isn %u\n", t5req->rsvd); t5req->opt2 = cpu_to_be32(opt2); } else { t6req->params = cpu_to_be64(FILTER_TUPLE_V(params)); t6req->rsvd = cpu_to_be32(isn); - pr_debug("%s snd_isn %u\n", __func__, t6req->rsvd); + pr_debug("snd_isn %u\n", t6req->rsvd); t6req->opt2 = cpu_to_be32(opt2); } } @@ -877,13 +877,13 @@ static int send_connect(struct c4iw_ep *ep) t5req6->params = cpu_to_be64(FILTER_TUPLE_V(params)); t5req6->rsvd = cpu_to_be32(isn); - pr_debug("%s snd_isn %u\n", __func__, t5req6->rsvd); + pr_debug("snd_isn %u\n", t5req6->rsvd); t5req6->opt2 = cpu_to_be32(opt2); } else { t6req6->params = cpu_to_be64(FILTER_TUPLE_V(params)); t6req6->rsvd = cpu_to_be32(isn); - pr_debug("%s snd_isn %u\n", __func__, t6req6->rsvd); + pr_debug("snd_isn %u\n", t6req6->rsvd); t6req6->opt2 = cpu_to_be32(opt2); } @@ -907,8 +907,8 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, struct mpa_message *mpa; struct mpa_v2_conn_params mpa_v2_params; - pr_debug("%s ep %p tid %u pd_len %d\n", - __func__, ep, ep->hwtid, ep->plen); + pr_debug("ep %p tid %u pd_len %d\n", + ep, ep->hwtid, ep->plen); BUG_ON(skb_cloned(skb)); @@ -961,7 +961,7 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, if (mpa_rev_to_use == 2) { mpa->private_data_size = htons(ntohs(mpa->private_data_size) + sizeof (struct mpa_v2_conn_params)); - pr_debug("%s initiator ird %u ord %u\n", __func__, ep->ird, + pr_debug("initiator ird %u ord %u\n", ep->ird, ep->ord); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); @@ -1014,8 +1014,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) struct sk_buff *skb; struct mpa_v2_conn_params mpa_v2_params; - pr_debug("%s ep %p tid %u pd_len %d\n", - __func__, ep, ep->hwtid, ep->plen); + pr_debug("ep %p tid %u pd_len %d\n", + ep, ep->hwtid, ep->plen); mpalen = sizeof(*mpa) + plen; if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) @@ -1094,8 +1094,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) struct sk_buff *skb; struct mpa_v2_conn_params mpa_v2_params; - pr_debug("%s ep %p tid %u pd_len %d\n", - __func__, ep, ep->hwtid, ep->plen); + pr_debug("ep %p tid %u pd_len %d\n", + ep, ep->hwtid, ep->plen); mpalen = sizeof(*mpa) + plen; if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) @@ -1185,7 +1185,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_atid(t, atid); - pr_debug("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, + pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid, be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); mutex_lock(&ep->com.mutex); @@ -1229,7 +1229,7 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); memset(&event, 0, sizeof(event)); 
event.event = IW_CM_EVENT_CLOSE; event.status = status; @@ -1246,7 +1246,7 @@ static void peer_close_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_DISCONNECT; if (ep->com.cm_id) { @@ -1261,7 +1261,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; event.status = -ECONNRESET; @@ -1278,8 +1278,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; - pr_debug("%s ep %p tid %u status %d\n", - __func__, ep, ep->hwtid, status); + pr_debug("ep %p tid %u status %d\n", + ep, ep->hwtid, status); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REPLY; event.status = status; @@ -1308,7 +1308,7 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) } } - pr_debug("%s ep %p tid %u status %d\n", __func__, ep, + pr_debug("ep %p tid %u status %d\n", ep, ep->hwtid, status); set_bit(CONN_RPL_UPCALL, &ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); @@ -1322,7 +1322,7 @@ static int connect_request_upcall(struct c4iw_ep *ep) struct iw_cm_event event; int ret; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; memcpy(&event.local_addr, &ep->com.local_addr, @@ -1359,13 +1359,13 @@ static void established_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; event.ird = ep->ord; event.ord = ep->ird; if (ep->com.cm_id) { - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); set_bit(ESTAB_UPCALL, &ep->com.history); } @@ -1377,8 +1377,8 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); u32 credit_dack; - pr_debug("%s ep %p tid %u credits %u\n", - __func__, ep, ep->hwtid, credits); + pr_debug("ep %p tid %u credits %u\n", + ep, ep->hwtid, credits); skb = get_skb(NULL, wrlen, GFP_KERNEL); if (!skb) { pr_err("update_rx_credits - cannot alloc skb!\n"); @@ -1429,7 +1429,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) int err; int disconnect = 0; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); /* * If we get more than the supported amount of private data @@ -1527,8 +1527,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) MPA_V2_IRD_ORD_MASK; resp_ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; - pr_debug("%s responder ird %u ord %u ep ird %u ord %u\n", - __func__, + pr_debug("responder ird %u ord %u ep ird %u ord %u\n", resp_ird, resp_ord, ep->ird, ep->ord); /* @@ -1573,8 +1572,8 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) if (peer2peer) ep->mpa_attr.p2p_type = p2p_type; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n", - __func__, ep->mpa_attr.crc_enabled, + 
pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n", + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type, p2p_type); @@ -1670,7 +1669,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) struct mpa_v2_conn_params *mpa_v2_params; u16 plen; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); /* * If we get more than the supported amount of private data @@ -1679,7 +1678,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) goto err_stop_timer; - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + pr_debug("enter (%s line %u)\n", __FILE__, __LINE__); /* * Copy the new data into our accumulation buffer. @@ -1695,7 +1694,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (ep->mpa_pkt_len < sizeof(*mpa)) return 0; - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + pr_debug("enter (%s line %u)\n", __FILE__, __LINE__); mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -1758,8 +1757,8 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) MPA_V2_IRD_ORD_MASK; ep->ord = min_t(u32, ep->ord, cur_max_read_depth(ep->com.dev)); - pr_debug("%s initiator ird %u ord %u\n", - __func__, ep->ird, ep->ord); + pr_debug("initiator ird %u ord %u\n", + ep->ird, ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) if (peer2peer) { if (ntohs(mpa_v2_params->ord) & @@ -1776,8 +1775,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (peer2peer) ep->mpa_attr.p2p_type = p2p_type; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n", - __func__, + pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n", ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); @@ -1816,7 +1814,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - pr_debug("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); + pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); mutex_lock(&ep->com.mutex); @@ -1870,7 +1868,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) pr_warn("Abort rpl to freed endpoint\n"); return 0; } - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: @@ -1994,8 +1992,8 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) { ep->snd_win = snd_win; ep->rcv_win = rcv_win; - pr_debug("%s snd_win %d rcv_win %d\n", - __func__, ep->snd_win, ep->rcv_win); + pr_debug("snd_win %d rcv_win %d\n", + ep->snd_win, ep->rcv_win); } #define ACT_OPEN_RETRY_COUNT 2 @@ -2100,7 +2098,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) int iptype; __u8 *ra; - pr_debug("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id); init_timer(&ep->timer); c4iw_init_wr_wait(&ep->com.wr_wait); @@ -2163,8 +2161,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) goto fail4; } - pr_debug("%s txq_idx %u tx_chan %u smac_idx %u 
rss_qid %u l2t_idx %u\n", - __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, ep->l2t->idx); state_set(&ep->com, CONNECTING); @@ -2215,12 +2213,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) la6 = (struct sockaddr_in6 *)&ep->com.local_addr; ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; - pr_debug("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, + pr_debug("ep %p atid %u status %u errno %d\n", ep, atid, status, status2errno(status)); if (cxgb_is_neg_adv(status)) { - pr_debug("%s Connection problems for atid %u status %u (%s)\n", - __func__, atid, status, neg_adv_str(status)); + pr_debug("Connection problems for atid %u status %u (%s)\n", + atid, status, neg_adv_str(status)); ep->stats.connect_neg_adv++; mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.neg_adv++; @@ -2319,7 +2317,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) pr_debug("%s stid %d lookup failure!\n", __func__, stid); goto out; } - pr_debug("%s ep %p status %d error %d\n", __func__, ep, + pr_debug("ep %p status %d error %d\n", ep, rpl->status, status2errno(rpl->status)); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); c4iw_put_ep(&ep->com); @@ -2337,7 +2335,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) pr_debug("%s stid %d lookup failure!\n", __func__, stid); goto out; } - pr_debug("%s ep %p\n", __func__, ep); + pr_debug("ep %p\n", ep); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); c4iw_put_ep(&ep->com); out: @@ -2356,7 +2354,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, int win; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); BUG_ON(skb_cloned(skb)); skb_get(skb); @@ -2427,7 +2425,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (peer2peer) isn += 4; rpl5->iss = cpu_to_be32(isn); - pr_debug("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss)); + pr_debug("iss %u\n", be32_to_cpu(rpl5->iss)); } rpl->opt0 = cpu_to_be64(opt0); @@ -2440,7 +2438,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) { - pr_debug("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid); + pr_debug("c4iw_dev %p tid %u\n", dev, hwtid); BUG_ON(skb_cloned(skb)); skb_trim(skb, sizeof(struct cpl_tid_release)); release_tid(&dev->rdev, hwtid, skb); @@ -2481,16 +2479,16 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) /* Find output route */ if (iptype == 4) { - pr_debug("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n" - , __func__, parent_ep, hwtid, + pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n" + , parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, local_port, peer_port, tos); } else { - pr_debug("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" - , __func__, parent_ep, hwtid, + pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" + , parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, @@ 
-2576,7 +2574,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) child_ep->dst = dst; child_ep->hwtid = hwtid; - pr_debug("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__, + pr_debug("tx_chan %u smac_idx %u rss_qid %u\n", child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); init_timer(&child_ep->timer); @@ -2613,11 +2611,11 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) int ret; ep = get_ep_from_tid(dev, tid); - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); - pr_debug("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid, + pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, ntohs(req->tcp_opt)); set_emss(ep, ntohs(req->tcp_opt)); @@ -2650,7 +2648,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) if (!ep) return 0; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); dst_confirm(ep->dst); set_bit(PEER_CLOSE, &ep->com.history); @@ -2750,7 +2748,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) mutex_unlock(&dev->rdev.stats.lock); goto deref_ep; } - pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); set_bit(PEER_ABORT, &ep->com.history); @@ -2875,7 +2873,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (!ep) return 0; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); /* The cm_id may be null if we failed to connect */ mutex_lock(&ep->com.mutex); @@ -2950,19 +2948,19 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - pr_debug("%s ep %p tid %u credits %u\n", - __func__, ep, ep->hwtid, credits); + pr_debug("ep %p tid %u credits %u\n", + ep, ep->hwtid, credits); if (credits == 0) { - pr_debug("%s 0 credit ack ep %p tid %u state %u\n", - __func__, ep, ep->hwtid, state_read(&ep->com)); + pr_debug("0 credit ack ep %p tid %u state %u\n", + ep, ep->hwtid, state_read(&ep->com)); goto out; } dst_confirm(ep->dst); if (ep->mpa_skb) { - pr_debug("%s last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n", - __func__, ep, ep->hwtid, - state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); + pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n", + ep, ep->hwtid, state_read(&ep->com), + ep->mpa_attr.initiator ? 
1 : 0); mutex_lock(&ep->com.mutex); kfree_skb(ep->mpa_skb); ep->mpa_skb = NULL; @@ -2980,7 +2978,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) int abort; struct c4iw_ep *ep = to_ep(cm_id); - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); if (ep->com.state != MPA_REQ_RCVD) { @@ -3011,7 +3009,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); int abort = 0; - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); if (ep->com.state != MPA_REQ_RCVD) { @@ -3064,7 +3062,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->ird = 1; } - pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); + pr_debug("ird %d ord %d\n", ep->ird, ep->ord); ep->com.cm_id = cm_id; ref_cm_id(&ep->com); @@ -3225,7 +3223,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail2; } ref_qp(ep); - pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, + pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn, ep->com.qp, cm_id); /* @@ -3263,8 +3261,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } /* find a route */ - pr_debug("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", - __func__, &laddr->sin_addr, ntohs(laddr->sin_port), + pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", + &laddr->sin_addr, ntohs(laddr->sin_port), ra, ntohs(raddr->sin_port)); ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, laddr->sin_addr.s_addr, @@ -3285,8 +3283,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } /* find a route */ - pr_debug("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n", - __func__, laddr6->sin6_addr.s6_addr, + pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n", + laddr6->sin6_addr.s6_addr, ntohs(laddr6->sin6_port), raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, @@ -3309,8 +3307,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail4; } - pr_debug("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", - __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, ep->l2t->idx); state_set(&ep->com, CONNECTING); @@ -3424,7 +3422,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto fail1; } skb_queue_head_init(&ep->com.ep_skb_list); - pr_debug("%s ep %p\n", __func__, ep); + pr_debug("ep %p\n", ep); ep->com.cm_id = cm_id; ref_cm_id(&ep->com); ep->com.dev = dev; @@ -3478,7 +3476,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) int err; struct c4iw_listen_ep *ep = to_listen_ep(cm_id); - pr_debug("%s ep %p\n", __func__, ep); + pr_debug("ep %p\n", ep); might_sleep(); state_set(&ep->com, DEAD); @@ -3519,7 +3517,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) mutex_lock(&ep->com.mutex); - pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep, + pr_debug("ep %p state %s, abrupt %d\n", ep, states[ep->com.state], abrupt); /* @@ -3933,7 +3931,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) skb_set_transport_header(skb, (void *)tcph - (void *)rss); skb_get(skb); - pr_debug("%s 
lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__, + pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n", ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); @@ -3941,15 +3939,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) iph->daddr, iph->saddr, tcph->dest, tcph->source, iph->tos); if (!dst) { - pr_err("%s - failed to find dst entry!\n", - __func__); + pr_err("%s - failed to find dst entry!\n", __func__); goto reject; } neigh = dst_neigh_lookup_skb(dst, skb); if (!neigh) { - pr_err("%s - failed to allocate neigh!\n", - __func__); + pr_err("%s - failed to allocate neigh!\n", __func__); goto free_dst; } @@ -4032,8 +4028,7 @@ static void process_timeout(struct c4iw_ep *ep) int abort = 1; mutex_lock(&ep->com.mutex); - pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, - ep->com.state); + pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state); set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: @@ -4176,13 +4171,13 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_wr_wait *wr_waitp; int ret; - pr_debug("%s type %u\n", __func__, rpl->type); + pr_debug("type %u\n", rpl->type); switch (rpl->type) { case FW6_TYPE_WR_RPL: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; - pr_debug("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); + pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret); if (wr_waitp) c4iw_wake_up(wr_waitp, ret ? -ret : 0); kfree_skb(skb); @@ -4219,8 +4214,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) neg_adv_str(req->status)); goto out; } - pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, - ep->com.state); + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); out: diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index be07da1997e6..da62535b3252 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -144,7 +144,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, ret = c4iw_ofld_send(rdev, skb); if (ret) goto err4; - pr_debug("%s wait_event wr_wait %p\n", __func__, &wr_wait); + pr_debug("wait_event wr_wait %p\n", &wr_wait); ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); if (ret) goto err4; @@ -178,7 +178,7 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) { struct t4_cqe cqe; - pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__, + pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n", wq, cq, cq->sw_cidx, cq->sw_pidx); memset(&cqe, 0, sizeof(cqe)); cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | @@ -197,7 +197,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) int in_use = wq->rq.in_use - count; BUG_ON(in_use < 0); - pr_debug("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__, + pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", wq, cq, wq->rq.in_use, count); while (in_use--) { insert_recv_cqe(wq, cq); @@ -211,7 +211,7 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, { struct t4_cqe cqe; - pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__, + pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n", wq, cq, cq->sw_cidx, cq->sw_pidx); memset(&cqe, 0, sizeof(cqe)); cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | @@ -281,8 +281,8 @@ static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) /* 
* Insert this completed cqe into the swcq. */ - pr_debug("%s moving cqe into swcq sq idx %u cq idx %u\n", - __func__, cidx, cq->sw_pidx); + pr_debug("moving cqe into swcq sq idx %u cq idx %u\n", + cidx, cq->sw_pidx); swsqe->cqe.header |= htonl(CQE_SWCQE_V(1)); cq->sw_queue[cq->sw_pidx] = swsqe->cqe; t4_swcq_produce(cq); @@ -337,7 +337,7 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) struct t4_swsqe *swsqe; int ret; - pr_debug("%s cqid 0x%x\n", __func__, chp->cq.cqid); + pr_debug("cqid 0x%x\n", chp->cq.cqid); ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); /* @@ -430,7 +430,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) u32 ptr; *count = 0; - pr_debug("%s count zero %d\n", __func__, *count); + pr_debug("count zero %d\n", *count); ptr = cq->sw_cidx; while (ptr != cq->sw_pidx) { cqe = &cq->sw_queue[ptr]; @@ -440,7 +440,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) if (++ptr == cq->size) ptr = 0; } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); + pr_debug("cq %p count %d\n", cq, *count); } /* @@ -471,8 +471,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, if (ret) return ret; - pr_debug("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), + pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", + CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe), CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe), CQE_WRID_LOW(hw_cqe)); @@ -603,8 +603,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) { struct t4_swsqe *swsqe; - pr_debug("%s out of order completion going in sw_sq at idx %u\n", - __func__, CQE_WRID_SQ_IDX(hw_cqe)); + pr_debug("out of order completion going in sw_sq at idx %u\n", + CQE_WRID_SQ_IDX(hw_cqe)); swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; swsqe->cqe = *hw_cqe; swsqe->complete = 1; @@ -638,13 +638,13 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); wq->sq.cidx = (uint16_t)idx; - pr_debug("%s completing sq idx %u\n", __func__, wq->sq.cidx); + pr_debug("completing sq idx %u\n", wq->sq.cidx); *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; if (c4iw_wr_log) c4iw_log_wr_stats(wq, hw_cqe); t4_sq_consume(wq); } else { - pr_debug("%s completing rq idx %u\n", __func__, wq->rq.cidx); + pr_debug("completing rq idx %u\n", wq->rq.cidx); *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; BUG_ON(t4_rq_empty(wq)); if (c4iw_wr_log) @@ -661,12 +661,12 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, skip_cqe: if (SW_CQE(hw_cqe)) { - pr_debug("%s cq %p cqid 0x%x skip sw cqe cidx %u\n", - __func__, cq, cq->cqid, cq->sw_cidx); + pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n", + cq, cq->cqid, cq->sw_cidx); t4_swcq_consume(cq); } else { - pr_debug("%s cq %p cqid 0x%x skip hw cqe cidx %u\n", - __func__, cq, cq->cqid, cq->cidx); + pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n", + cq, cq->cqid, cq->cidx); t4_hwcq_consume(cq); } return ret; @@ -712,8 +712,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; - pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", - 
__func__, CQE_QPID(&cqe), + pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", + CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), @@ -857,7 +857,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; - pr_debug("%s ib_cq %p\n", __func__, ib_cq); + pr_debug("ib_cq %p\n", ib_cq); chp = to_c4iw_cq(ib_cq); remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); @@ -889,7 +889,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; - pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) return ERR_PTR(-EINVAL); @@ -996,8 +996,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, mm2->len = PAGE_SIZE; insert_mmap(ucontext, mm2); } - pr_debug("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n", - __func__, chp->cq.cqid, chp, chp->cq.size, + pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n", + chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, (unsigned long long)chp->cq.dma_addr); return &chp->ibcq; err6: diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index fc886f81b885..d19f7fb2966c 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -811,8 +811,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->qpmask = rdev->lldi.udb_density - 1; rdev->cqmask = rdev->lldi.ucq_density - 1; - pr_debug("%s dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n", - __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, + pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), rdev->lldi.vr->pbl.start, rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start, @@ -935,7 +935,7 @@ static void c4iw_dealloc(struct uld_ctx *ctx) static void c4iw_remove(struct uld_ctx *ctx) { - pr_debug("%s c4iw_dev %p\n", __func__, ctx->dev); + pr_debug("c4iw_dev %p\n", ctx->dev); c4iw_unregister_device(ctx->dev); c4iw_dealloc(ctx); } @@ -969,8 +969,8 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi = *infop; /* init various hw-queue params based on lld info */ - pr_debug("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n", - __func__, devp->rdev.lldi.sge_ingpadboundary, + pr_debug("Ing. 
padding boundary is %d, egrsstatuspagesize = %d\n", + devp->rdev.lldi.sge_ingpadboundary, devp->rdev.lldi.sge_egrstatuspagesize); devp->rdev.hw_queue.t4_eq_status_entries = @@ -1069,8 +1069,8 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop) } ctx->lldi = *infop; - pr_debug("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n", - __func__, pci_name(ctx->lldi.pdev), + pr_debug("found device %s nchan %u nrxq %u ntxq %u nports %u\n", + pci_name(ctx->lldi.pdev), ctx->lldi.nchan, ctx->lldi.nrxq, ctx->lldi.ntxq, ctx->lldi.nports); @@ -1203,7 +1203,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) { struct uld_ctx *ctx = handle; - pr_debug("%s new_state %u\n", __func__, new_state); + pr_debug("new_state %u\n", new_state); switch (new_state) { case CXGB4_STATE_UP: pr_info("%s: Up\n", pci_name(ctx->lldi.pdev)); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 819a30635d53..fafb899fec7c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -537,8 +537,7 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext, if (mm->key == key && mm->len == len) { list_del_init(&mm->entry); spin_unlock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, key, + pr_debug("key 0x%x addr 0x%llx len %d\n", key, (unsigned long long)mm->addr, mm->len); return mm; } @@ -551,8 +550,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext, struct c4iw_mm_entry *mm) { spin_lock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, mm->key, (unsigned long long)mm->addr, mm->len); + pr_debug("key 0x%x addr 0x%llx len %d\n", + mm->key, (unsigned long long)mm->addr, mm->len); list_add_tail(&mm->entry, &ucontext->mmaps); spin_unlock(&ucontext->mmap_lock); } @@ -671,16 +670,14 @@ enum c4iw_mmid_state { #define MPA_V2_IRD_ORD_MASK 0x3FFF #define c4iw_put_ep(ep) { \ - pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \ - __func__, __LINE__, \ + pr_debug("put_ep ep %p refcnt %d\n", \ ep, kref_read(&((ep)->kref))); \ WARN_ON(kref_read(&((ep)->kref)) < 1); \ kref_put(&((ep)->kref), _c4iw_free_ep); \ } #define c4iw_get_ep(ep) { \ - pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \ - __func__, __LINE__, \ + pr_debug("get_ep ep %p, refcnt %d\n", \ ep, kref_read(&((ep)->kref))); \ kref_get(&((ep)->kref)); \ } diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index c2fba76becd4..eeadc69ad4be 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -124,7 +124,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F); addr &= 0x7FFFFFF; - pr_debug("%s addr 0x%x len %u\n", __func__, addr, len); + pr_debug("addr 0x%x len %u\n", addr, len); num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE); c4iw_init_wr_wait(&wr_wait); for (i = 0; i < num_wqe; i++) { @@ -285,8 +285,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_unlock(&rdev->stats.lock); *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff); } - pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", - __func__, stag_state, type, pdid, stag_idx); + pr_debug("stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", + stag_state, type, pdid, stag_idx); /* write TPT entry */ if (reset_tpt_entry) @@ -327,8 +327,8 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, { int 
err; - pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, pbl_addr, rdev->lldi.vr->pbl.start, + pr_debug("*pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + pbl_addr, rdev->lldi.vr->pbl.start, pbl_size); err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL); @@ -372,7 +372,7 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mhp->attr.stag = stag; mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); } @@ -422,7 +422,7 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) int ret; u32 stag = T4_STAG_UNSET; - pr_debug("%s ib_pd %p\n", __func__, pd); + pr_debug("ib_pd %p\n", pd); php = to_c4iw_pd(pd); rhp = php->rhp; @@ -479,7 +479,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct c4iw_pd *php; struct c4iw_mr *mhp; - pr_debug("%s ib_pd %p\n", __func__, pd); + pr_debug("ib_pd %p\n", pd); if (length == ~0ULL) return ERR_PTR(-EINVAL); @@ -616,7 +616,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, ret = -ENOMEM; goto dealloc_win; } - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); return &(mhp->ibmw); dealloc_win: @@ -641,7 +641,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); kfree_skb(mhp->dereg_skb); kfree(mhp); - pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); + pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp); return 0; } @@ -699,7 +699,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, goto err3; } - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); return &(mhp->ibmr); err3: dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, @@ -744,7 +744,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) struct c4iw_mr *mhp; u32 mmid; - pr_debug("%s ib_mr %p\n", __func__, ib_mr); + pr_debug("ib_mr %p\n", ib_mr); mhp = to_c4iw_mr(ib_mr); rhp = mhp->rhp; @@ -762,7 +762,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) kfree((void *) (unsigned long) mhp->kva); if (mhp->umem) ib_umem_release(mhp->umem); - pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); + pr_debug("mmid 0x%x ptr %p\n", mmid, mhp); kfree(mhp); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 346e8334279a..fb99a05562a3 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -102,7 +102,7 @@ void _c4iw_free_ucontext(struct kref *kref) ucontext = container_of(kref, struct c4iw_ucontext, kref); rhp = to_c4iw_dev(ucontext->ibucontext.device); - pr_debug("%s ucontext %p\n", __func__, ucontext); + pr_debug("ucontext %p\n", ucontext); list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); @@ -113,7 +113,7 @@ static int c4iw_dealloc_ucontext(struct ib_ucontext *context) { struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); - pr_debug("%s context %p\n", __func__, context); + pr_debug("context %p\n", context); c4iw_put_ucontext(ucontext); return 0; } @@ -127,7 +127,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, int ret = 0; struct c4iw_mm_entry *mm = NULL; - pr_debug("%s ibdev %p\n", __func__, ibdev); + pr_debug("ibdev %p\n", 
ibdev); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) { ret = -ENOMEM; @@ -185,7 +185,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) struct c4iw_ucontext *ucontext; u64 addr; - pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, + pr_debug("pgoff 0x%lx key 0x%x len %d\n", vma->vm_pgoff, key, len); if (vma->vm_start & (PAGE_SIZE-1)) @@ -251,7 +251,7 @@ static int c4iw_deallocate_pd(struct ib_pd *pd) php = to_c4iw_pd(pd); rhp = php->rhp; - pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); + pr_debug("ibpd %p pdid 0x%x\n", pd, php->pdid); c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid); mutex_lock(&rhp->rdev.stats.lock); rhp->rdev.stats.pd.cur--; @@ -268,7 +268,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, u32 pdid; struct c4iw_dev *rhp; - pr_debug("%s ibdev %p\n", __func__, ibdev); + pr_debug("ibdev %p\n", ibdev); rhp = (struct c4iw_dev *) ibdev; pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); if (!pdid) @@ -291,14 +291,14 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max) rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; mutex_unlock(&rhp->rdev.stats.lock); - pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); + pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php); return &php->ibpd; } static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - pr_debug("%s ibdev %p\n", __func__, ibdev); + pr_debug("ibdev %p\n", ibdev); *pkey = 0; return 0; } @@ -308,8 +308,8 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, { struct c4iw_dev *dev; - pr_debug("%s ibdev %p, port %d, index %d, gid %p\n", - __func__, ibdev, port, index, gid); + pr_debug("ibdev %p, port %d, index %d, gid %p\n", + ibdev, port, index, gid); dev = to_c4iw_dev(ibdev); BUG_ON(port == 0); memset(&(gid->raw[0]), 0, sizeof(gid->raw)); @@ -323,7 +323,7 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro struct c4iw_dev *dev; - pr_debug("%s ibdev %p\n", __func__, ibdev); + pr_debug("ibdev %p\n", ibdev); if (uhw->inlen || uhw->outlen) return -EINVAL; @@ -364,7 +364,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, struct net_device *netdev; struct in_device *inetdev; - pr_debug("%s ibdev %p\n", __func__, ibdev); + pr_debug("ibdev %p\n", ibdev); dev = to_c4iw_dev(ibdev); netdev = dev->rdev.lldi.ports[port-1]; @@ -406,7 +406,7 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, { struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev.dev); - pr_debug("%s dev 0x%p\n", __func__, dev); + pr_debug("dev 0x%p\n", dev); return sprintf(buf, "%d\n", CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } @@ -419,7 +419,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr, struct ethtool_drvinfo info; struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0]; - pr_debug("%s dev 0x%p\n", __func__, dev); + pr_debug("dev 0x%p\n", dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); return sprintf(buf, "%s\n", info.driver); } @@ -429,7 +429,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, { struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev.dev); - pr_debug("%s dev 0x%p\n", __func__, dev); + pr_debug("dev 0x%p\n", dev); return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, c4iw_dev->rdev.lldi.pdev->device); } @@ -521,7 +521,7 @@ static void 
get_dev_fw_str(struct ib_device *dev, char *str) { struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev); - pr_debug("%s dev 0x%p\n", __func__, dev); + pr_debug("dev 0x%p\n", dev); snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u.%u", FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), @@ -535,7 +535,7 @@ int c4iw_register_device(struct c4iw_dev *dev) int ret; int i; - pr_debug("%s c4iw_dev %p\n", __func__, dev); + pr_debug("c4iw_dev %p\n", dev); BUG_ON(!dev->rdev.lldi.ports[0]); strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); @@ -645,7 +645,7 @@ void c4iw_unregister_device(struct c4iw_dev *dev) { int i; - pr_debug("%s c4iw_dev %p\n", __func__, dev); + pr_debug("c4iw_dev %p\n", dev); for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) device_remove_file(&dev->ibdev.dev, c4iw_class_attributes[i]); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cb7fc0d35d1d..a1bbd2434e5a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -254,8 +254,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, ret = -ENOMEM; goto free_sq; } - pr_debug("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", - __func__, wq->sq.queue, + pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + wq->sq.queue, (unsigned long long)virt_to_phys(wq->sq.queue), wq->rq.queue, (unsigned long long)virt_to_phys(wq->rq.queue)); @@ -361,8 +361,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (ret) goto free_dma; - pr_debug("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n", - __func__, wq->sq.qid, wq->rq.qid, wq->db, + pr_debug("sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n", + wq->sq.qid, wq->rq.qid, wq->db, wq->sq.bar2_va, wq->rq.bar2_va); return 0; @@ -724,7 +724,7 @@ static void free_qp_work(struct work_struct *work) ucontext = qhp->ucontext; rhp = qhp->rhp; - pr_debug("%s qhp %p ucontext %p\n", __func__, qhp, ucontext); + pr_debug("qhp %p ucontext %p\n", qhp, ucontext); destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); @@ -738,19 +738,19 @@ static void queue_qp_free(struct kref *kref) struct c4iw_qp *qhp; qhp = container_of(kref, struct c4iw_qp, kref); - pr_debug("%s qhp %p\n", __func__, qhp); + pr_debug("qhp %p\n", qhp); queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work); } void c4iw_qp_add_ref(struct ib_qp *qp) { - pr_debug("%s ib_qp %p\n", __func__, qp); + pr_debug("ib_qp %p\n", qp); kref_get(&to_c4iw_qp(qp)->kref); } void c4iw_qp_rem_ref(struct ib_qp *qp) { - pr_debug("%s ib_qp %p\n", __func__, qp); + pr_debug("ib_qp %p\n", qp); kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free); } @@ -980,8 +980,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); - pr_debug("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n", - __func__, + pr_debug("cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n", (unsigned long long)wr->wr_id, qhp->wq.sq.pidx, swsqe->opcode, swsqe->read_len); wr = wr->next; @@ -1057,8 +1056,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wqe->recv.r2[1] = 0; wqe->recv.r2[2] = 0; wqe->recv.len16 = len16; - pr_debug("%s cookie 0x%llx pidx %u\n", - __func__, + pr_debug("cookie 0x%llx pidx %u\n", (unsigned long long)wr->wr_id, qhp->wq.rq.pidx); t4_rq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); @@ -1218,7 +1216,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, struct sk_buff *skb; struct terminate_message *term; - pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, + pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, qhp->ep->hwtid); skb = skb_dequeue(&qhp->ep->com.ep_skb_list); @@ -1255,7 +1253,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, int rq_flushed, sq_flushed; unsigned long flag; - pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); + pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); /* locking hierarchy: cq lock first, then qp lock. 
*/ spin_lock_irqsave(&rchp->lock, flag); @@ -1340,8 +1338,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, int ret; struct sk_buff *skb; - pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, - ep->hwtid); + pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, ep->hwtid); skb = skb_dequeue(&ep->com.ep_skb_list); if (WARN_ON(!skb)) @@ -1367,13 +1364,13 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid, qhp->wq.sq.qid, __func__); out: - pr_debug("%s ret %d\n", __func__, ret); + pr_debug("ret %d\n", ret); return ret; } static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) { - pr_debug("%s p2p_type = %d\n", __func__, p2p_type); + pr_debug("p2p_type = %d\n", p2p_type); memset(&init->u, 0, sizeof init->u); switch (p2p_type) { case FW_RI_INIT_P2PTYPE_RDMA_WRITE: @@ -1402,7 +1399,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) int ret; struct sk_buff *skb; - pr_debug("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp, + pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp, qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord); skb = alloc_skb(sizeof *wqe, GFP_KERNEL); @@ -1475,7 +1472,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) err1: free_ird(rhp, qhp->attr.max_ird); out: - pr_debug("%s ret %d\n", __func__, ret); + pr_debug("ret %d\n", ret); return ret; } @@ -1492,8 +1489,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, int free = 0; struct c4iw_ep *ep = NULL; - pr_debug("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", - __func__, + pr_debug("qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state, (mask & C4IW_QP_ATTR_NEXT_STATE) ? 
attrs->next_state : -1); @@ -1680,7 +1676,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } goto out; err: - pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, + pr_debug("disassociating ep %p qpid 0x%x\n", qhp->ep, qhp->wq.sq.qid); /* disassociate the LLP connection */ @@ -1717,7 +1713,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, */ if (free) c4iw_put_ep(&ep->com); - pr_debug("%s exit state %d\n", __func__, qhp->attr.state); + pr_debug("exit state %d\n", qhp->attr.state); return ret; } @@ -1747,7 +1743,7 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_qp_rem_ref(ib_qp); - pr_debug("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); + pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid); return 0; } @@ -1766,7 +1762,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL; - pr_debug("%s ib_pd %p\n", __func__, pd); + pr_debug("ib_pd %p\n", pd); if (attrs->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); @@ -1937,8 +1933,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); INIT_LIST_HEAD(&qhp->db_fc_entry); - pr_debug("%s sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", - __func__, + pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size, qhp->wq.rq.memsize, attrs->cap.max_recv_wr); @@ -1971,7 +1966,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum c4iw_qp_attr_mask mask = 0; struct c4iw_qp_attributes attrs; - pr_debug("%s ib_qp %p\n", __func__, ibqp); + pr_debug("ib_qp %p\n", ibqp); /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) @@ -2017,7 +2012,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) { - pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); + pr_debug("ib_dev %p qpn 0x%x\n", dev, qpn); return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 8ff0cbe5cb16..3cf25997ed2b 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -90,7 +90,7 @@ u32 c4iw_get_resource(struct c4iw_id_table *id_table) void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry) { - pr_debug("%s entry 0x%x\n", __func__, entry); + pr_debug("entry 0x%x\n", entry); c4iw_id_free(id_table, entry); } @@ -141,7 +141,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) } out: mutex_unlock(&uctx->lock); - pr_debug("%s qid 0x%x\n", __func__, qid); + pr_debug("qid 0x%x\n", qid); mutex_lock(&rdev->stats.lock); if (rdev->stats.qid.cur > rdev->stats.qid.max) rdev->stats.qid.max = rdev->stats.qid.cur; @@ -157,7 +157,7 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid, entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) return; - pr_debug("%s qid 0x%x\n", __func__, qid); + pr_debug("qid 0x%x\n", qid); entry->qid = qid; mutex_lock(&uctx->lock); list_add_tail(&entry->entry, &uctx->cqids); @@ -215,7 +215,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct 
c4iw_dev_ucontext *uctx) } out: mutex_unlock(&uctx->lock); - pr_debug("%s qid 0x%x\n", __func__, qid); + pr_debug("qid 0x%x\n", qid); mutex_lock(&rdev->stats.lock); if (rdev->stats.qid.cur > rdev->stats.qid.max) rdev->stats.qid.max = rdev->stats.qid.cur; @@ -231,7 +231,7 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) return; - pr_debug("%s qid 0x%x\n", __func__, qid); + pr_debug("qid 0x%x\n", qid); entry->qid = qid; mutex_lock(&uctx->lock); list_add_tail(&entry->entry, &uctx->qpids); @@ -254,7 +254,7 @@ void c4iw_destroy_resource(struct c4iw_resource *rscp) u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + pr_debug("addr 0x%x size %d\n", (u32)addr, size); mutex_lock(&rdev->stats.lock); if (addr) { rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); @@ -268,7 +268,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size); + pr_debug("addr 0x%x size %d\n", addr, size); mutex_lock(&rdev->stats.lock); rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); mutex_unlock(&rdev->stats.lock); @@ -290,8 +290,8 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev) while (pbl_start < pbl_top) { pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk); if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) { - pr_debug("%s failed to add PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); + pr_debug("failed to add PBL chunk (%x/%x)\n", + pbl_start, pbl_chunk); if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { pr_warn("Failed to add all PBL chunks (%x/%x)\n", pbl_start, pbl_top - pbl_start); @@ -299,8 +299,8 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev) } pbl_chunk >>= 1; } else { - pr_debug("%s added PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); + pr_debug("added PBL chunk (%x/%x)\n", + pbl_start, pbl_chunk); pbl_start += pbl_chunk; } } @@ -322,7 +322,7 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev) u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); + pr_debug("addr 0x%x size %d\n", (u32)addr, size << 6); if (!addr) pr_warn_ratelimited("%s: Out of RQT memory\n", pci_name(rdev->lldi.pdev)); @@ -339,7 +339,7 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6); + pr_debug("addr 0x%x size %d\n", addr, size << 6); mutex_lock(&rdev->stats.lock); rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); mutex_unlock(&rdev->stats.lock); @@ -361,8 +361,8 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev) while (rqt_start < rqt_top) { rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk); if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) { - pr_debug("%s failed to add RQT chunk (%x/%x)\n", - __func__, rqt_start, rqt_chunk); + pr_debug("failed to add RQT chunk (%x/%x)\n", + rqt_start, rqt_chunk); if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) { pr_warn("Failed to add all RQT chunks (%x/%x)\n", rqt_start, rqt_top - rqt_start); @@ -370,8 +370,8 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev) } rqt_chunk >>= 1; } else { - pr_debug("%s added RQT chunk (%x/%x)\n", - __func__, 
rqt_start, rqt_chunk);
+			pr_debug("added RQT chunk (%x/%x)\n",
+				 rqt_start, rqt_chunk);
 			rqt_start += rqt_chunk;
 		}
 	}
@@ -391,7 +391,7 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
 u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
 {
 	unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
-	pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+	pr_debug("addr 0x%x size %d\n", (u32)addr, size);
 	if (addr) {
 		mutex_lock(&rdev->stats.lock);
 		rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
@@ -404,7 +404,7 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
 void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 {
-	pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
+	pr_debug("addr 0x%x size %d\n", addr, size);
 	mutex_lock(&rdev->stats.lock);
 	rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
 	mutex_unlock(&rdev->stats.lock);
@@ -426,8 +426,8 @@ int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev)
 	while (start < top) {
 		chunk = min(top - start + 1, chunk);
 		if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) {
-			pr_debug("%s failed to add OCQP chunk (%x/%x)\n",
-				 __func__, start, chunk);
+			pr_debug("failed to add OCQP chunk (%x/%x)\n",
+				 start, chunk);
 			if (chunk <= 1024 << MIN_OCQP_SHIFT) {
 				pr_warn("Failed to add all OCQP chunks (%x/%x)\n",
 					start, top - start);
@@ -435,8 +435,8 @@ int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev)
 			}
 			chunk >>= 1;
 		} else {
-			pr_debug("%s added OCQP chunk (%x/%x)\n",
-				 __func__, start, chunk);
+			pr_debug("added OCQP chunk (%x/%x)\n",
+				 start, chunk);
 			start += chunk;
 		}
 	}
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e765c00303cd..17abe46920d1 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -466,14 +466,12 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
 	wmb();
 	if (wq->sq.bar2_va) {
 		if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) {
-			pr_debug("%s: WC wq->sq.pidx = %d\n",
-				 __func__, wq->sq.pidx);
+			pr_debug("WC wq->sq.pidx = %d\n", wq->sq.pidx);
 			pio_copy((u64 __iomem *)
 				 (wq->sq.bar2_va + SGE_UDB_WCDOORBELL),
 				 (u64 *)wqe);
 		} else {
-			pr_debug("%s: DB wq->sq.pidx = %d\n",
-				 __func__, wq->sq.pidx);
+			pr_debug("DB wq->sq.pidx = %d\n", wq->sq.pidx);
 			writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid),
 			       wq->sq.bar2_va + SGE_UDB_KDOORBELL);
 		}
@@ -493,14 +491,12 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
 	wmb();
 	if (wq->rq.bar2_va) {
 		if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) {
-			pr_debug("%s: WC wq->rq.pidx = %d\n",
-				 __func__, wq->rq.pidx);
+			pr_debug("WC wq->rq.pidx = %d\n", wq->rq.pidx);
 			pio_copy((u64 __iomem *)
 				 (wq->rq.bar2_va + SGE_UDB_WCDOORBELL),
 				 (void *)wqe);
 		} else {
-			pr_debug("%s: DB wq->rq.pidx = %d\n",
-				 __func__, wq->rq.pidx);
+			pr_debug("DB wq->rq.pidx = %d\n", wq->rq.pidx);
 			writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid),
 			       wq->rq.bar2_va + SGE_UDB_KDOORBELL);
 		}

From 4d45b7573ba74a278652b0566d779c2aa7ea3df1 Mon Sep 17 00:00:00 2001
From: Bharat Potnuri
Date: Wed, 27 Sep 2017 13:05:50 +0530
Subject: [PATCH 042/296] iw_cxgb4: change pr_debug to appropriate log level

Error logs of iw_cxgb4 need to be printed by default. This patch
changes the necessary pr_debug() calls to the appropriate pr_* level.
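As a minimal illustration of the level-selection rule applied below
(sketch only: demo_send(), its "fatal" flag and "tid" argument are
hypothetical stand-ins, not iw_cxgb4 code), error paths move to
pr_err()/pr_warn() so they are visible by default, while high-volume
tracing stays at pr_debug():

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/types.h>

/*
 * Sketch: pick the log level by severity instead of using pr_debug()
 * everywhere. Errors must reach the log by default; per-packet tracing
 * stays behind DEBUG/dynamic debug.
 */
static int demo_send(bool fatal, u32 tid)
{
	if (fatal) {
		/* fatal condition: always printed */
		pr_err("%s - device in error state - dropping\n", __func__);
		return -EIO;
	}
	/* high-volume tracing: compiled out unless dynamic debug is on */
	pr_debug("ep tid %u queued\n", tid);
	return 0;
}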
Signed-off-by: Potnuri Bharat Teja Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 46 +++++++++++++------------- drivers/infiniband/hw/cxgb4/ev.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 4 +-- drivers/infiniband/hw/cxgb4/qp.c | 4 +-- drivers/infiniband/hw/cxgb4/t4.h | 8 ++--- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 23cb4af8ce09..01e3aba19fe9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -212,7 +212,7 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, if (c4iw_fatal_error(rdev)) { kfree_skb(skb); - pr_debug("%s - device in error state - dropping\n", __func__); + pr_err("%s - device in error state - dropping\n", __func__); return -EIO; } error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); @@ -229,7 +229,7 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) if (c4iw_fatal_error(rdev)) { kfree_skb(skb); - pr_debug("%s - device in error state - dropping\n", __func__); + pr_err("%s - device in error state - dropping\n", __func__); return -EIO; } error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); @@ -263,8 +263,8 @@ static void set_emss(struct c4iw_ep *ep, u16 opt) if (ep->emss < 128) ep->emss = 128; if (ep->emss & 7) - pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n", - TCPOPT_MSS_G(opt), ep->mss, ep->emss); + pr_warn("Warning: misaligned mtu idx %u mss %u emss=%u\n", + TCPOPT_MSS_G(opt), ep->mss, ep->emss); pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss, ep->emss); } @@ -2314,7 +2314,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); if (!ep) { - pr_debug("%s stid %d lookup failure!\n", __func__, stid); + pr_warn("%s stid %d lookup failure!\n", __func__, stid); goto out; } pr_debug("ep %p status %d error %d\n", ep, @@ -2332,7 +2332,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); if (!ep) { - pr_debug("%s stid %d lookup failure!\n", __func__, stid); + pr_warn("%s stid %d lookup failure!\n", __func__, stid); goto out; } pr_debug("ep %p\n", ep); @@ -2464,13 +2464,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); if (!parent_ep) { - pr_debug("%s connect request on invalid stid %d\n", - __func__, stid); + pr_err("%s connect request on invalid stid %d\n", + __func__, stid); goto reject; } if (state_read(&parent_ep->com) != LISTEN) { - pr_debug("%s - listening ep not in LISTEN\n", __func__); + pr_err("%s - listening ep not in LISTEN\n", __func__); goto reject; } @@ -2739,9 +2739,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) return 0; if (cxgb_is_neg_adv(req->status)) { - pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n", - __func__, ep->hwtid, req->status, - neg_adv_str(req->status)); + pr_warn("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, + neg_adv_str(req->status)); ep->stats.abort_neg_adv++; mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.neg_adv++; @@ -2781,8 +2781,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) * do some housekeeping so as to re-initiate the * connection */ - pr_debug("%s: mpa_rev=%d. Retrying with mpav1\n", - __func__, mpa_rev); + pr_info("%s: mpa_rev=%d. 
Retrying with mpav1\n", + __func__, mpa_rev); ep->retry_with_mpa_v1 = 1; } break; @@ -2808,7 +2808,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) case ABORTING: break; case DEAD: - pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); mutex_unlock(&ep->com.mutex); goto deref_ep; default: @@ -3218,7 +3218,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.dev = dev; ep->com.qp = get_qhp(dev, conn_param->qpn); if (!ep->com.qp) { - pr_debug("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); + pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); err = -EINVAL; goto fail2; } @@ -3571,8 +3571,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) case MORIBUND: case ABORTING: case DEAD: - pr_debug("%s ignoring disconnect ep %p state %u\n", - __func__, ep, ep->com.state); + pr_info("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); break; default: BUG(); @@ -3676,7 +3676,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; BUG_ON(!rpl_skb); if (req->retval) { - pr_debug("%s passive open failure %d\n", __func__, req->retval); + pr_err("%s passive open failure %d\n", __func__, req->retval); mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.pas_ofld_conn_fails++; mutex_unlock(&dev->rdev.stats.lock); @@ -3893,8 +3893,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); if (!lep) { - pr_debug("%s connect request on invalid stid %d\n", - __func__, stid); + pr_warn("%s connect request on invalid stid %d\n", + __func__, stid); goto reject; } @@ -4209,8 +4209,8 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } if (cxgb_is_neg_adv(req->status)) { - pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n", - __func__, ep->hwtid, req->status, + pr_warn("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, neg_adv_str(req->status)); goto out; } diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 8f963df0bffc..b8c7cc938bce 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -234,7 +234,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) if (atomic_dec_and_test(&chp->refcnt)) wake_up(&chp->wait); } else { - pr_debug("%s unknown cqid 0x%x\n", __func__, qid); + pr_warn("%s unknown cqid 0x%x\n", __func__, qid); spin_unlock_irqrestore(&dev->lock, flag); } return 0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index fafb899fec7c..433e78e5f25b 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -230,8 +230,8 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, ret = wait_for_completion_timeout(&wr_waitp->completion, C4IW_WR_TO); if (!ret) { - pr_debug("%s - Device %s not responding (disabling device) - tid %u qpid %u\n", - func, pci_name(rdev->lldi.pdev), hwtid, qpid); + pr_err("%s - Device %s not responding (disabling device) - tid %u qpid %u\n", + func, pci_name(rdev->lldi.pdev), hwtid, qpid); rdev->flags |= T4_FATAL_ERROR; wr_waitp->ret = -EIO; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index a1bbd2434e5a..88e045b94773 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ 
b/drivers/infiniband/hw/cxgb4/qp.c @@ -958,8 +958,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); break; default: - pr_debug("%s post of type=%d TBD!\n", __func__, - wr->opcode); + pr_warn("%s post of type=%d TBD!\n", __func__, + wr->opcode); err = -EINVAL; } if (err) { diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 17abe46920d1..2b44fa850bbb 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -597,8 +597,8 @@ static inline void t4_swcq_produce(struct t4_cq *cq) { cq->sw_in_use++; if (cq->sw_in_use == cq->size) { - pr_debug("%s cxgb4 sw cq overflow cqid %u\n", - __func__, cq->cqid); + pr_warn("%s cxgb4 sw cq overflow cqid %u\n", + __func__, cq->cqid); cq->error = 1; BUG_ON(1); } @@ -669,8 +669,8 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq) { if (cq->sw_in_use == cq->size) { - pr_debug("%s cxgb4 sw cq overflow cqid %u\n", - __func__, cq->cqid); + pr_warn("%s cxgb4 sw cq overflow cqid %u\n", + __func__, cq->cqid); cq->error = 1; BUG_ON(1); return NULL; From 62f1e84e4680e96423a610261d2310482d5cacd3 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 26 Sep 2017 06:43:48 -0700 Subject: [PATCH 043/296] IB/opa_vnic: Mark unused Ethernet MTU fields as reserved Per pcp mtu fields are not used, mark them as reserved. Reviewed-by: Sudeep Dutt Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 8 +++----- drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h | 5 ++--- drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 2 +- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 10 ++-------- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index 4c434b9dd84c..be45697b8e60 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -111,8 +111,7 @@ * @pkey: partition key * @u_mcast_dlid: unknown multicast dlid * @u_ucast_dlid: array of unknown unicast dlids - * @eth_mtu: MTUs for each vlan PCP - * @eth_mtu_non_vlan: MTU for non vlan packets + * @eth_mtu: Ethernet MTU */ struct opa_vesw_info { __be16 fabric_id; @@ -128,9 +127,8 @@ struct opa_vesw_info { __be32 u_mcast_dlid; __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; - u8 rsvd3[44]; - __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP]; - __be16 eth_mtu_non_vlan; + u8 rsvd3[60]; + __be16 eth_mtu; u8 rsvd4[2]; } __packed; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index ca29e6d5aedc..a90c63125671 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -89,9 +89,8 @@ struct __opa_vesw_info { u32 u_mcast_dlid; u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; - u8 rsvd3[44]; - u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP]; - u16 eth_mtu_non_vlan; + u8 rsvd3[60]; + u16 eth_mtu; u8 rsvd4[2]; } __packed; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index 1a3c25364b64..41fd05baed82 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -140,7 +140,7 @@ void 
opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) /* Handle MTU limit change */ rtnl_lock(); - netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan, + netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu, netdev->min_mtu); if (netdev->mtu > netdev->max_mtu) dev_set_mtu(netdev, netdev->max_mtu); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index c2733964379c..5856ae3a0d7b 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -177,10 +177,7 @@ void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter, info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]); memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); - for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++) - info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]); - - info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan); + info->eth_mtu = cpu_to_be16(src->eth_mtu); memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4)); } @@ -212,10 +209,7 @@ void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter, dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]); memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); - for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++) - dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]); - - dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan); + dst->eth_mtu = be16_to_cpu(info->eth_mtu); memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4)); } From 7f291d28690af3d0eff4fbbf02020ef5d237f040 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 26 Sep 2017 06:43:54 -0700 Subject: [PATCH 044/296] IB/opa_vnic: Set POD value for Ethernet MTU Set power on default value of 1500 for eth_mtu. Reviewed-by: Sudeep Dutt Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 1 + drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index 41fd05baed82..305f3a83176a 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -183,6 +183,7 @@ static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter) adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT; adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL; adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + adapter->info.vesw.eth_mtu = ETH_DATA_LEN; } /* opa_vnic_set_mac_addr - change mac address */ diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 21f0b481edcc..4b615c1451e7 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -186,6 +186,7 @@ static inline void vema_get_pod_values(struct opa_veswport_info *port_info) cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT); port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL; + port_info->vesw.eth_mtu = cpu_to_be16(ETH_DATA_LEN); } /** From 5a5a85da40af20d818afa1983771cc86e2577b39 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 26 Sep 2017 06:44:01 -0700 Subject: [PATCH 045/296] IB/opa_vnic: Allow reset of MAC address Ensure MAC address is reset while deleting the vesw port. 
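The hunk below swaps eth_mac_addr() for eth_commit_mac_addr_change(). The likely reason (an inference, not stated in the patch text): eth_mac_addr() first runs eth_prepare_mac_addr_change(), which among other checks rejects addresses failing is_valid_ether_addr(), and the all-zero address written when a vesw port is deleted fails that check, while the commit-only helper copies the address unconditionally. A hedged sketch of that split (the wrapper name is illustrative):

#include <linux/etherdevice.h>

/* Sketch: eth_mac_addr() is roughly validate-then-commit. */
static int set_mac_sketch(struct net_device *ndev, struct sockaddr *sa)
{
	if (!is_valid_ether_addr(sa->sa_data))
		return -EADDRNOTAVAIL;		/* an all-zero reset address fails here */
	eth_commit_mac_addr_change(ndev, sa);	/* unconditional copy into dev_addr */
	return 0;
}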
Reviewed-by: Sudeep Dutt Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index 305f3a83176a..30f78587e23f 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -130,7 +130,7 @@ void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) memcpy(saddr.sa_data, info->vport.base_mac_addr, ARRAY_SIZE(info->vport.base_mac_addr)); mutex_lock(&adapter->lock); - eth_mac_addr(netdev, &saddr); + eth_commit_mac_addr_change(netdev, &saddr); memcpy(adapter->vema_mac_addr, info->vport.base_mac_addr, ETH_ALEN); mutex_unlock(&adapter->lock); From b77eb45e0d9c324245d165656ab3b38b6f386436 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 26 Sep 2017 06:44:07 -0700 Subject: [PATCH 046/296] IB/opa_vnic: Properly return the total MACs in UC MAC list Do not include EM specified MAC address in total MACs of the UC MAC list. Reviewed-by: Sudeep Dutt Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index 5856ae3a0d7b..5553900848e3 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -342,7 +342,7 @@ void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, struct opa_veswport_iface_macs *macs) { - u16 start_idx, tot_macs, num_macs, idx = 0, count = 0; + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0, em_macs = 0; struct netdev_hw_addr *ha; start_idx = be16_to_cpu(macs->start_idx); @@ -353,8 +353,10 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, /* Do not include EM specified MAC address */ if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr, - ARRAY_SIZE(adapter->info.vport.base_mac_addr))) + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) { + em_macs++; continue; + } if (start_idx > idx++) continue; @@ -377,7 +379,7 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, } tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) + - netdev_uc_count(adapter->netdev); + netdev_uc_count(adapter->netdev) - em_macs; macs->tot_macs_in_lst = cpu_to_be16(tot_macs); macs->num_macs_in_msg = cpu_to_be16(count); macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count); From 4bbdfe25600c1909c26747d0b5c39fd0e409bb87 Mon Sep 17 00:00:00 2001 From: Scott Franco Date: Tue, 26 Sep 2017 06:44:13 -0700 Subject: [PATCH 047/296] IB/opa_vnic: Properly clear Mac Table Digest Clear the MAC table digest when the MAC table is freed. 
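The function being patched, opa_vnic_release_mac_tbl(), retires the table with the usual RCU unpublish-wait-free sequence; the one-line fix zeroes mac_tbl_digest after the free, presumably so a stale digest is not reported for an empty table. A condensed sketch of that retire sequence (tbl_ptr and tbl_lock are illustrative names, not the driver's):

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct mac_tbl { int dummy; };	/* stand-in for the real table */
static struct mac_tbl __rcu *tbl_ptr;
static DEFINE_MUTEX(tbl_lock);

static void release_tbl_sketch(void)
{
	struct mac_tbl *old;

	mutex_lock(&tbl_lock);
	old = rcu_dereference_protected(tbl_ptr, lockdep_is_held(&tbl_lock));
	rcu_assign_pointer(tbl_ptr, NULL);	/* new readers now see no table */
	synchronize_rcu();			/* wait out readers still using it */
	kfree(old);				/* safe: no reader can hold a reference */
	mutex_unlock(&tbl_lock);
}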
Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Scott Franco Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c index afa938bd26d6..a72278e9cd27 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -139,6 +139,7 @@ void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter) rcu_assign_pointer(adapter->mactbl, NULL); synchronize_rcu(); opa_vnic_free_mac_tbl(mactbl); + adapter->info.vport.mac_tbl_digest = 0; mutex_unlock(&adapter->mactbl_lock); } From e82b7c388a26d83bf0037ba8f6f3c964ccad2f5b Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 26 Sep 2017 06:44:20 -0700 Subject: [PATCH 048/296] IB/opa_vnic: Properly set vesw port status Update eth_link_status and operating status information to represent the overall status of the virtual Ethernet switch port. Reviewed-by: Sudeep Dutt Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- .../infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index 30f78587e23f..ce57e0f10289 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -112,6 +112,27 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, return rc; } +static void opa_vnic_update_state(struct opa_vnic_adapter *adapter, bool up) +{ + struct __opa_veswport_info *info = &adapter->info; + + mutex_lock(&adapter->lock); + /* Operational state can only be DROP_ALL or FORWARDING */ + if ((info->vport.config_state == OPA_VNIC_STATE_FORWARDING) && up) { + info->vport.oper_state = OPA_VNIC_STATE_FORWARDING; + info->vport.eth_link_status = OPA_VNIC_ETH_LINK_UP; + } else { + info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; + info->vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + } + + if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) + netif_dormant_off(adapter->netdev); + else + netif_dormant_on(adapter->netdev); + mutex_unlock(&adapter->lock); +} + /* opa_vnic_process_vema_config - process vema configuration updates */ void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) { @@ -164,14 +185,8 @@ void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) adapter->flow_tbl[i] = port_count ? 
port_num[i % port_count] : OPA_VNIC_INVALID_PORT; - /* Operational state can only be DROP_ALL or FORWARDING */ - if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) { - info->vport.oper_state = OPA_VNIC_STATE_FORWARDING; - netif_dormant_off(netdev); - } else { - info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; - netif_dormant_on(netdev); - } + /* update state */ + opa_vnic_update_state(adapter, !!(netdev->flags & IFF_UP)); } /* @@ -269,8 +284,8 @@ static int opa_netdev_open(struct net_device *netdev) return rc; } - /* Update eth link status and send trap */ - adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP; + /* Update status and send trap */ + opa_vnic_update_state(adapter, true); opa_vnic_vema_report_event(adapter, OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); return 0; @@ -288,8 +303,8 @@ static int opa_netdev_close(struct net_device *netdev) return rc; } - /* Update eth link status and send trap */ - adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + /* Update status and send trap */ + opa_vnic_update_state(adapter, false); opa_vnic_vema_report_event(adapter, OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); return 0; From b209a368eb72cacce290e327d3f783e9cdee040c Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 26 Sep 2017 06:44:26 -0700 Subject: [PATCH 049/296] IB/opa_vnic: Add routing control information Add protocol specific routing control information in the encapsulation header as per the configuration. Reviewed-by: Sudeep Dutt Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Scott Franco Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- .../infiniband/ulp/opa_vnic/opa_vnic_encap.c | 41 ++++++++++++++++++- .../infiniband/ulp/opa_vnic/opa_vnic_encap.h | 16 +++++++- .../ulp/opa_vnic/opa_vnic_internal.h | 4 +- .../ulp/opa_vnic/opa_vnic_vema_iface.c | 4 ++ 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c index a72278e9cd27..4be3aef40bd2 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -406,6 +406,42 @@ u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb) return vl; } +/* opa_vnic_get_rc - return the routing control */ +static u8 opa_vnic_get_rc(struct __opa_veswport_info *info, + struct sk_buff *skb) +{ + u8 proto, rout_ctrl; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IPV6): + proto = ipv6_hdr(skb)->nexthdr; + if (proto == IPPROTO_TCP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV6_TCP); + else if (proto == IPPROTO_UDP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV6_UDP); + else + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV6); + break; + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + if (proto == IPPROTO_TCP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV4_TCP); + else if (proto == IPPROTO_UDP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV4_UDP); + else + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV4); + break; + default: + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, DEFAULT); + } + + return rout_ctrl; +} + /* opa_vnic_calc_entropy - calculate the packet entropy */ u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb) { @@ -448,7 +484,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) { struct __opa_veswport_info *info = &adapter->info; struct opa_vnic_skb_mdata *mdata; - u8 
def_port, sc, entropy, *hdr; + u8 def_port, sc, rc, entropy, *hdr; u16 len, l4_hdr; u32 dlid; @@ -459,6 +495,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) len = opa_vnic_wire_length(skb); dlid = opa_vnic_get_dlid(adapter, skb, def_port); sc = opa_vnic_get_sc(info, skb); + rc = opa_vnic_get_rc(info, skb); l4_hdr = info->vesw.vesw_id; mdata = skb_push(skb, sizeof(*mdata)); @@ -471,6 +508,6 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) } opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len, - info->vesw.pkey, entropy, sc, 0, + info->vesw.pkey, entropy, sc, rc, OPA_VNIC_L4_ETHR, l4_hdr); } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index be45697b8e60..e4c9bf2ef7e2 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -103,6 +103,17 @@ #define OPA_VNIC_ETH_LINK_UP 1 #define OPA_VNIC_ETH_LINK_DOWN 2 +/* routing control */ +#define OPA_VNIC_ENCAP_RC_DEFAULT 0 +#define OPA_VNIC_ENCAP_RC_IPV4 4 +#define OPA_VNIC_ENCAP_RC_IPV4_UDP 8 +#define OPA_VNIC_ENCAP_RC_IPV4_TCP 12 +#define OPA_VNIC_ENCAP_RC_IPV6 16 +#define OPA_VNIC_ENCAP_RC_IPV6_TCP 20 +#define OPA_VNIC_ENCAP_RC_IPV6_UDP 24 + +#define OPA_VNIC_ENCAP_RC_EXT(w, b) (((w) >> OPA_VNIC_ENCAP_RC_ ## b) & 0x7) + /** * struct opa_vesw_info - OPA vnic switch information * @fabric_id: 10-bit fabric id @@ -111,6 +122,7 @@ * @pkey: partition key * @u_mcast_dlid: unknown multicast dlid * @u_ucast_dlid: array of unknown unicast dlids + * @rc: routing control * @eth_mtu: Ethernet MTU */ struct opa_vesw_info { @@ -127,7 +139,9 @@ struct opa_vesw_info { __be32 u_mcast_dlid; __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; - u8 rsvd3[60]; + __be32 rc; + + u8 rsvd3[56]; __be16 eth_mtu; u8 rsvd4[2]; } __packed; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index a90c63125671..afd95f432262 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -89,7 +89,9 @@ struct __opa_vesw_info { u32 u_mcast_dlid; u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; - u8 rsvd3[60]; + u32 rc; + + u8 rsvd3[56]; u16 eth_mtu; u8 rsvd4[2]; } __packed; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index 5553900848e3..868b5aec1537 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -176,6 +176,8 @@ void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter, for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]); + info->rc = cpu_to_be32(src->rc); + memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); info->eth_mtu = cpu_to_be16(src->eth_mtu); memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4)); @@ -208,6 +210,8 @@ void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter, for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]); + dst->rc = be32_to_cpu(info->rc); + memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); dst->eth_mtu = be16_to_cpu(info->eth_mtu); memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4)); From e870b4a1f57afb7e913cb05609fd891aa00755a8 Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Tue, 26 Sep 2017 07:00:04 -0700 Subject: [PATCH 050/296] 
IB/hfi1: Add new state complete decodes for LNI failures Add state decodes for link width negotiation, verify cap time out and secure data resolution failures. Reviewed-by: Jan Sokolowski Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b2ed4b9cda6e..ef27e2b2482d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10159,6 +10159,10 @@ static const char * const state_complete_reasons[] = { [0x33] = "Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy", [0x34] = tx_out_of_policy, + [0x35] = "Negotiated link width is mutually exclusive", + [0x36] = + "Timed out before receiving verifycap frames in VerifyCap.Exchange", + [0x37] = "Unable to resolve secure data exchange", }; static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd, From d34ed562ac0af306037d0985fb1b7fad97f91f1e Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 26 Sep 2017 07:00:11 -0700 Subject: [PATCH 051/296] IB/hfi1: Convert the macro AHG_HEADER_SET into an inline function AHG_HEADER_SET macro doesn't conform to the coding standards as it can affect the control flow. Convert the macro AHG_HEADER_SET into an inline function ahg_header_set(). Cc: Leon Romanovsky Reviewed-by: Mike Marciniszyn Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 58 +++++++++++++++++--------- drivers/infiniband/hw/hfi1/user_sdma.h | 29 +++++++++---- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c0c0e0445cbf..4ea29ed19dd3 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1254,20 +1254,25 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, struct user_sdma_txreq *tx, u32 datalen) { u32 ahg[AHG_KDETH_ARRAY_SIZE]; - int diff = 0; + int idx = 0; u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); + size_t array_size = ARRAY_SIZE(ahg); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ - AHG_HEADER_SET(ahg, diff, 0, 0, 12, - cpu_to_le16(LRH2PBC(lrhlen))); + idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12, + (__force u16)cpu_to_le16(LRH2PBC(lrhlen))); + if (idx < 0) + return idx; /* LRH.PktLen (we need the full 16 bits due to byte swap) */ - AHG_HEADER_SET(ahg, diff, 3, 0, 16, - cpu_to_be16(lrhlen >> 2)); + idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16, + (__force u16)cpu_to_be16(lrhlen >> 2)); + if (idx < 0) + return idx; } /* @@ -1278,12 +1283,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 
0x7fffffff : 0xffffff); if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; - AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); - AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); + idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16, + (__force u16)cpu_to_be16(val32 >> 16)); + if (idx < 0) + return idx; + idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16, + (__force u16)cpu_to_be16(val32 & 0xffff)); + if (idx < 0) + return idx; /* KDETH.Offset */ - AHG_HEADER_SET(ahg, diff, 15, 0, 16, - cpu_to_le16(req->koffset & 0xffff)); - AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16)); + idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16, + (__force u16)cpu_to_le16(req->koffset & 0xffff)); + if (idx < 0) + return idx; + idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16, + (__force u16)cpu_to_le16(req->koffset >> 16)); + if (idx < 0) + return idx; if (req_opcode(req->info.ctrl) == EXPECTED) { __le16 val; @@ -1310,10 +1326,13 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ - AHG_HEADER_SET(ahg, diff, 7, 0, 16, - ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | + idx = ahg_header_set( + ahg, idx, array_size, 7, 0, 16, + ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | ((req->tidoffset >> omfactor) - & 0x7fff))); + & 0x7fff))); + if (idx < 0) + return idx; /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | (EXP_TID_GET(tidval, IDX) & 0x3ff)); @@ -1330,21 +1349,22 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_KDETH_INTR_SHIFT)); } - AHG_HEADER_SET(ahg, diff, 7, 16, 14, val); + idx = ahg_header_set(ahg, idx, array_size, + 7, 16, 14, (__force u16)val); + if (idx < 0) + return idx; } - if (diff < 0) - return diff; trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, req->sde->this_idx, - req->ahg_idx, ahg, diff, tidval); + req->ahg_idx, ahg, idx, tidval); sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_USE_AHG, - datalen, req->ahg_idx, diff, + datalen, req->ahg_idx, idx, ahg, sizeof(req->hdr), user_sdma_txreq_cb); - return diff; + return idx; } /* diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 9b8bb5634c0d..a3d192424344 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -80,15 +80,26 @@ #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) -#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ - do { \ - if ((idx) < ARRAY_SIZE((arr))) \ - (arr)[(idx++)] = sdma_build_ahg_descriptor( \ - (__force u16)(value), (dw), (bit), \ - (width)); \ - else \ - return -ERANGE; \ - } while (0) +/** + * Build an SDMA AHG header update descriptor and save it to an array. + * @arr - Array to save the descriptor to. + * @idx - Index of the array at which the descriptor will be saved. + * @array_size - Size of the array arr. + * @dw - Update index into the header in DWs. + * @bit - Start bit. + * @width - Field width. + * @value - 16 bits of immediate data to write into the field. + * Returns -ERANGE if idx is invalid. If successful, returns the next index + * (idx + 1) of the array to be used for the next descriptor. 
+ */ +static inline int ahg_header_set(u32 *arr, int idx, size_t array_size, + u8 dw, u8 bit, u8 width, u16 value) +{ + if ((size_t)idx >= array_size) + return -ERANGE; + arr[idx++] = sdma_build_ahg_descriptor(value, dw, bit, width); + return idx; +} /* Tx request flag bits */ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ From 4029e2a313348a0a44f9d41b681763c69160dfd0 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 26 Sep 2017 07:00:17 -0700 Subject: [PATCH 052/296] IB/hfi1: Remove the debug trace message in pin_sdma_pages() Remove the debug trace statement in pin_sdma_pages() that gets executed when there is a memory allocation failure as the trace message doesn't help with debugging the memory allocation failure. Cc: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 4ea29ed19dd3..01b9a9c6c2af 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -956,10 +956,8 @@ static int pin_sdma_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - SDMA_DBG(req, "Failed page array alloc"); + if (!pages) return -ENOMEM; - } memcpy(pages, node->pages, node->npages * sizeof(*pages)); npages -= node->npages; From c4bc61568048d6764d8527d76dd8d356619267d2 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:00:24 -0700 Subject: [PATCH 053/296] IB/qib: Update QIB to use the latest PCI API The QIB PCI IRQ code uses an obsolete PCI API. Updating the code to use the new PCI IRQ API and any necessary changes because of the new API. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 8 +- drivers/infiniband/hw/qib/qib_7220.h | 1 - drivers/infiniband/hw/qib/qib_iba6120.c | 35 ++---- drivers/infiniband/hw/qib/qib_iba7220.c | 52 +++----- drivers/infiniband/hw/qib/qib_iba7322.c | 158 ++++++++++-------------- drivers/infiniband/hw/qib/qib_pcie.c | 125 +++++-------------- 6 files changed, 120 insertions(+), 259 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index f9e1c69603a5..1167a9c1776b 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -443,14 +443,12 @@ struct qib_irq_notify; #endif struct qib_msix_entry { - int irq; void *arg; #ifdef CONFIG_INFINIBAND_QIB_DCA int dca; int rcv; struct qib_irq_notify *notifier; #endif - char name[MAX_NAME_SIZE]; cpumask_var_t mask; }; @@ -1434,10 +1432,8 @@ int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, const struct pci_device_id *); void qib_pcie_ddcleanup(struct qib_devdata *); int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent); -int qib_reinit_intr(struct qib_devdata *); -void qib_enable_intx(struct qib_devdata *dd); -void qib_nomsi(struct qib_devdata *); -void qib_nomsix(struct qib_devdata *); +void qib_free_irq(struct qib_devdata *dd); +int qib_reinit_intr(struct qib_devdata *dd); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); /* interrupts for device */ diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h index a5356cb4252e..4ec3dc1fbfb4 100644 --- a/drivers/infiniband/hw/qib/qib_7220.h +++ b/drivers/infiniband/hw/qib/qib_7220.h @@ -67,7 +67,6 @@ struct qib_chip_specific { u32 lastbuf_for_pio; u32 updthresh; /* current AvailUpdThld */ u32 updthresh_dflt; /* default AvailUpdThld */ - int irq; u8 presets_needed; u8 relock_timer_active; char emsgbuf[128]; diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 3259a60e4f4f..c4a4c57e0f0a 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -245,7 +245,6 @@ struct qib_chip_specific { u64 iblnkerrsnap; u64 ibcctrl; /* shadow for kr_ibcctrl */ u32 lastlinkrecov; /* link recovery issue */ - int irq; u32 cntrnamelen; u32 portcntrnamelen; u32 ncntrs; @@ -1485,15 +1484,6 @@ static void qib_6120_setup_setextled(struct qib_pportdata *ppd, u32 on) spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); } -static void qib_6120_free_irq(struct qib_devdata *dd) -{ - if (dd->cspec->irq) { - free_irq(dd->cspec->irq, dd); - dd->cspec->irq = 0; - } - qib_nomsi(dd); -} - /** * qib_6120_setup_cleanup - clean up any per-chip chip-specific stuff * @dd: the qlogic_ib device @@ -1502,7 +1492,7 @@ static void qib_6120_free_irq(struct qib_devdata *dd) */ static void qib_6120_setup_cleanup(struct qib_devdata *dd) { - qib_6120_free_irq(dd); + qib_free_irq(dd); kfree(dd->cspec->cntrs); kfree(dd->cspec->portcntrs); if (dd->cspec->dummy_hdrq) { @@ -1706,6 +1696,8 @@ static irqreturn_t qib_6120intr(int irq, void *data) */ static void qib_setup_6120_interrupt(struct qib_devdata *dd) { + int ret; + /* * If the chip supports added error indication via GPIO pins, * enable interrupts on those bits so the interrupt routine @@ -1719,19 +1711,12 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd) qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); } - if (!dd->cspec->irq) + ret = 
pci_request_irq(dd->pcidev, 0, qib_6120intr, NULL, dd, + QIB_DRV_NAME); + if (ret) qib_dev_err(dd, - "irq is 0, BIOS error? Interrupts won't work\n"); - else { - int ret; - - ret = request_irq(dd->cspec->irq, qib_6120intr, 0, - QIB_DRV_NAME, dd); - if (ret) - qib_dev_err(dd, - "Couldn't setup interrupt (irq=%d): %d\n", - dd->cspec->irq, ret); - } + "Couldn't setup interrupt (irq=%d): %d\n", + pci_irq_vector(dd->pcidev, 0), ret); } /** @@ -3490,7 +3475,7 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, dd->f_bringup_serdes = qib_6120_bringup_serdes; dd->f_cleanup = qib_6120_setup_cleanup; dd->f_clear_tids = qib_6120_clear_tids; - dd->f_free_irq = qib_6120_free_irq; + dd->f_free_irq = qib_free_irq; dd->f_get_base_info = qib_6120_get_base_info; dd->f_get_msgheader = qib_6120_get_msgheader; dd->f_getsendbuf = qib_6120_getsendbuf; @@ -3559,8 +3544,6 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, if (qib_pcie_params(dd, 8, NULL)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); - dd->cspec->irq = pdev->irq; /* save IRQ */ - /* clear diagctrl register, in case diags were running and crashed */ qib_write_kreg(dd, kr_hwdiagctrl, 0); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 04bdd3d487b1..78ce79be4120 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1780,15 +1780,6 @@ static void qib_setup_7220_setextled(struct qib_pportdata *ppd, u32 on) qib_write_kreg(dd, kr_rcvpktledcnt, ledblink); } -static void qib_7220_free_irq(struct qib_devdata *dd) -{ - if (dd->cspec->irq) { - free_irq(dd->cspec->irq, dd); - dd->cspec->irq = 0; - } - qib_nomsi(dd); -} - /* * qib_setup_7220_cleanup - clean up any per-chip chip-specific stuff * @dd: the qlogic_ib device @@ -1798,7 +1789,7 @@ static void qib_7220_free_irq(struct qib_devdata *dd) */ static void qib_setup_7220_cleanup(struct qib_devdata *dd) { - qib_7220_free_irq(dd); + qib_free_irq(dd); kfree(dd->cspec->cntrs); kfree(dd->cspec->portcntrs); } @@ -2026,20 +2017,14 @@ static irqreturn_t qib_7220intr(int irq, void *data) */ static void qib_setup_7220_interrupt(struct qib_devdata *dd) { - if (!dd->cspec->irq) - qib_dev_err(dd, - "irq is 0, BIOS error? Interrupts won't work\n"); - else { - int ret = request_irq(dd->cspec->irq, qib_7220intr, - dd->msi_lo ? 0 : IRQF_SHARED, - QIB_DRV_NAME, dd); + int ret; - if (ret) - qib_dev_err(dd, - "Couldn't setup %s interrupt (irq=%d): %d\n", - dd->msi_lo ? "MSI" : "INTx", - dd->cspec->irq, ret); - } + ret = pci_request_irq(dd->pcidev, 0, qib_7220intr, NULL, dd, + QIB_DRV_NAME); + if (ret) + qib_dev_err(dd, "Couldn't setup %s interrupt (irq=%d): %d\n", + dd->pcidev->msi_enabled ? "MSI" : "INTx", + pci_irq_vector(dd->pcidev, 0), ret); } /** @@ -3302,16 +3287,12 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) return 0; qib_devinfo(dd->pcidev, - "MSI interrupt not detected, trying INTx interrupts\n"); - qib_7220_free_irq(dd); - qib_enable_intx(dd); - /* - * Some newer kernels require free_irq before disable_msi, - * and irq can be changed during disable and INTx enable - * and we need to therefore use the pcidev->irq value, - * not our saved MSI value. 
- */ - dd->cspec->irq = dd->pcidev->irq; + "MSI interrupt not detected, trying INTx interrupts\n"); + + qib_free_irq(dd); + dd->msi_lo = 0; + if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_LEGACY) < 0) + qib_dev_err(dd, "Failed to enable INTx\n"); qib_setup_7220_interrupt(dd); return 1; } @@ -4535,7 +4516,7 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, dd->f_bringup_serdes = qib_7220_bringup_serdes; dd->f_cleanup = qib_setup_7220_cleanup; dd->f_clear_tids = qib_7220_clear_tids; - dd->f_free_irq = qib_7220_free_irq; + dd->f_free_irq = qib_free_irq; dd->f_get_base_info = qib_7220_get_base_info; dd->f_get_msgheader = qib_7220_get_msgheader; dd->f_getsendbuf = qib_7220_getsendbuf; @@ -4618,9 +4599,6 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); - /* save IRQ for possible later use */ - dd->cspec->irq = pdev->irq; - if (qib_read_kreg64(dd, kr_hwerrstatus) & QLOGIC_IB_HWE_SERDESPLLFAILED) qib_write_kreg(dd, kr_hwerrclear, diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 14cadf6d6214..4d02cffe4e03 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -553,7 +553,6 @@ struct qib_chip_specific { u32 updthresh; /* current AvailUpdThld */ u32 updthresh_dflt; /* default AvailUpdThld */ u32 r1; - int irq; u32 num_msix_entries; u32 sdmabufcnt; u32 lastbuf_for_pio; @@ -756,10 +755,8 @@ static void check_7322_rxe_status(struct qib_pportdata *); static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); #ifdef CONFIG_INFINIBAND_QIB_DCA static void qib_setup_dca(struct qib_devdata *dd); -static void setup_dca_notifier(struct qib_devdata *dd, - struct qib_msix_entry *m); -static void reset_dca_notifier(struct qib_devdata *dd, - struct qib_msix_entry *m); +static void setup_dca_notifier(struct qib_devdata *dd, int msixnum); +static void reset_dca_notifier(struct qib_devdata *dd, int msixnum); #endif /** @@ -2778,7 +2775,7 @@ static void qib_setup_dca(struct qib_devdata *dd) qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i, cspec->dca_rcvhdr_ctrl[i]); for (i = 0; i < cspec->num_msix_entries; i++) - setup_dca_notifier(dd, &cspec->msix_entries[i]); + setup_dca_notifier(dd, i); } static void qib_irq_notifier_notify(struct irq_affinity_notify *notify, @@ -2820,49 +2817,41 @@ static void qib_irq_notifier_release(struct kref *ref) } #endif -/* - * Disable MSIx interrupt if enabled, call generic MSIx code - * to cleanup, and clear pending MSIx interrupts. - * Used for fallback to INTx, after reset, and when MSIx setup fails. 
- */ -static void qib_7322_nomsix(struct qib_devdata *dd) +static void qib_7322_free_irq(struct qib_devdata *dd) { u64 intgranted; - int n; + int i; dd->cspec->main_int_mask = ~0ULL; - n = dd->cspec->num_msix_entries; - if (n) { - int i; - dd->cspec->num_msix_entries = 0; - for (i = 0; i < n; i++) { + for (i = 0; i < dd->cspec->num_msix_entries; i++) { + /* only free IRQs that were allocated */ + if (dd->cspec->msix_entries[i].arg) { #ifdef CONFIG_INFINIBAND_QIB_DCA - reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); + reset_dca_notifier(dd, i); #endif - irq_set_affinity_hint( - dd->cspec->msix_entries[i].irq, NULL); + irq_set_affinity_hint(pci_irq_vector(dd->pcidev, i), + NULL); free_cpumask_var(dd->cspec->msix_entries[i].mask); - free_irq(dd->cspec->msix_entries[i].irq, - dd->cspec->msix_entries[i].arg); + pci_free_irq(dd->pcidev, i, + dd->cspec->msix_entries[i].arg); } - qib_nomsix(dd); } + + /* If num_msix_entries was 0, disable the INTx IRQ */ + if (!dd->cspec->num_msix_entries) + pci_free_irq(dd->pcidev, 0, dd); + else + dd->cspec->num_msix_entries = 0; + + pci_free_irq_vectors(dd->pcidev); + /* make sure no MSIx interrupts are left pending */ intgranted = qib_read_kreg64(dd, kr_intgranted); if (intgranted) qib_write_kreg(dd, kr_intgranted, intgranted); } -static void qib_7322_free_irq(struct qib_devdata *dd) -{ - if (dd->cspec->irq) { - free_irq(dd->cspec->irq, dd); - dd->cspec->irq = 0; - } - qib_7322_nomsix(dd); -} - static void qib_setup_7322_cleanup(struct qib_devdata *dd) { int i; @@ -3329,22 +3318,20 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) #ifdef CONFIG_INFINIBAND_QIB_DCA -static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +static void reset_dca_notifier(struct qib_devdata *dd, int msixnum) { - if (!m->dca) + if (!dd->cspec->msix_entries[msixnum].dca) return; - qib_devinfo(dd->pcidev, - "Disabling notifier on HCA %d irq %d\n", - dd->unit, - m->irq); - irq_set_affinity_notifier( - m->irq, - NULL); - m->notifier = NULL; + + qib_devinfo(dd->pcidev, "Disabling notifier on HCA %d irq %d\n", + dd->unit, pci_irq_vector(dd->pcidev, msixnum)); + irq_set_affinity_notifier(pci_irq_vector(dd->pcidev, msixnum), NULL); + dd->cspec->msix_entries[msixnum].notifier = NULL; } -static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +static void setup_dca_notifier(struct qib_devdata *dd, int msixnum) { + struct qib_msix_entry *m = &dd->cspec->msix_entries[msixnum]; struct qib_irq_notify *n; if (!m->dca) @@ -3354,7 +3341,7 @@ static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) int ret; m->notifier = n; - n->notify.irq = m->irq; + n->notify.irq = pci_irq_vector(dd->pcidev, msixnum); n->notify.notify = qib_irq_notifier_notify; n->notify.release = qib_irq_notifier_release; n->arg = m->arg; @@ -3415,22 +3402,17 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) if (!dd->cspec->num_msix_entries) { /* Try to get INTx interrupt */ try_intx: - if (!dd->pcidev->irq) { - qib_dev_err(dd, - "irq is 0, BIOS error? 
Interrupts won't work\n"); - goto bail; - } - ret = request_irq(dd->pcidev->irq, qib_7322intr, - IRQF_SHARED, QIB_DRV_NAME, dd); + ret = pci_request_irq(dd->pcidev, 0, qib_7322intr, NULL, dd, + QIB_DRV_NAME); if (ret) { - qib_dev_err(dd, + qib_dev_err( + dd, "Couldn't setup INTx interrupt (irq=%d): %d\n", - dd->pcidev->irq, ret); - goto bail; + pci_irq_vector(dd->pcidev, 0), ret); + return; } - dd->cspec->irq = dd->pcidev->irq; dd->cspec->main_int_mask = ~0ULL; - goto bail; + return; } /* Try to get MSIx interrupts */ @@ -3458,10 +3440,6 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) #ifdef CONFIG_INFINIBAND_QIB_DCA int dca = 0; #endif - - dd->cspec->msix_entries[msixnum]. - name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] - = '\0'; if (i < ARRAY_SIZE(irq_table)) { if (irq_table[i].port) { /* skip if for a non-configured port */ @@ -3475,11 +3453,10 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) #endif lsb = irq_table[i].lsb; handler = irq_table[i].handler; - snprintf(dd->cspec->msix_entries[msixnum].name, - sizeof(dd->cspec->msix_entries[msixnum].name) - - 1, - QIB_DRV_NAME "%d%s", dd->unit, - irq_table[i].name); + ret = pci_request_irq(dd->pcidev, msixnum, handler, + NULL, arg, QIB_DRV_NAME "%d%s", + dd->unit, + irq_table[i].name); } else { unsigned ctxt; @@ -3495,37 +3472,25 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) #endif lsb = QIB_I_RCVAVAIL_LSB + ctxt; handler = qib_7322pintr; - snprintf(dd->cspec->msix_entries[msixnum].name, - sizeof(dd->cspec->msix_entries[msixnum].name) - - 1, - QIB_DRV_NAME "%d (kctx)", dd->unit); + ret = pci_request_irq(dd->pcidev, msixnum, handler, + NULL, arg, + QIB_DRV_NAME "%d (kctx)", + dd->unit); } - dd->cspec->msix_entries[msixnum].irq = pci_irq_vector( - dd->pcidev, msixnum); - if (dd->cspec->msix_entries[msixnum].irq < 0) { - qib_dev_err(dd, - "Couldn't get MSIx irq (vec=%d): %d\n", - msixnum, - dd->cspec->msix_entries[msixnum].irq); - qib_7322_nomsix(dd); - goto try_intx; - } - ret = request_irq(dd->cspec->msix_entries[msixnum].irq, - handler, 0, - dd->cspec->msix_entries[msixnum].name, - arg); if (ret) { /* * Shouldn't happen since the enable said we could * have as many as we are trying to setup here. 
*/ qib_dev_err(dd, - "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", - msixnum, - dd->cspec->msix_entries[msixnum].irq, - ret); - qib_7322_nomsix(dd); + "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", + msixnum, + pci_irq_vector(dd->pcidev, msixnum), + ret); + qib_7322_free_irq(dd); + pci_alloc_irq_vectors(dd->pcidev, 1, 1, + PCI_IRQ_LEGACY); goto try_intx; } dd->cspec->msix_entries[msixnum].arg = arg; @@ -3559,7 +3524,7 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) dd->cspec->msix_entries[msixnum].mask); } irq_set_affinity_hint( - dd->cspec->msix_entries[msixnum].irq, + pci_irq_vector(dd->pcidev, msixnum), dd->cspec->msix_entries[msixnum].mask); } msixnum++; @@ -3570,7 +3535,6 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) dd->cspec->main_int_mask = mask; tasklet_init(&dd->error_tasklet, qib_error_tasklet, (unsigned long)dd); -bail:; } /** @@ -3674,8 +3638,9 @@ static int qib_do_7322_reset(struct qib_devdata *dd) /* no interrupts till re-initted */ qib_7322_set_intr_state(dd, 0); + qib_7322_free_irq(dd); + if (msix_entries) { - qib_7322_nomsix(dd); /* can be up to 512 bytes, too big for stack */ msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries * sizeof(u64), GFP_KERNEL); @@ -3765,11 +3730,11 @@ static int qib_do_7322_reset(struct qib_devdata *dd) write_7322_init_portregs(&dd->pport[i]); write_7322_initregs(dd); - if (qib_pcie_params(dd, dd->lbus_width, - &dd->cspec->num_msix_entries)) + if (qib_pcie_params(dd, dd->lbus_width, &msix_entries)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); + dd->cspec->num_msix_entries = msix_entries; qib_setup_7322_interrupt(dd, 1); for (i = 0; i < dd->num_pports; ++i) { @@ -5197,8 +5162,9 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSIx interrupt not detected, trying INTx interrupts\n"); - qib_7322_nomsix(dd); - qib_enable_intx(dd); + qib_7322_free_irq(dd); + if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_LEGACY) < 0) + qib_dev_err(dd, "Failed to enable INTx\n"); qib_setup_7322_interrupt(dd, 0); return 1; } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index d90403e31a9d..6f4cc268926c 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -193,7 +193,7 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. 
*/ -static void qib_msi_setup(struct qib_devdata *dd, int pos) +static void qib_cache_msi_info(struct qib_devdata *dd, int pos) { struct pci_dev *pdev = dd->pcidev; u16 control; @@ -208,64 +208,39 @@ static void qib_msi_setup(struct qib_devdata *dd, int pos) &dd->msi_data); } -static int qib_allocate_irqs(struct qib_devdata *dd, u32 maxvec) -{ - unsigned int flags = PCI_IRQ_LEGACY; - - /* Check our capabilities */ - if (dd->pcidev->msix_cap) { - flags |= PCI_IRQ_MSIX; - } else { - if (dd->pcidev->msi_cap) { - flags |= PCI_IRQ_MSI; - /* Get msi_lo and msi_hi */ - qib_msi_setup(dd, dd->pcidev->msi_cap); - } - } - - if (!(flags & (PCI_IRQ_MSIX | PCI_IRQ_MSI))) - qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); - - return pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags); -} - int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent) { u16 linkstat, speed; int nvec; int maxvec; - int ret = 0; + unsigned int flags = PCI_IRQ_MSIX | PCI_IRQ_MSI; if (!pci_is_pcie(dd->pcidev)) { qib_dev_err(dd, "Can't find PCI Express capability!\n"); /* set up something... */ dd->lbus_width = 1; dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ - ret = -1; + nvec = -1; goto bail; } + if (dd->flags & QIB_HAS_INTX) + flags |= PCI_IRQ_LEGACY; maxvec = (nent && *nent) ? *nent : 1; - nvec = qib_allocate_irqs(dd, maxvec); - if (nvec < 0) { - ret = nvec; + nvec = pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags); + if (nvec < 0) goto bail; - } /* - * If nent exists, make sure to record how many vectors were allocated + * If nent exists, make sure to record how many vectors were allocated. + * If msix_enabled is false, return 0 so the fallback code works + * correctly. */ - if (nent) { - *nent = nvec; + if (nent) + *nent = !dd->pcidev->msix_enabled ? 0 : nvec; - /* - * If we requested (nent) MSIX, but msix_enabled is not set, - * pci_alloc_irq_vectors() enabled INTx. - */ - if (!dd->pcidev->msix_enabled) - qib_dev_err(dd, - "no msix vectors allocated, using INTx\n"); - } + if (dd->pcidev->msi_enabled) + qib_cache_msi_info(dd, dd->pcidev->msi_cap); pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); /* @@ -306,7 +281,21 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent) /* fill in string, even on errors */ snprintf(dd->lbus_info, sizeof(dd->lbus_info), "PCIe,%uMHz,x%u\n", dd->lbus_speed, dd->lbus_width); - return ret; + return nvec < 0 ? nvec : 0; +} + +/** + * qib_free_irq - Cleanup INTx and MSI interrupts + * @dd: valid pointer to qib dev data + * + * Since cleanup for INTx and MSI interrupts is trivial, have a common + * routine. + * + */ +void qib_free_irq(struct qib_devdata *dd) +{ + pci_free_irq(dd->pcidev, 0, dd); + pci_free_irq_vectors(dd->pcidev); } /* @@ -351,10 +340,10 @@ int qib_reinit_intr(struct qib_devdata *dd) dd->msi_data); ret = 1; bail: - if (!ret && (dd->flags & QIB_HAS_INTX)) { - qib_enable_intx(dd); + qib_free_irq(dd); + + if (!ret && (dd->flags & QIB_HAS_INTX)) ret = 1; - } /* and now set the pci master bit again */ pci_set_master(dd->pcidev); @@ -362,56 +351,6 @@ int qib_reinit_intr(struct qib_devdata *dd) return ret; } -/* - * Disable msi interrupt if enabled, and clear msi_lo. - * This is used primarily for the fallback to INTx, but - * is also used in reinit after reset, and during cleanup. - */ -void qib_nomsi(struct qib_devdata *dd) -{ - dd->msi_lo = 0; - pci_free_irq_vectors(dd->pcidev); -} - -/* - * Same as qib_nosmi, but for MSIx. 
- */ -void qib_nomsix(struct qib_devdata *dd) -{ - pci_free_irq_vectors(dd->pcidev); -} - -/* - * Similar to pci_intx(pdev, 1), except that we make sure - * msi(x) is off. - */ -void qib_enable_intx(struct qib_devdata *dd) -{ - u16 cw, new; - int pos; - struct pci_dev *pdev = dd->pcidev; - - if (pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY) < 0) - qib_dev_err(dd, "Failed to enable INTx\n"); - - pos = pdev->msi_cap; - if (pos) { - /* then turn off MSI */ - pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); - new = cw & ~PCI_MSI_FLAGS_ENABLE; - if (new != cw) - pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); - } - pos = pdev->msix_cap; - if (pos) { - /* then turn off MSIx */ - pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); - new = cw & ~PCI_MSIX_FLAGS_ENABLE; - if (new != cw) - pci_write_config_word(pdev, pos + PCI_MSIX_FLAGS, new); - } -} - /* * These two routines are helper routines for the device reset code * to move all the pcie code out of the chip-specific driver code. From 05cb18fda926ddce299280bd86cbc9d491306f28 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:00:30 -0700 Subject: [PATCH 054/296] IB/hfi1: Update HFI to use the latest PCI API The HFI PCI IRQ code uses an obsolete PCI API. Update the code to use the new PCI IRQ API and any necessary changes because of the new API. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 58 +++++++++++-------------------- drivers/infiniband/hw/hfi1/hfi.h | 2 -- 2 files changed, 21 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ef27e2b2482d..2cbc619a1754 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12929,7 +12929,7 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) if (!me->arg) /* => no irq, no affinity */ continue; hfi1_put_irq_affinity(dd, me); - free_irq(me->irq, me->arg); + pci_free_irq(dd->pcidev, i, me->arg); } /* clean structures */ @@ -12939,7 +12939,7 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) } else { /* INTx */ if (dd->requested_intx_irq) { - free_irq(dd->pcidev->irq, dd); + pci_free_irq(dd->pcidev, 0, dd); dd->requested_intx_irq = 0; } disable_intx(dd->pcidev); @@ -12998,10 +12998,8 @@ static int request_intx_irq(struct hfi1_devdata *dd) { int ret; - snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d", - dd->unit); - ret = request_irq(dd->pcidev->irq, general_interrupt, - IRQF_SHARED, dd->intx_name, dd); + ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd, + DRIVER_NAME "_%d", dd->unit); if (ret) dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", ret); @@ -13044,13 +13042,14 @@ static int request_msix_irqs(struct hfi1_devdata *dd) int idx; struct hfi1_ctxtdata *rcd = NULL; struct sdma_engine *sde = NULL; + char name[MAX_NAME_SIZE]; - /* obtain the arguments to request_irq */ + /* obtain the arguments to pci_request_irq */ if (first_general <= i && i < last_general) { idx = i - first_general; handler = general_interrupt; arg = dd; - snprintf(me->name, sizeof(me->name), + snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); err_info = "general"; me->type = IRQ_GENERAL; @@ -13059,7 +13058,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) sde = &dd->per_sdma[idx]; handler = sdma_interrupt; arg = sde; - snprintf(me->name, sizeof(me->name), + snprintf(name, sizeof(name), DRIVER_NAME 
"_%d sdma%d", dd->unit, idx); err_info = "sdma"; remap_sdma_interrupts(dd, idx, i); @@ -13078,7 +13077,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) handler = receive_context_interrupt; thread = receive_context_thread; arg = rcd; - snprintf(me->name, sizeof(me->name), + snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", dd->unit, idx); err_info = "receive context"; @@ -13099,18 +13098,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd) if (!arg) continue; /* make sure the name is terminated */ - me->name[sizeof(me->name) - 1] = 0; + name[sizeof(name) - 1] = 0; me->irq = pci_irq_vector(dd->pcidev, i); - /* - * On err return me->irq. Don't need to clear this - * because 'arg' has not been set, and cleanup will - * do the right thing. - */ - if (me->irq < 0) - return me->irq; - - ret = request_threaded_irq(me->irq, handler, thread, 0, - me->name, arg); + ret = pci_request_irq(dd->pcidev, i, handler, thread, arg, + name); if (ret) { dd_dev_err(dd, "unable to allocate %s interrupt, irq %d, index %d, err %d\n", @@ -13118,7 +13109,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) return ret; } /* - * assign arg after request_irq call, so it will be + * assign arg after pci_request_irq call, so it will be * cleaned up */ me->arg = arg; @@ -13136,7 +13127,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) int i; if (!dd->num_msix_entries) { - synchronize_irq(dd->pcidev->irq); + synchronize_irq(pci_irq_vector(dd->pcidev, 0)); return; } @@ -13157,7 +13148,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) return; hfi1_put_irq_affinity(dd, me); - free_irq(me->irq, me->arg); + pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); me->arg = NULL; } @@ -13180,28 +13171,21 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + idx) % 64); - - snprintf(me->name, sizeof(me->name), - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); - me->name[sizeof(me->name) - 1] = 0; me->type = IRQ_RCVCTXT; me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); - if (me->irq < 0) { - dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n", - idx, me->irq); - return; - } remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); - ret = request_threaded_irq(me->irq, receive_context_interrupt, - receive_context_thread, 0, me->name, arg); + ret = pci_request_irq(dd->pcidev, rcd->msix_intr, + receive_context_interrupt, + receive_context_thread, arg, + DRIVER_NAME "_%d kctxt%d", dd->unit, idx); if (ret) { dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", me->irq, idx, ret); return; } /* - * assign arg after request_irq call, so it will be + * assign arg after pci_request_irq call, so it will be * cleaned up */ me->arg = arg; @@ -13210,7 +13194,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) if (ret) { dd_dev_err(dd, "unable to pin IRQ %d\n", ret); - free_irq(me->irq, me->arg); + pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); } } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3ac9c307a285..0a64158c2e2d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -616,7 +616,6 @@ struct hfi1_msix_entry { enum irq_type type; int irq; void *arg; - char name[MAX_NAME_SIZE]; cpumask_t mask; struct irq_affinity_notify notify; }; @@ -1183,7 +1182,6 @@ struct hfi1_devdata { /* INTx information */ u32 requested_intx_irq; /* did we request one? 
*/ - char intx_name[MAX_NAME_SIZE]; /* INTx name */ /* general interrupt: mask of handled interrupts */ u64 gi_mask[CCE_NUM_INT_CSRS]; From aadd7020b5903e353c53b69090335e993b27d9d8 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 26 Sep 2017 07:00:37 -0700 Subject: [PATCH 055/296] IB/hfi1: Set default_desc1 just one time There is no reason to set the default descriptor flag on every SDMA engine initialization. Reviewed-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6781bcdb10b3..e7df7794c97e 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1392,6 +1392,13 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) return ret; idle_cnt = ns_to_cclock(dd, idle_cnt); + if (idle_cnt) + dd->default_desc1 = + SDMA_DESC1_HEAD_TO_HOST_FLAG; + else + dd->default_desc1 = + SDMA_DESC1_INT_REQ_FLAG; + if (!sdma_desct_intr) sdma_desct_intr = SDMA_DESC_INTR; @@ -1436,13 +1443,6 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->tail_csr = get_kctxt_csr_addr(dd, this_idx, SD(TAIL)); - if (idle_cnt) - dd->default_desc1 = - SDMA_DESC1_HEAD_TO_HOST_FLAG; - else - dd->default_desc1 = - SDMA_DESC1_INT_REQ_FLAG; - tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task, (unsigned long)sde); From 156d24d7001e9b451e2dfc4c5de410b753a8de0d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 26 Sep 2017 07:00:43 -0700 Subject: [PATCH 056/296] IB/hfi1: Remove unused link_default variable devdata->link_default is no longer variable; it is always HLS_DN_POLL, so remove it and use the HLS_DEFAULT constant instead. Maintain the number of structure holes by moving dc_shutdown. Reviewed-by: Sebastian Sanchez Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 ++---- drivers/infiniband/hw/hfi1/hfi.h | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2cbc619a1754..6b63201513fe 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9956,7 +9956,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which) val = ppd->phy_error_threshold; break; case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ - val = dd->link_default; + val = HLS_DEFAULT; break; case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */ @@ -10551,7 +10551,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) orig_new_state = state; if (state == HLS_DN_DOWNDEF) - state = dd->link_default; + state = HLS_DEFAULT; /* interpret poll -> poll as a link bounce */ poll_bounce = ppd->host_link_state == HLS_DN_POLL && @@ -14895,8 +14895,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, init_vl_arb_caches(ppd); } - dd->link_default = HLS_DN_POLL; - /* * Do remaining PCIe setup and save PCIe values in dd. * Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 0a64158c2e2d..611eebaeb10d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -535,6 +535,8 @@ struct rvt_sge_state; #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) #define HLS_DOWN ~(HLS_UP) +#define HLS_DEFAULT HLS_DN_POLL + /* use this MTU size if none other is given */ #define HFI1_DEFAULT_ACTIVE_MTU 10240 /* use this MTU size as the default maximum */ @@ -1108,8 +1110,7 @@ struct hfi1_devdata { u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; - /* default link down value (poll/sleep) */ - u8 link_default; + u8 dc_shutdown; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ @@ -1293,7 +1294,6 @@ struct hfi1_devdata { u8 oui1; u8 oui2; u8 oui3; - u8 dc_shutdown; /* Timer and counter used to detect RcvBufOvflCnt changes */ struct timer_list rcverr_timer; From 6fee036916f3efbd840631f2ea4ac88950c1592e Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Tue, 26 Sep 2017 07:00:50 -0700 Subject: [PATCH 057/296] IB/hfi1: Remove unnecessary error messages on alloc failures Per-cpu variables int_counter, rcv_limit, and send_schedule print unnecessary error messages on failed allocations. Remove the error messages. Reviewed-by: Harish Chegondi Reviewed-by: Dennis Dalessandro Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index fba77001c3a7..3e686d2edcc3 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1272,24 +1272,18 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) dd->int_counter = alloc_percpu(u64); if (!dd->int_counter) { ret = -ENOMEM; - hfi1_early_err(&pdev->dev, - "Could not allocate per-cpu int_counter\n"); goto bail; } dd->rcv_limit = alloc_percpu(u64); if (!dd->rcv_limit) { ret = -ENOMEM; - hfi1_early_err(&pdev->dev, - "Could not allocate per-cpu rcv_limit\n"); goto bail; } dd->send_schedule = alloc_percpu(u64); if (!dd->send_schedule) { ret = -ENOMEM; - hfi1_early_err(&pdev->dev, - "Could not allocate per-cpu int_counter\n"); goto bail; } From 21e5acc06403f6ce48ef98ad9d081cdec2944d60 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:00:56 -0700 Subject: [PATCH 058/296] IB/hfi1: Inline common calculation Calculating the offset to a context is done several times throughout the code. Create a common inlined function for doing this calculation. Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 21 ++++++++------------- drivers/infiniband/hw/hfi1/hfi.h | 6 ++++++ drivers/infiniband/hw/hfi1/user_exp_rcv.c | 6 ++---- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2bc89260235a..668c9bc19505 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -595,9 +595,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * Use the page where this context's flags are. User level * knows where it's own bitmap is within the page. 
*/ - memaddr = (unsigned long)(dd->events + - ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; + memaddr = (unsigned long) + (dd->events + uctxt_offset(uctxt)) & PAGE_MASK; memlen = PAGE_SIZE; /* * v3.7 removes VM_RESERVED but the effect is kept by @@ -779,8 +778,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. */ - ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; + ev = dd->events + uctxt_offset(uctxt) + fdata->subctxt; *ev = 0; spin_lock_irqsave(&dd->uctxt_lock, flags); @@ -1389,9 +1387,8 @@ static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, */ binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, fd->subctxt, 0); - offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fd->subctxt) * - sizeof(*dd->events)); + offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) * + sizeof(*dd->events)); binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, fd->subctxt, offset); @@ -1482,14 +1479,13 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) ctxt++) { uctxt = hfi1_rcd_get_by_index(dd, ctxt); if (uctxt) { - unsigned long *evs = dd->events + - (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS; + unsigned long *evs; int i; /* * subctxt_cnt is 0 if not shared, so do base * separately, first, then remaining subctxt, if any */ + evs = dd->events + uctxt_offset(uctxt); set_bit(evtbit, evs); for (i = 1; i < uctxt->subctxt_cnt; i++) set_bit(evtbit, evs + i); @@ -1555,8 +1551,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, if (!dd->events) return 0; - evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + subctxt; + evs = dd->events + uctxt_offset(uctxt) + subctxt; for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) { if (!test_bit(i, &events)) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 611eebaeb10d..fc1ee8d7ca0d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1374,6 +1374,12 @@ struct hfi1_devdata *hfi1_lookup(int unit); extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; +static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt) +{ + return (uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS; +} + int hfi1_init(struct hfi1_devdata *dd, int reinit); int hfi1_count_active_units(void); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6f6c14df383e..ff9ad69dc0a7 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -542,8 +542,7 @@ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, { struct hfi1_ctxtdata *uctxt = fd->uctxt; unsigned long *ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fd->subctxt); + (uctxt_offset(uctxt) + fd->subctxt); u32 *array; int ret = 0; @@ -942,8 +941,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) * process in question. 
*/ ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); + (uctxt_offset(uctxt) + fdata->subctxt); set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); } fdata->invalid_tid_idx++; From 6fab2a88f737da0858dd22ab5155ff3f6c89a166 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Tue, 26 Sep 2017 07:01:02 -0700 Subject: [PATCH 059/296] IB/hfi1: Remove unused hfi1_cpulist variables Following variables: hfi1_cpulist and hfi1_cpulist_count are unused. Remove them. Reviewed-by: Harish Chegondi Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 -- drivers/infiniband/hw/hfi1/init.c | 16 ---------------- 2 files changed, 18 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fc1ee8d7ca0d..1e1140e7cbcc 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1371,8 +1371,6 @@ struct hfi1_filedata { extern struct list_head hfi1_dev_list; extern spinlock_t hfi1_devs_lock; struct hfi1_devdata *hfi1_lookup(int unit); -extern u32 hfi1_cpulist_count; -extern unsigned long *hfi1_cpulist; static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt) { diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 3e686d2edcc3..04a383b8b0db 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -123,8 +123,6 @@ MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user static inline u64 encode_rcv_header_entry_size(u16 size); static struct idr hfi1_unit_table; -u32 hfi1_cpulist_count; -unsigned long *hfi1_cpulist; static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) @@ -1287,18 +1285,6 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } - if (!hfi1_cpulist_count) { - u32 count = num_online_cpus(); - - hfi1_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long), - GFP_KERNEL); - if (hfi1_cpulist) - hfi1_cpulist_count = count; - else - hfi1_early_err( - &pdev->dev, - "Could not alloc cpulist info, cpu affinity might be wrong\n"); - } kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; @@ -1471,8 +1457,6 @@ static void __exit hfi1_mod_cleanup(void) node_affinity_destroy(); hfi1_wss_exit(); hfi1_dbg_exit(); - hfi1_cpulist_count = 0; - kfree(hfi1_cpulist); idr_destroy(&hfi1_unit_table); dispose_firmware(); /* asymmetric with obtain_firmware() */ From 442e55661db1d5833b0eb5b85f78a845ace7a7cc Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 26 Sep 2017 07:01:09 -0700 Subject: [PATCH 060/296] IB/hfi1: Extend input hdr tracing for packet type The etype field from the rhf can have more values than just IB and BYPASS. Extend the current tracing to report a symbolic for the etype field for non-bypass packets. Bypass packets will continue to report the l2. As part of this fix the etype and the l2 are added to the tracing struct and are available for trigger and filter operations. 
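A note on the mechanism, with an illustrative sketch: __print_symbolic() keeps the raw integer in the trace ring buffer and only renders the name at output time, which is why moving etype into the tracing struct makes it usable by triggers and filters. The stand-alone C below mirrors the show_packettype() mapping as a plain function; the RHF_RCV_TYPE_* values are assumed here for the example and may not match the hardware definitions.

/*
 * Illustrative sketch only, not part of the patch: how a symbolic
 * mapping like show_packettype() resolves the etype field to a name.
 */
#include <stdio.h>

enum rhf_rcv_type {		/* assumed values, illustration only */
	RHF_RCV_TYPE_EXPECTED,
	RHF_RCV_TYPE_EAGER,
	RHF_RCV_TYPE_IB,
	RHF_RCV_TYPE_ERROR,
	RHF_RCV_TYPE_BYPASS,
};

static const char *show_packettype_sketch(unsigned int etype)
{
	switch (etype) {
	case RHF_RCV_TYPE_EXPECTED:	return "EXPECTED";
	case RHF_RCV_TYPE_EAGER:	return "EAGER";
	case RHF_RCV_TYPE_IB:		return "IB";
	case RHF_RCV_TYPE_ERROR:	return "ERROR";
	case RHF_RCV_TYPE_BYPASS:	return "BYPASS";
	}
	return "UNKNOWN";
}

int main(void)
{
	/* Non-bypass packets report the etype name; bypass reports l2. */
	printf("%s\n", show_packettype_sketch(RHF_RCV_TYPE_IB));
	return 0;
}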
Fixes: 863cf89d472f ("IB/hfi1: Add 16B trace support") Reviewed-by: Don Hiatt Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace.c | 23 ++++++------ drivers/infiniband/hw/hfi1/trace.h | 10 ++++++ drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 44 ++++++++++++----------- drivers/infiniband/hw/hfi1/trace_rx.h | 11 +----- 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 9938bb983ce6..ce5a43f1da77 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -91,12 +91,17 @@ u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opa_hdr) return __get_16b_hdr_len(&opa_hdr->opah); } -const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) +const char *hfi1_trace_get_packet_l4_str(u8 l4) { - if (packet->etype != RHF_RCV_TYPE_BYPASS) - return "IB"; + if (l4) + return "16B"; + else + return "9B"; +} - switch (hfi1_16B_get_l2(packet->hdr)) { +const char *hfi1_trace_get_packet_l2_str(u8 l2) +{ + switch (l2) { case 0: return "0"; case 1: @@ -109,14 +114,6 @@ const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) return ""; } -const char *hfi1_trace_get_packet_type_str(u8 l4) -{ - if (l4) - return "16B"; - else - return "9B"; -} - #define IMM_PRN "imm:%d" #define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" #define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index af50c0793450..8540463ef3f7 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -44,6 +44,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ + +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + #include "trace_dbg.h" #include "trace_misc.h" #include "trace_ctxts.h" diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 6721f84dafa5..6dd0d639f27d 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license.
@@ -99,8 +99,7 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); -const char *hfi1_trace_get_packet_type_str(u8 l4); -const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); +const char *hfi1_trace_get_packet_l4_str(u8 l4); void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, u8 *ack, u8 *becn, u8 *fecn, u8 *mig, u8 *se, u8 *pad, u8 *opcode, u8 *tver, @@ -129,6 +128,8 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, u8 se, u8 pad, u8 opcode, const char *opname, u8 tver, u16 pkey, u32 psn, u32 qpn); +const char *hfi1_trace_get_packet_l2_str(u8 l2); + #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) #define lrh_name(lrh) { HFI1_##lrh, #lrh } @@ -136,8 +137,6 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) -#define PKT_ENTRY(pkt) __string(ptype, hfi1_trace_get_packet_str(packet)) -#define PKT_ASSIGN(pkt) __assign_str(ptype, hfi1_trace_get_packet_str(packet)) DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, @@ -146,12 +145,12 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - PKT_ENTRY(packet) - __field(bool, bypass) + __field(u8, etype) __field(u8, ack) __field(u8, age) __field(u8, becn) __field(u8, fecn) + __field(u8, l2) __field(u8, l4) __field(u8, lnh) __field(u8, lver) @@ -176,10 +175,10 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, ), TP_fast_assign( DD_DEV_ASSIGN(dd); - PKT_ASSIGN(packet); - if (packet->etype == RHF_RCV_TYPE_BYPASS) { - __entry->bypass = true; + __entry->etype = packet->etype; + __entry->l2 = hfi1_16B_get_l2(packet->hdr); + if (__entry->etype == RHF_RCV_TYPE_BYPASS) { hfi1_trace_parse_16b_hdr(packet->hdr, &__entry->age, &__entry->becn, @@ -203,7 +202,6 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->psn, &__entry->qpn); } else { - __entry->bypass = false; hfi1_trace_parse_9b_hdr(packet->hdr, sc5, &__entry->lnh, &__entry->lver, @@ -233,9 +231,13 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, ), TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - __get_str(ptype), + __entry->etype != RHF_RCV_TYPE_BYPASS ? 
+ show_packettype(__entry->etype) : + hfi1_trace_get_packet_l2_str( + __entry->l2), hfi1_trace_fmt_lrh(p, - __entry->bypass, + __entry->etype == + RHF_RCV_TYPE_BYPASS, __entry->age, __entry->becn, __entry->fecn, @@ -252,7 +254,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->dlid, __entry->slid), hfi1_trace_fmt_bth(p, - __entry->bypass, + __entry->etype == + RHF_RCV_TYPE_BYPASS, __entry->ack, __entry->becn, __entry->fecn, @@ -284,7 +287,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, TP_ARGS(dd, opah, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - __field(bool, bypass) + __field(u8, hdr_type) __field(u8, ack) __field(u8, age) __field(u8, becn) @@ -316,8 +319,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, DD_DEV_ASSIGN(dd); - if (opah->hdr_type) { - __entry->bypass = true; + __entry->hdr_type = opah->hdr_type; + if (__entry->hdr_type) { hfi1_trace_parse_16b_hdr(&opah->opah, &__entry->age, &__entry->becn, @@ -345,7 +348,6 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->psn, &__entry->qpn); } else { - __entry->bypass = false; hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, &__entry->lnh, &__entry->lver, @@ -378,9 +380,9 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, ), TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - hfi1_trace_get_packet_type_str(__entry->l4), + hfi1_trace_get_packet_l4_str(__entry->l4), hfi1_trace_fmt_lrh(p, - __entry->bypass, + !!__entry->hdr_type, __entry->age, __entry->becn, __entry->fecn, @@ -397,7 +399,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __entry->dlid, __entry->slid), hfi1_trace_fmt_bth(p, - __entry->bypass, + !!__entry->hdr_type, __entry->ack, __entry->becn, __entry->fecn, diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index f9909d240dcc..4d487fee105d 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -62,15 +62,6 @@ __print_symbolic(type, \ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, u32 ctxt, From d59075ad1e091cdb663974b10c8bac5491acc356 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:01:16 -0700 Subject: [PATCH 061/296] IB/hfi1: Add a safe wrapper for _rcd_get_by_index hfi1_rcd_get_by_index assumes that the given index is in the correct range. In most cases this is correct because the index is bounded by a loop. For these cases, adding a range check to the function is redundant. For the use case that is not bounded by the loop range, a _safe wrapper function is needed to validate the index before accessing the rcd array. Add a _safe wrapper to _get_by_index to validate the index range. Update appropriate call sites with the new _safe function. Reviewed-by: Ira Weiny Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- drivers/infiniband/hw/hfi1/debugfs.c | 4 ++-- drivers/infiniband/hw/hfi1/driver.c | 4 ++-- drivers/infiniband/hw/hfi1/hfi.h | 2 ++ drivers/infiniband/hw/hfi1/init.c | 21 +++++++++++++++++++++ 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 6b63201513fe..091c735f5c9b 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13065,7 +13065,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) me->type = IRQ_SDMA; } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; - rcd = hfi1_rcd_get_by_index(dd, idx); + rcd = hfi1_rcd_get_by_index_safe(dd, idx); if (rcd) { /* * Set the interrupt register and mask for this diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 36ae1fd86502..24128f4e5748 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -243,7 +243,7 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) spos = v; i = *spos; - rcd = hfi1_rcd_get_by_index(dd, i); + rcd = hfi1_rcd_get_by_index_safe(dd, i); if (!rcd) return SEQ_SKIP; @@ -402,7 +402,7 @@ static int _rcds_seq_show(struct seq_file *s, void *v) loff_t *spos = v; loff_t i = *spos; - rcd = hfi1_rcd_get_by_index(dd, i); + rcd = hfi1_rcd_get_by_index_safe(dd, i); if (rcd) seqfile_dump_rcd(s, rcd); hfi1_rcd_put(rcd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 7372cc00cb2d..85a425ef740e 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -866,7 +866,7 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. */ if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index(dd, ctxt); + rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); if (rcd) { rcd->do_interrupt = &handle_receive_interrupt_nodma_rtail; @@ -895,7 +895,7 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. 
*/ if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index(dd, ctxt); + rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); if (rcd) { rcd->do_interrupt = &handle_receive_interrupt_dma_rtail; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1e1140e7cbcc..cb74ae8b0991 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1398,6 +1398,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); +struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, + u16 ctxt); struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 04a383b8b0db..36c8a4ae8278 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -283,6 +283,27 @@ static int allocate_rcd_index(struct hfi1_devdata *dd, return 0; } +/** + * hfi1_rcd_get_by_index_safe - validate the ctxt index before accessing the + * array + * @dd: pointer to a valid devdata structure + * @ctxt: the index of a possible rcd + * + * This is a wrapper for hfi1_rcd_get_by_index() to validate that the given + * ctxt index is valid. + * + * The caller is responsible for making the _put(). + * + */ +struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, + u16 ctxt) +{ + if (ctxt < dd->num_rcv_contexts) + return hfi1_rcd_get_by_index(dd, ctxt); + + return NULL; +} + /** * hfi1_rcd_get_by_index * @dd: pointer to a valid devdata structure From ddebe9810cb0d5f28e101125bfa249a826bc0536 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:03:50 -0700 Subject: [PATCH 062/296] IB/hfi1: Refactor assign_ctxt() IOCTL The IOCTL is a bit unwieldy. Refactor to a common pattern. Refactor the assign_ctxt() IOCTL. Reviewed-by: Ira Weiny Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 36 ++++++++++++++------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 668c9bc19505..41f9cf98c81a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -78,7 +78,7 @@ static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt); static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); static u64 kvirt_to_phys(void *addr); -static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); +static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len); static void init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); static int init_user_ctxt(struct hfi1_filedata *fd, @@ -221,7 +221,6 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, { struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_info uinfo; struct hfi1_tid_info tinfo; int ret = 0; unsigned long addr; @@ -237,16 +236,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, switch (cmd) { case HFI1_IOCTL_ASSIGN_CTXT: - if (uctxt) - return -EINVAL; - - if (copy_from_user(&uinfo, - (struct hfi1_user_info __user *)arg, - sizeof(uinfo))) - return -EFAULT; - - ret = assign_ctxt(fd, &uinfo); + ret = assign_ctxt(fd, arg, _IOC_SIZE(cmd)); break; + case HFI1_IOCTL_CTXT_INFO: ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_ctxt_info)); @@ -889,20 +881,30 @@ static int complete_subctxt(struct hfi1_filedata *fd) return ret; } -static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) +static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len) { int ret; unsigned int swmajor, swminor; struct hfi1_ctxtdata *uctxt = NULL; + struct hfi1_user_info uinfo; - swmajor = uinfo->userversion >> 16; + if (fd->uctxt) + return -EINVAL; + + if (sizeof(uinfo) != len) + return -EINVAL; + + if (copy_from_user(&uinfo, (void __user *)arg, sizeof(uinfo))) + return -EFAULT; + + swmajor = uinfo.userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) return -ENODEV; - if (uinfo->subctxt_cnt > HFI1_MAX_SHARED_CTXTS) + if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS) return -EINVAL; - swminor = uinfo->userversion & 0xffff; + swminor = uinfo.userversion & 0xffff; /* * Acquire the mutex to protect against multiple creations of what @@ -913,14 +915,14 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) * Get a sub context if available (fd->uctxt will be set). * ret < 0 error, 0 no context, 1 sub-context found */ - ret = find_sub_ctxt(fd, uinfo); + ret = find_sub_ctxt(fd, &uinfo); /* * Allocate a base context if context sharing is not required or a * sub context wasn't found. */ if (!ret) - ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt); + ret = allocate_ctxt(fd, fd->dd, &uinfo, &uctxt); mutex_unlock(&hfi1_mutex); From ff1a5582c9516b324394fbaaf24772d1b6bd9c15 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:03:57 -0700 Subject: [PATCH 063/296] IB/hfi1: Refactor get_ctxt_info The IOCTL is a bit unwieldy. Refactor to a common pattern. Reviewed-by: Ira Weiny Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 41f9cf98c81a..2c373fc885b2 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -84,8 +84,7 @@ static void init_subctxts(struct hfi1_ctxtdata *uctxt, static int init_user_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); -static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len); +static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); static int setup_base_ctxt(struct hfi1_filedata *fd, @@ -240,9 +239,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, break; case HFI1_IOCTL_CTXT_INFO: - ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg, - sizeof(struct hfi1_ctxt_info)); + ret = get_ctxt_info(fd, arg, _IOC_SIZE(cmd)); break; + case HFI1_IOCTL_USER_INFO: ret = get_base_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_base_info)); @@ -1237,12 +1236,13 @@ static void user_init(struct hfi1_ctxtdata *uctxt) hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); } -static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len) +static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) { struct hfi1_ctxt_info cinfo; struct hfi1_ctxtdata *uctxt = fd->uctxt; - int ret = 0; + + if (sizeof(cinfo) != len) + return -EINVAL; memset(&cinfo, 0, sizeof(cinfo)); cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) & @@ -1272,10 +1272,10 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size; trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo); - if (copy_to_user(ubase, &cinfo, sizeof(cinfo))) - ret = -EFAULT; + if (copy_to_user((void __user *)arg, &cinfo, len)) + return -EFAULT; - return ret; + return 0; } static int init_user_ctxt(struct hfi1_filedata *fd, From 033c16d71fdcd99a04eb670713124f88aa2a6922 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:04:03 -0700 Subject: [PATCH 064/296] IB/hfi1: Fix parenthesis alignment issues In preparation to refactoring get_base_info(), cleanup some checkpatch issues. Reviewed-by: Ira Weiny Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2c373fc885b2..f0af5709ff85 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1382,34 +1382,34 @@ static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, fd->subctxt, uctxt->egrbufs.rcvtids[0].dma); binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt, - fd->subctxt, 0); + fd->subctxt, 0); /* * user regs are at * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE)) */ binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, - fd->subctxt, 0); + fd->subctxt, 0); offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) * sizeof(*dd->events)); binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, - fd->subctxt, - offset); + fd->subctxt, + offset); binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt, - fd->subctxt, - dd->status); + fd->subctxt, + dd->status); if (HFI1_CAP_IS_USET(DMA_RTAIL)) binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt, - fd->subctxt, 0); + fd->subctxt, 0); if (uctxt->subctxt_cnt) { binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS, - uctxt->ctxt, - fd->subctxt, 0); + uctxt->ctxt, + fd->subctxt, 0); binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ, - uctxt->ctxt, - fd->subctxt, 0); + uctxt->ctxt, + fd->subctxt, 0); binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF, - uctxt->ctxt, - fd->subctxt, 0); + uctxt->ctxt, + fd->subctxt, 0); } sz = (len < sizeof(binfo)) ? len : sizeof(binfo); if (copy_to_user(ubase, &binfo, sz)) From 45afb32453692704de6c32acdd3c2a9a7ec22139 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:04:10 -0700 Subject: [PATCH 065/296] IB/hfi1: Refactor get_base_info The IOCTL is a bit unwieldy. Refactor to a common pattern. Reviewed-by: Ira Weiny Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f0af5709ff85..8618a3b240ec 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -85,8 +85,7 @@ static int init_user_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); -static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len); +static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); static int setup_base_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); @@ -243,9 +242,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, break; case HFI1_IOCTL_USER_INFO: - ret = get_base_info(fd, (void __user *)(unsigned long)arg, - sizeof(struct hfi1_base_info)); + ret = get_base_info(fd, arg, _IOC_SIZE(cmd)); break; + case HFI1_IOCTL_CREDIT_UPD: if (uctxt) sc_return_credits(uctxt->sc); @@ -1341,18 +1340,18 @@ static int setup_base_ctxt(struct hfi1_filedata *fd, return ret; } -static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len) +static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) { struct hfi1_base_info binfo; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - ssize_t sz; unsigned offset; - int ret = 0; trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt); + if (sizeof(binfo) != len) + return -EINVAL; + memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; binfo.sw_version = HFI1_KERN_SWVERSION; @@ -1411,10 +1410,11 @@ static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, uctxt->ctxt, fd->subctxt, 0); } - sz = (len < sizeof(binfo)) ? len : sizeof(binfo); - if (copy_to_user(ubase, &binfo, sz)) - ret = -EFAULT; - return ret; + + if (copy_to_user((void __user *)arg, &binfo, len)) + return -EFAULT; + + return 0; } static unsigned int poll_urgent(struct file *fp, From f404ca4c7ea8e650ba09ba87c71c7a89c865d5be Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:04:16 -0700 Subject: [PATCH 066/296] IB/hfi1: Refactor hfi_user_exp_rcv_setup() IOCTL The IOCTL is a bit unwieldy. Refactor to a common pattern. Refactor the _TID_UPDATE IOCTL. Reviewed-by: Ira Weiny Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 66 ++++++++++++++++++--------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 8618a3b240ec..38630a686e3e 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -86,6 +86,8 @@ static int init_user_ctxt(struct hfi1_filedata *fd, static void user_init(struct hfi1_ctxtdata *uctxt); static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); +static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, + u32 len); static int setup_base_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); @@ -251,27 +253,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, break; case HFI1_IOCTL_TID_UPDATE: - if (copy_from_user(&tinfo, - (struct hfi11_tid_info __user *)arg, - sizeof(tinfo))) - return -EFAULT; - - ret = hfi1_user_exp_rcv_setup(fd, &tinfo); - if (!ret) { - /* - * Copy the number of tidlist entries we used - * and the length of the buffer we registered. - */ - addr = arg + offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - return -EFAULT; - - addr = arg + offsetof(struct hfi1_tid_info, length); - if (copy_to_user((void __user *)addr, &tinfo.length, - sizeof(tinfo.length))) - ret = -EFAULT; - } + ret = user_exp_rcv_setup(fd, arg, _IOC_SIZE(cmd)); break; case HFI1_IOCTL_TID_FREE: @@ -1417,6 +1399,48 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) return 0; } +/** + * user_exp_rcv_setup - Set up the given tid rcv list + * @fd: file data of the current driver instance + * @arg: ioctl argument for user space information + * @len: length of data structure associated with ioctl command + * + * Wrapper to validate ioctl information before doing _rcv_setup. + * + */ +static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, + u32 len) +{ + int ret; + unsigned long addr; + struct hfi1_tid_info tinfo; + + if (sizeof(tinfo) != len) + return -EINVAL; + + if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo)))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_setup(fd, &tinfo); + if (!ret) { + /* + * Copy the number of tidlist entries we used + * and the length of the buffer we registered. + */ + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + return -EFAULT; + + addr = arg + offsetof(struct hfi1_tid_info, length); + if (copy_to_user((void __user *)addr, &tinfo.length, + sizeof(tinfo.length))) + ret = -EFAULT; + } + + return ret; +} + static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt) { From 3920eef7a7e0cd90364944e6b3b9be3f9e73e4cc Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:04:22 -0700 Subject: [PATCH 067/296] IB/hfi1: Refactor hfi_user_exp_rcv_clear() IOCTLs The IOCTL is a bit unwieldy. Refactor to a common pattern. Refactor the _TID_FREE IOCTL. Reviewed-by: Ira Weiny Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 49 ++++++++++++++++++++------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 38630a686e3e..aecd37132f6c 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -88,6 +88,8 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, u32 len); +static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, + u32 len); static int setup_base_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); @@ -257,18 +259,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, break; case HFI1_IOCTL_TID_FREE: - if (copy_from_user(&tinfo, - (struct hfi11_tid_info __user *)arg, - sizeof(tinfo))) - return -EFAULT; - - ret = hfi1_user_exp_rcv_clear(fd, &tinfo); - if (ret) - break; - addr = arg + offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - ret = -EFAULT; + ret = user_exp_rcv_clear(fd, arg, _IOC_SIZE(cmd)); break; case HFI1_IOCTL_TID_INVAL_READ: @@ -1441,6 +1432,40 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, return ret; } +/** + * user_exp_rcv_clear - Clear the given tid rcv list + * @fd: file data of the current driver instance + * @arg: ioctl argument for user space information + * @len: length of data structure associated with ioctl command + * + * The hfi1_user_exp_rcv_clear() can be called from the error path. Because + * of this, we need to use this wrapper to copy the user space information + * before doing the clear. + */ +static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, + u32 len) +{ + int ret; + unsigned long addr; + struct hfi1_tid_info tinfo; + + if (sizeof(tinfo) != len) + return -EINVAL; + + if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo)))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_clear(fd, &tinfo); + if (!ret) { + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + return -EFAULT; + } + + return ret; +} + static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt) { From 8a41da09e6189598fea2750e64c620cf83f22aba Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:04:29 -0700 Subject: [PATCH 068/296] IB/hfi1: Refactor hfi_user_exp_rcv_invalid() IOCTLs The IOCTL is a bit unwieldy. Refactor to a common pattern. Refactor _TID_INVAL_READ IOCTLs. Reviewed-by: Ira Weiny Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 54 +++++++++++++++++------ drivers/infiniband/hw/hfi1/user_exp_rcv.c | 3 -- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index aecd37132f6c..e898991655d4 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -90,6 +90,8 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, u32 len); static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, u32 len); +static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg, + u32 len); static int setup_base_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); @@ -223,9 +225,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, { struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_tid_info tinfo; int ret = 0; - unsigned long addr; int uval = 0; unsigned long ul_uval = 0; u16 uval16 = 0; @@ -263,18 +263,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, break; case HFI1_IOCTL_TID_INVAL_READ: - if (copy_from_user(&tinfo, - (struct hfi11_tid_info __user *)arg, - sizeof(tinfo))) - return -EFAULT; - - ret = hfi1_user_exp_rcv_invalid(fd, &tinfo); - if (ret) - break; - addr = arg + offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - ret = -EFAULT; + ret = user_exp_rcv_invalid(fd, arg, _IOC_SIZE(cmd)); break; case HFI1_IOCTL_RECV_CTRL: @@ -1466,6 +1455,43 @@ static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, return ret; } +/** + * user_exp_rcv_invalid - Invalidate the given tid rcv list + * @fd: file data of the current driver instance + * @arg: ioctl argument for user space information + * @len: length of data structure associated with ioctl command + * + * Wrapper to validate ioctl information before doing _rcv_invalid. + * + */ +static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg, + u32 len) +{ + int ret; + unsigned long addr; + struct hfi1_tid_info tinfo; + + if (sizeof(tinfo) != len) + return -EINVAL; + + if (!fd->invalid_tids) + return -EINVAL; + + if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo)))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_invalid(fd, &tinfo); + if (ret) + return ret; + + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + ret = -EFAULT; + + return ret; +} + static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt) { diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index ff9ad69dc0a7..c1c596adcd01 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -546,9 +546,6 @@ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, u32 *array; int ret = 0; - if (!fd->invalid_tids) - return -EINVAL; - /* * copy_to_user() can sleep, which will leave the invalid_lock * locked and cause the MMU notifier to be blocked on the lock From 88a69b65f36fd5d8bcba712dd2bd8c689463ccbd Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:04:35 -0700 Subject: [PATCH 069/296] IB/hfi1: Refactor get_user() IOCTLs The IOCTL is a bit unwieldy.
Refactor to a common pattern. Refactor _RECV_CTRL, _POLL_TYPE, _ACK_EVENT and _SET_PKEY IOCTLs to a common pattern. Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 74 ++++++++++++--------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e898991655d4..4e089f2e1fbd 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -105,10 +105,10 @@ static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, - unsigned long events); -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey); + unsigned long arg); +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg); static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, - int start_stop); + unsigned long arg); static int vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -227,8 +227,6 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, struct hfi1_ctxtdata *uctxt = fd->uctxt; int ret = 0; int uval = 0; - unsigned long ul_uval = 0; - u16 uval16 = 0; hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd); if (cmd != HFI1_IOCTL_ASSIGN_CTXT && @@ -267,34 +265,21 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, break; case HFI1_IOCTL_RECV_CTRL: - ret = get_user(uval, (int __user *)arg); - if (ret != 0) - return -EFAULT; - ret = manage_rcvq(uctxt, fd->subctxt, uval); + ret = manage_rcvq(uctxt, fd->subctxt, arg); break; case HFI1_IOCTL_POLL_TYPE: - ret = get_user(uval, (int __user *)arg); - if (ret != 0) + if (get_user(uval, (int __user *)arg)) return -EFAULT; uctxt->poll_type = (typeof(uctxt->poll_type))uval; break; case HFI1_IOCTL_ACK_EVENT: - ret = get_user(ul_uval, (unsigned long __user *)arg); - if (ret != 0) - return -EFAULT; - ret = user_event_ack(uctxt, fd->subctxt, ul_uval); + ret = user_event_ack(uctxt, fd->subctxt, arg); break; case HFI1_IOCTL_SET_PKEY: - ret = get_user(uval16, (u16 __user *)arg); - if (ret != 0) - return -EFAULT; - if (HFI1_CAP_IS_USET(PKEY_CHECK)) - ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16); - else - return -EPERM; + ret = set_ctxt_pkey(uctxt, arg); break; case HFI1_IOCTL_CTXT_RESET: { @@ -1584,13 +1569,18 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) * re-init the software copy of the head register */ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, - int start_stop) + unsigned long arg) { struct hfi1_devdata *dd = uctxt->dd; unsigned int rcvctrl_op; + int start_stop; if (subctxt) - goto bail; + return 0; + + if (get_user(start_stop, (int __user *)arg)) + return -EFAULT; + /* atomically clear receive enable ctxt. */ if (start_stop) { /* @@ -1609,7 +1599,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, } hfi1_rcvctrl(dd, rcvctrl_op, uctxt); /* always; new head should be equal to new tail; see above */ -bail: + return 0; } @@ -1619,15 +1609,19 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, * set, if desired, and checks again in future. 
*/ static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, - unsigned long events) + unsigned long arg) { int i; struct hfi1_devdata *dd = uctxt->dd; unsigned long *evs; + unsigned long events; if (!dd->events) return 0; + if (get_user(events, (unsigned long __user *)arg)) + return -EFAULT; + evs = dd->events + uctxt_offset(uctxt) + subctxt; for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) { @@ -1638,27 +1632,27 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, return 0; } -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg) { - int ret = -ENOENT, i, intable = 0; + int i; struct hfi1_pportdata *ppd = uctxt->ppd; struct hfi1_devdata *dd = uctxt->dd; + u16 pkey; - if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) { - ret = -EINVAL; - goto done; - } + if (!HFI1_CAP_IS_USET(PKEY_CHECK)) + return -EPERM; + + if (get_user(pkey, (u16 __user *)arg)) + return -EFAULT; + + if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) + return -EINVAL; for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) - if (pkey == ppd->pkeys[i]) { - intable = 1; - break; - } + if (pkey == ppd->pkeys[i]) + return hfi1_set_ctxt_pkey(dd, uctxt, pkey); - if (intable) - ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey); -done: - return ret; + return -ENOENT; } static void user_remove(struct hfi1_devdata *dd) From ecf799852bde85ae9c1713b2a68dce4a03054f1b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 26 Sep 2017 07:04:42 -0700 Subject: [PATCH 070/296] IB/hfi1: Refactor reset_ctxt() IOCTL The IOCTL is a bit unwieldy. Refactor reset_ctxt() to be a bit more manageable. Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 122 ++++++++++++++------------ 1 file changed, 66 insertions(+), 56 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 4e089f2e1fbd..2b7144e0405e 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -107,6 +107,7 @@ static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long arg); static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg); +static int ctxt_reset(struct hfi1_ctxtdata *uctxt); static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long arg); static int vma_fault(struct vm_fault *vmf); @@ -282,63 +283,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ret = set_ctxt_pkey(uctxt, arg); break; - case HFI1_IOCTL_CTXT_RESET: { - struct send_context *sc; - struct hfi1_devdata *dd; - - if (!uctxt || !uctxt->dd || !uctxt->sc) - return -EINVAL; - - /* - * There is no protection here. User level has to - * guarantee that no one will be writing to the send - * context while it is being re-initialized. - * If user level breaks that guarantee, it will break - * it's own context and no one else's. - */ - dd = uctxt->dd; - sc = uctxt->sc; - /* - * Wait until the interrupt handler has marked the - * context as halted or frozen. Report error if we time - * out. 
- */ - wait_event_interruptible_timeout( - sc->halt_wait, (sc->flags & SCF_HALTED), - msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (!(sc->flags & SCF_HALTED)) - return -ENOLCK; - - /* - * If the send context was halted due to a Freeze, - * wait until the device has been "unfrozen" before - * resetting the context. - */ - if (sc->flags & SCF_FROZEN) { - wait_event_interruptible_timeout( - dd->event_queue, - !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), - msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (dd->flags & HFI1_FROZEN) - return -ENOLCK; - - if (dd->flags & HFI1_FORCED_FREEZE) - /* - * Don't allow context reset if we are into - * forced freeze - */ - return -ENODEV; - - sc_disable(sc); - ret = sc_enable(sc); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt); - } else { - ret = sc_restart(sc); - } - if (!ret) - sc_return_credits(sc); + case HFI1_IOCTL_CTXT_RESET: + ret = ctxt_reset(uctxt); break; - } case HFI1_IOCTL_GET_VERS: uval = HFI1_USER_SWVERSION; @@ -1655,6 +1602,69 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg) return -ENOENT; } +/** + * ctxt_reset - Reset the user context + * @uctxt: valid user context + */ +static int ctxt_reset(struct hfi1_ctxtdata *uctxt) +{ + struct send_context *sc; + struct hfi1_devdata *dd; + int ret = 0; + + if (!uctxt || !uctxt->dd || !uctxt->sc) + return -EINVAL; + + /* + * There is no protection here. User level has to guarantee that + * no one will be writing to the send context while it is being + * re-initialized. If user level breaks that guarantee, it will + * break its own context and no one else's. + */ + dd = uctxt->dd; + sc = uctxt->sc; + + /* + * Wait until the interrupt handler has marked the context as + * halted or frozen. Report error if we time out. + */ + wait_event_interruptible_timeout( + sc->halt_wait, (sc->flags & SCF_HALTED), + msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); + if (!(sc->flags & SCF_HALTED)) + return -ENOLCK; + + /* + * If the send context was halted due to a Freeze, wait until the + * device has been "unfrozen" before resetting the context. + */ + if (sc->flags & SCF_FROZEN) { + wait_event_interruptible_timeout( + dd->event_queue, + !(READ_ONCE(dd->flags) & HFI1_FROZEN), + msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); + if (dd->flags & HFI1_FROZEN) + return -ENOLCK; + + if (dd->flags & HFI1_FORCED_FREEZE) + /* + * Don't allow context reset if we are into + * forced freeze + */ + return -ENODEV; + + sc_disable(sc); + ret = sc_enable(sc); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt); + } else { + ret = sc_restart(sc); + } + if (!ret) + sc_return_credits(sc); + + return ret; +} + static void user_remove(struct hfi1_devdata *dd) { From e538e0aca33f81fbfaa10a6954b40ff4360686dd Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 20 Sep 2017 08:39:46 +0300 Subject: [PATCH 071/296] RDMA/qedr: Fix rdma_type initialization Initialize the rdma_type (iWARP or RoCE) which is set according to device configuration in qed.
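The failure mode being fixed is dispatching on a field the probe path never filled in: qedr_register_device() selects the iWARP or RoCE registration flow based on rdma_type, so the copy from the qed device query has to happen first. A minimal stand-alone sketch of that ordering, using simplified, assumed versions of the qed/qedr structures:

/* Illustrative sketch only; struct layouts and enum names are assumed. */
#include <stdio.h>

enum qed_rdma_type { QED_RDMA_TYPE_ROCE, QED_RDMA_TYPE_IWARP };

struct qed_dev_rdma_info { enum qed_rdma_type rdma_type; };
struct qedr_dev { enum qed_rdma_type rdma_type; };

/* Mirrors the one-line fix: capture the transport type from the qed
 * query result before anything (e.g. device registration) dispatches
 * on it. */
static void qedr_add_sketch(struct qedr_dev *dev,
			    const struct qed_dev_rdma_info *dev_info)
{
	dev->rdma_type = dev_info->rdma_type;
}

int main(void)
{
	struct qed_dev_rdma_info info = { .rdma_type = QED_RDMA_TYPE_IWARP };
	struct qedr_dev dev = { .rdma_type = QED_RDMA_TYPE_ROCE };

	qedr_add_sketch(&dev, &info);
	printf("registering as %s\n",
	       dev.rdma_type == QED_RDMA_TYPE_IWARP ? "iWARP" : "RoCE");
	return 0;
}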
Fixes: e6a38c54faf3 ("RDMA/qedr: Add support for registering an iWARP device") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ca9317107ab7..1afc26785049 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -870,6 +870,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, goto init_err; dev->user_dpm_enabled = dev_info.user_dpm_enabled; + dev->rdma_type = dev_info.rdma_type; dev->num_hwfns = dev_info.common.num_hwfns; dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev); From e1ac263fcb58a63aed0d9a75d22e045f88a53bb2 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Sun, 24 Sep 2017 17:21:07 +0530 Subject: [PATCH 072/296] IB/qib: Use setup_timer and mod_timer Use setup_timer and mod_timer API instead of structure assignments. This is done using Coccinelle; the semantic patch used for this is as follows: @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); Signed-off-by: Himanshu Jha Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_driver.c | 8 +++----- drivers/infiniband/hw/qib/qib_mad.c | 10 ++++------ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 719906a9fd51..2314e8a9ca3f 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -735,11 +735,9 @@ void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val) */ if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { /* Need to start timer */ - init_timer(&ppd->led_override_timer); - ppd->led_override_timer.function = qib_run_led_override; - ppd->led_override_timer.data = (unsigned long) ppd; - ppd->led_override_timer.expires = jiffies + 1; - add_timer(&ppd->led_override_timer); + setup_timer(&ppd->led_override_timer, qib_run_led_override, + (unsigned long)ppd); + mod_timer(&ppd->led_override_timer, jiffies + 1); } else { if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) mod_timer(&ppd->led_override_timer, jiffies + 1); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 82d9da9b6997..1237952c144e 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2478,12 +2478,10 @@ void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx) /* Initialize xmit_wait structure */ dd->pport[port_idx].cong_stats.counter = 0; - init_timer(&dd->pport[port_idx].cong_stats.timer); - dd->pport[port_idx].cong_stats.timer.function = xmit_wait_timer_func; - dd->pport[port_idx].cong_stats.timer.data = - (unsigned long)(&dd->pport[port_idx]); - dd->pport[port_idx].cong_stats.timer.expires = 0; - add_timer(&dd->pport[port_idx].cong_stats.timer); + setup_timer(&dd->pport[port_idx].cong_stats.timer, + xmit_wait_timer_func, + (unsigned long)(&dd->pport[port_idx])); + mod_timer(&dd->pport[port_idx].cong_stats.timer, 0); } void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) From b04dc1999073250b7d9919eb266da61283ba3db0 Mon Sep 17 00:00:00 2001 From: Ajaykumar Hotchandani Date: Wed, 27 Sep 2017 21:26:08 +0300 Subject: [PATCH 073/296] IB/{ipoib, iser}: Consistent print format of vendor error Vendor error print should be
consistent across protocols to avoid any confusion. This patch corrects that. Suggested-by: Santosh Shilimkar Signed-off-by: Ajaykumar Hotchandani Reviewed-by: Santosh Shilimkar Acked-by: Wengang Wang Reviewed-by: Yuval Shaia Signed-off-by: Yuval Shaia Acked-by: Sagi Grimberg Acked-by: Roi Dayan Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 10 +++++----- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 ++++---- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7774654c2ccb..7500c28eac6b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -594,9 +594,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb = rx_ring[wr_id].skb; if (unlikely(wc->status != IB_WC_SUCCESS)) { - ipoib_dbg(priv, "cm recv error " - "(status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); + ipoib_dbg(priv, + "cm recv error (status=%d, wrid=%d vend_err %#x)\n", + wc->status, wr_id, wc->vendor_err); ++dev->stats.rx_dropped; if (has_srq) goto repost; @@ -829,11 +829,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->status == IB_WC_RNR_RETRY_EXC_ERR || wc->status == IB_WC_RETRY_EXC_ERR) ipoib_dbg(priv, - "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n", + "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n", __func__, wc->status, wr_id, wc->vendor_err); else ipoib_warn(priv, - "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n", + "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n", __func__, wc->status, wr_id, wc->vendor_err); spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 2e075377242e..dfd457567aff 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -192,8 +192,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) - ipoib_warn(priv, "failed recv event " - "(status=%d, wrid=%d vend_err %x)\n", + ipoib_warn(priv, + "failed recv event (status=%d, wrid=%d vend_err %#x)\n", wc->status, wr_id, wc->vendor_err); ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); dev_kfree_skb_any(skb); @@ -414,8 +414,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) { struct ipoib_qp_state_validate *qp_work; - ipoib_warn(priv, "failed send event " - "(status=%d, wrid=%d vend_err %x)\n", + ipoib_warn(priv, + "failed send event (status=%d, wrid=%d vend_err %#x)\n", wc->status, wr_id, wc->vendor_err); qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC); if (!qp_work) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 55a73b0ed4c6..56b7240a3fc3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1146,7 +1146,7 @@ void iser_err_comp(struct ib_wc *wc, const char *type) if (wc->status != IB_WC_WR_FLUSH_ERR) { struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context); - iser_err("%s failure: %s (%d) vend_err %x\n", type, + iser_err("%s failure: %s (%d) vend_err %#x\n", type, ib_wc_status_msg(wc->status), wc->status, wc->vendor_err); From 
4ea597ea3de2733873cdcfd5c4a9a2760a126daf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Sep 2017 10:49:43 +0300 Subject: [PATCH 074/296] i40iw: delete some stray tabs These lines were indented too far by mistake. Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Acked-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5230dd3c938c..9017c1cc51d4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1524,8 +1524,8 @@ static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool acti break; } } - if (!ret) - clear_bit(port, cm_core->active_side_ports); + if (!ret) + clear_bit(port, cm_core->active_side_ports); spin_unlock_irqrestore(&cm_core->ht_lock, flags); } else { spin_lock_irqsave(&cm_core->listen_list_lock, flags); From 281d0ccfae86c5b35ac48a0f2f20a6775a85cd05 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 14:16:01 +0100 Subject: [PATCH 075/296] RDMA/hns: make various functions static, fixes warnings The functions hns_roce_table_mhop_get, hns_roce_table_mhop_put, hns_roce_cleanup_mhop_hem_table, hns_roce_v1_post_mbox, hns_roce_cmq_setup_basic_desc, hns_roce_cmq_send, hns_roce_cmq_query_hw_info are all local to the source file and do not need to be in global scope, so make them static. Cleans up sparse warnings: symbol 'hns_roce_table_mhop_get' was not declared. Should it be static? symbol 'hns_roce_table_mhop_put' was not declared. Should it be static? symbol 'hns_roce_cleanup_mhop_hem_table' was not declared. Should it be static? symbol 'hns_roce_v1_post_mbox' was not declared. Should it be static? symbol 'hns_roce_cmq_setup_basic_desc' was not declared. Should it be static? symbol 'hns_roce_cmq_send' was not declared. Should it be static?
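(As a generic illustration of the sparse fix above, using an invented file and symbol: a function called only from its own translation unit should be declared static, which gives it internal linkage and silences the 'was not declared' warning.)

#include <stdio.h>

/* Hypothetical file-local helper. Without 'static' it has external
 * linkage and no prototype in any header, which is exactly what sparse
 * warns about; 'static' limits the symbol to this file. */
static int sum(const int *v, int n)
{
        int i, total = 0;

        for (i = 0; i < n; i++)
                total += v[i];
        return total;
}

int main(void)
{
        int v[] = { 1, 2, 3 };

        printf("%d\n", sum(v, 3));
        return 0;
}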
Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hem.c | 18 +++++++++--------- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 ++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 125e26ebf250..8388ae25640c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -363,9 +363,9 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, return ret; } -int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long obj) +static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long obj) { struct device *dev = hr_dev->dev; struct hns_roce_hem_mhop mhop; @@ -576,10 +576,10 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, return ret; } -void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long obj, - int check_refcount) +static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long obj, + int check_refcount) { struct device *dev = hr_dev->dev; struct hns_roce_hem_mhop mhop; @@ -983,8 +983,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, return -ENOMEM; } -void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table) +static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table) { struct hns_roce_hem_mhop mhop; u32 buf_chunk_size; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 426f55a030e6..290fcc2d8b98 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1628,9 +1628,9 @@ static int hns_roce_v1_cmd_pending(struct hns_roce_dev *hr_dev) return (!!(status & (1 << HCR_GO_BIT))); } -int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, - u16 op, u16 token, int event) +static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, + u16 op, u16 token, int event) { u32 *hcr = (u32 *)(hr_dev->reg_base + ROCEE_MB1_REG); unsigned long end; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4171f73bdad5..0eab9cedc54b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -496,9 +496,9 @@ static void hns_roce_v2_cmq_exit(struct hns_roce_dev *hr_dev) hns_roce_free_cmq_desc(hr_dev, &priv->cmq.crq); } -void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, - enum hns_roce_opcode_type opcode, - bool is_read) +static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, + enum hns_roce_opcode_type opcode, + bool is_read) { memset((void *)desc, 0, sizeof(struct hns_roce_cmq_desc)); desc->opcode = cpu_to_le16(opcode); @@ -542,8 +542,8 @@ static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) return clean; } -int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) { struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv 
*)hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; @@ -629,7 +629,7 @@ int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } -int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) +static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) { struct hns_roce_query_version *resp; struct hns_roce_cmq_desc desc; From e930b4d8a52067c773d9cd6f6bdd7d4ddc99563f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 14:52:27 +0100 Subject: [PATCH 076/296] RDMA/hns: remove redundant assignment to variable j Variable j is assigned the value of loop_j and is then reassigned in the subsequent for loops, so the first initialization is redundant and can be removed. Cleans up clang warning: warning: Value stored to 'j' is never read Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_mr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7e6ce76b32df..452136d98ad5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -272,7 +272,6 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, mhop_num = hr_dev->caps.pbl_hop_num; i = loop_i; - j = loop_j; if (mhop_num == 3 && err_loop_index == 2) { for (; i >= 0; i--) { dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], From a3f12da0e99a8d17118ee9e18a1f760a0d427b26 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 26 Sep 2017 13:07:26 -0700 Subject: [PATCH 077/296] iw_cxgb4: allocate wait object for each memory object Remove the local stack allocated c4iw_wr_wait object in preparation for correctly handling timeouts. Also refactored some code to simplify it and make error-path unwinding more readable.
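(Illustrative sketch of the hazard being prepared for, using invented userspace names: a wait object on the sender's stack dies when the function returns, so a firmware reply arriving after a timeout would touch dead stack memory. Allocating the wait state separately and tying it to the long-lived object, as this patch does, is the first step toward fixing that. Build with cc -pthread; the refcounting that makes late replies fully safe arrives in a later patch in this series.)

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Models c4iw_wr_wait: completion state that must outlive the waiter. */
struct wr_wait {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
        int ret;
};

/* Models an MR/MW object that now owns its wait state. */
struct mem_obj {
        struct wr_wait *wr_waitp;
};

static struct wr_wait *alloc_wr_wait(void)
{
        struct wr_wait *w = calloc(1, sizeof(*w));

        if (w) {
                pthread_mutex_init(&w->lock, NULL);
                pthread_cond_init(&w->cond, NULL);
        }
        return w;
}

int main(void)
{
        struct mem_obj obj = { .wr_waitp = alloc_wr_wait() };

        if (!obj.wr_waitp)
                return 1;
        /* A reply handler can safely reach obj.wr_waitp for as long as
         * the object lives, unlike a stack variable in the send path. */
        printf("wait object %p owned by mem_obj\n", (void *)obj.wr_waitp);
        free(obj.wr_waitp);
        return 0;
}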
Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 + drivers/infiniband/hw/cxgb4/mem.c | 230 +++++++++++++++---------- 2 files changed, 142 insertions(+), 90 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 433e78e5f25b..0fabe82f7b6d 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -394,6 +394,7 @@ struct c4iw_mr { dma_addr_t mpl_addr; u32 max_mpl_len; u32 mpl_len; + struct c4iw_wr_wait *wr_waitp; }; static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) @@ -407,6 +408,7 @@ struct c4iw_mw { struct sk_buff *dereg_skb; u64 kva; struct tpt_attributes attr; + struct c4iw_wr_wait *wr_waitp; }; static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index eeadc69ad4be..b2523b213c86 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -60,18 +60,18 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length) static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, u32 len, dma_addr_t data, - int wait, struct sk_buff *skb) + struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { struct ulp_mem_io *req; struct ulptx_sgl *sgl; u8 wr_len; int ret = 0; - struct c4iw_wr_wait wr_wait; addr &= 0x7FFFFFF; - if (wait) - c4iw_init_wr_wait(&wr_wait); + if (wr_waitp) + c4iw_init_wr_wait(wr_waitp); wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); if (!skb) { @@ -84,8 +84,8 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, req = __skb_put_zero(skb, wr_len); INIT_ULPTX_WR(req, wr_len, 0, 0); req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | - (wait ? FW_WR_COMPL_F : 0)); - req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; + (wr_waitp ? FW_WR_COMPL_F : 0)); + req->wr.wr_lo = wr_waitp ? (__force __be64)(unsigned long)wr_waitp : 0L; req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) | T5_ULP_MEMIO_ORDER_V(1) | @@ -103,19 +103,19 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, ret = c4iw_ofld_send(rdev, skb); if (ret) return ret; - if (wait) - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + if (wr_waitp) + ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); return ret; } static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data, struct sk_buff *skb) + void *data, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { struct ulp_mem_io *req; struct ulptx_idata *sc; u8 wr_len, *to_dp, *from_dp; int copy_len, num_wqe, i, ret = 0; - struct c4iw_wr_wait wr_wait; __be32 cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE)); if (is_t4(rdev->lldi.adapter_type)) @@ -126,7 +126,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, addr &= 0x7FFFFFF; pr_debug("addr 0x%x len %u\n", addr, len); num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE); - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(wr_waitp); for (i = 0; i < num_wqe; i++) { copy_len = len > C4IW_MAX_INLINE_SIZE ? 
C4IW_MAX_INLINE_SIZE : @@ -147,7 +147,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F); - req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait; + req->wr.wr_lo = (__force __be64)(unsigned long)wr_waitp; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -180,12 +180,13 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, len -= C4IW_MAX_INLINE_SIZE; } - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); return ret; } static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data, struct sk_buff *skb) + void *data, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { u32 remain = len; u32 dmalen; @@ -208,7 +209,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, dmalen = T4_ULPTX_MAX_DMA; remain -= dmalen; ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, - !remain, skb); + skb, remain ? NULL : wr_waitp); if (ret) goto out; addr += dmalen >> 5; @@ -216,7 +217,8 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, daddr += dmalen; } if (remain) - ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb); + ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb, + wr_waitp); out: dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); return ret; @@ -227,23 +229,33 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, * If data is NULL, clear len byte of memory to zero. */ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data, struct sk_buff *skb) + void *data, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { - if (rdev->lldi.ulptx_memwrite_dsgl && use_dsgl) { - if (len > inline_threshold) { - if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) { - pr_warn_ratelimited("%s: dma map failure (non fatal)\n", - pci_name(rdev->lldi.pdev)); - return _c4iw_write_mem_inline(rdev, addr, len, - data, skb); - } else { - return 0; - } - } else - return _c4iw_write_mem_inline(rdev, addr, - len, data, skb); - } else - return _c4iw_write_mem_inline(rdev, addr, len, data, skb); + int ret; + + if (!rdev->lldi.ulptx_memwrite_dsgl || !use_dsgl) { + ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb, + wr_waitp); + goto out; + } + + if (len <= inline_threshold) { + ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb, + wr_waitp); + goto out; + } + + ret = _c4iw_write_mem_dma(rdev, addr, len, data, skb, wr_waitp); + if (ret) { + pr_warn_ratelimited("%s: dma map failure (non fatal)\n", + pci_name(rdev->lldi.pdev)); + ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb, + wr_waitp); + } +out: + return ret; + } /* @@ -257,7 +269,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, int bind_enabled, u32 zbva, u64 to, u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr, - struct sk_buff *skb) + struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) { int err; struct fw_ri_tpte tpt; @@ -311,7 +323,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb); + sizeof(tpt), &tpt, skb, wr_waitp); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -323,7 +335,7 @@ 
static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, } static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, - u32 pbl_addr, u32 pbl_size) + u32 pbl_addr, u32 pbl_size, struct c4iw_wr_wait *wr_waitp) { int err; @@ -331,37 +343,42 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, pbl_addr, rdev->lldi.vr->pbl.start, pbl_size); - err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL); + err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL, + wr_waitp); return err; } static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr, struct sk_buff *skb) + u32 pbl_addr, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, - pbl_size, pbl_addr, skb); + pbl_size, pbl_addr, skb, wr_waitp); } -static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid) +static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, + struct c4iw_wr_wait *wr_waitp) { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0, NULL); + 0UL, 0, 0, 0, 0, NULL, wr_waitp); } static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, - struct sk_buff *skb) + struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0, skb); + 0, skb, wr_waitp); } static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, - u32 pbl_size, u32 pbl_addr) + u32 pbl_size, u32 pbl_addr, + struct c4iw_wr_wait *wr_waitp) { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, - 0UL, 0, 0, pbl_size, pbl_addr, NULL); + 0UL, 0, 0, pbl_size, pbl_addr, NULL, wr_waitp); } static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) @@ -388,14 +405,15 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, mhp->attr.mw_bind_enable, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len ? 
mhp->attr.len : -1, shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL); + mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL, + mhp->wr_waitp); if (ret) return ret; ret = finish_mem_reg(mhp, stag); if (ret) { dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr, mhp->dereg_skb); + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); mhp->dereg_skb = NULL; } return ret; @@ -429,11 +447,17 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); + mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + if (!mhp->wr_waitp) { + ret = -ENOMEM; + goto err_free_mhp; + } + c4iw_init_wr_wait(mhp->wr_waitp); mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); if (!mhp->dereg_skb) { ret = -ENOMEM; - goto err0; + goto err_free_wr_wait; } mhp->rhp = rhp; @@ -449,20 +473,22 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0, - NULL); + NULL, mhp->wr_waitp); if (ret) - goto err1; + goto err_free_skb; ret = finish_mem_reg(mhp, stag); if (ret) - goto err2; + goto err_dereg_mem; return &mhp->ibmr; -err2: +err_dereg_mem: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr, mhp->dereg_skb); -err1: + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); +err_free_wr_wait: + kfree(mhp->wr_waitp); +err_free_skb: kfree_skb(mhp->dereg_skb); -err0: +err_free_mhp: kfree(mhp); return ERR_PTR(ret); } @@ -473,7 +499,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, __be64 *pages; int shift, n, len; int i, k, entry; - int err = 0; + int err = -ENOMEM; struct scatterlist *sg; struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -496,34 +522,31 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); + mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + if (!mhp->wr_waitp) + goto err_free_mhp; mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); - if (!mhp->dereg_skb) { - kfree(mhp); - return ERR_PTR(-ENOMEM); - } + if (!mhp->dereg_skb) + goto err_free_wr_wait; mhp->rhp = rhp; mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); - if (IS_ERR(mhp->umem)) { - err = PTR_ERR(mhp->umem); - kfree_skb(mhp->dereg_skb); - kfree(mhp); - return ERR_PTR(err); - } + if (IS_ERR(mhp->umem)) + goto err_free_skb; shift = mhp->umem->page_shift; n = mhp->umem->nmap; err = alloc_pbl(mhp, n); if (err) - goto err; + goto err_umem_release; pages = (__be64 *) __get_free_page(GFP_KERNEL); if (!pages) { err = -ENOMEM; - goto err_pbl; + goto err_pbl_free; } i = n = 0; @@ -536,7 +559,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (i == PAGE_SIZE / sizeof *pages) { err = write_pbl(&mhp->rhp->rdev, pages, - mhp->attr.pbl_addr + (n << 3), i); + mhp->attr.pbl_addr + (n << 3), i, + mhp->wr_waitp); if (err) goto pbl_done; n += i; @@ -547,12 +571,13 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (i) err = write_pbl(&mhp->rhp->rdev, pages, - mhp->attr.pbl_addr + (n << 3), i); + mhp->attr.pbl_addr + (n << 3), i, + mhp->wr_waitp); pbl_done: free_page((unsigned long) pages); if (err) - goto err_pbl; + goto err_pbl_free; mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; @@ -563,17 +588,20 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, err = 
register_mem(rhp, php, mhp, shift); if (err) - goto err_pbl; + goto err_pbl_free; return &mhp->ibmr; -err_pbl: +err_pbl_free: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); - -err: +err_umem_release: ib_umem_release(mhp->umem); +err_free_skb: kfree_skb(mhp->dereg_skb); +err_free_wr_wait: + kfree(mhp->wr_waitp); +err_free_mhp: kfree(mhp); return ERR_PTR(err); } @@ -597,13 +625,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, if (!mhp) return ERR_PTR(-ENOMEM); - mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); - if (!mhp->dereg_skb) { + mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + if (!mhp->wr_waitp) { ret = -ENOMEM; goto free_mhp; } - ret = allocate_window(&rhp->rdev, &stag, php->pdid); + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + ret = -ENOMEM; + goto free_wr_wait; + } + + ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); if (ret) goto free_skb; mhp->rhp = rhp; @@ -620,9 +654,12 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, return &(mhp->ibmw); dealloc_win: - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, + mhp->wr_waitp); free_skb: kfree_skb(mhp->dereg_skb); +free_wr_wait: + kfree(mhp->wr_waitp); free_mhp: kfree(mhp); return ERR_PTR(ret); @@ -638,8 +675,10 @@ int c4iw_dealloc_mw(struct ib_mw *mw) rhp = mhp->rhp; mmid = (mw->rkey) >> 8; remove_handle(rhp, &rhp->mmidr, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, + mhp->wr_waitp); kfree_skb(mhp->dereg_skb); + kfree(mhp->wr_waitp); kfree(mhp); pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp); return 0; @@ -671,23 +710,31 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, goto err; } + mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + if (!mhp->wr_waitp) { + ret = -ENOMEM; + goto err_free_mhp; + } + c4iw_init_wr_wait(mhp->wr_waitp); + mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev, length, &mhp->mpl_addr, GFP_KERNEL); if (!mhp->mpl) { ret = -ENOMEM; - goto err_mpl; + goto err_free_wr_wait; } mhp->max_mpl_len = length; mhp->rhp = rhp; ret = alloc_pbl(mhp, max_num_sg); if (ret) - goto err1; + goto err_free_dma; mhp->attr.pbl_size = max_num_sg; ret = allocate_stag(&rhp->rdev, &stag, php->pdid, - mhp->attr.pbl_size, mhp->attr.pbl_addr); + mhp->attr.pbl_size, mhp->attr.pbl_addr, + mhp->wr_waitp); if (ret) - goto err2; + goto err_free_pbl; mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_NSMR; mhp->attr.stag = stag; @@ -696,21 +743,23 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, mhp->ibmr.rkey = mhp->ibmr.lkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { ret = -ENOMEM; - goto err3; + goto err_dereg; } pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); return &(mhp->ibmr); -err3: +err_dereg: dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr, mhp->dereg_skb); -err2: + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); +err_free_pbl: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); -err1: +err_free_dma: dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); -err_mpl: +err_free_wr_wait: + kfree(mhp->wr_waitp); +err_free_mhp: kfree(mhp); err: return ERR_PTR(ret); @@ -754,7 +803,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, 
mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr, mhp->dereg_skb); + mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); if (mhp->attr.pbl_size) c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); @@ -763,6 +812,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) if (mhp->umem) ib_umem_release(mhp->umem); pr_debug("mmid 0x%x ptr %p\n", mmid, mhp); + kfree(mhp->wr_waitp); kfree(mhp); return 0; } From 13ce83174afaf4ceb4dddd7b7e421778ee4fcf5e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 26 Sep 2017 13:08:08 -0700 Subject: [PATCH 078/296] iw_cxgb4: allocate wait object for each cq object Remove the local stack allocated c4iw_wr_wait object in preparation for correctly handling timeouts. Also cleaned up some error path unwind logic to make it more readable. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 63 +++++++++++++++----------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index da62535b3252..020216f5c37f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -33,12 +33,12 @@ #include "iw_cxgb4.h" static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, - struct c4iw_dev_ucontext *uctx, struct sk_buff *skb) + struct c4iw_dev_ucontext *uctx, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; - struct c4iw_wr_wait wr_wait; int ret; wr_len = sizeof *res_wr + sizeof *res; @@ -50,16 +50,16 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (uintptr_t)&wr_wait; + res_wr->cookie = (uintptr_t)wr_waitp; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_RESET; res->u.cq.iqid = cpu_to_be32(cq->cqid); - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(wr_waitp); ret = c4iw_ofld_send(rdev, skb); if (!ret) { - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); } kfree(cq->sw_queue); @@ -71,13 +71,13 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, } static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) { struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; int user = (uctx != &rdev->uctx); - struct c4iw_wr_wait wr_wait; int ret; struct sk_buff *skb; @@ -119,7 +119,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (uintptr_t)&wr_wait; + res_wr->cookie = (uintptr_t)wr_waitp; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_WRITE; @@ -139,13 +139,13 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, res->u.cq.iqsize = cpu_to_be16(cq->size); res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(wr_waitp); ret = c4iw_ofld_send(rdev, skb); if (ret) goto err4; - pr_debug("wait_event wr_wait %p\n", &wr_wait); - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + pr_debug("wait_event wr_wait %p\n", wr_waitp); + ret = 
c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); if (ret) goto err4; @@ -868,8 +868,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) : NULL; destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, - chp->destroy_skb); - chp->destroy_skb = NULL; + chp->destroy_skb, chp->wr_waitp); + kfree(chp->wr_waitp); kfree(chp); return 0; } @@ -901,12 +901,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); + chp->wr_waitp = kzalloc(sizeof(*chp->wr_waitp), GFP_KERNEL); + if (!chp->wr_waitp) { + ret = -ENOMEM; + goto err_free_chp; + } + c4iw_init_wr_wait(chp->wr_waitp); wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); if (!chp->destroy_skb) { ret = -ENOMEM; - goto err1; + goto err_free_wr_wait; } if (ib_context) @@ -947,9 +953,10 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, chp->cq.vector = vector; ret = create_cq(&rhp->rdev, &chp->cq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + chp->wr_waitp); if (ret) - goto err2; + goto err_free_skb; chp->rhp = rhp; chp->cq.size--; /* status page */ @@ -960,16 +967,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, init_waitqueue_head(&chp->wait); ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); if (ret) - goto err3; + goto err_destroy_cq; if (ucontext) { ret = -ENOMEM; mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) - goto err4; + goto err_remove_handle; mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) - goto err5; + goto err_free_mm; uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; @@ -984,7 +991,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp) - sizeof(uresp.reserved)); if (ret) - goto err6; + goto err_free_mm2; mm->key = uresp.key; mm->addr = virt_to_phys(chp->cq.queue); @@ -1000,19 +1007,21 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, (unsigned long long)chp->cq.dma_addr); return &chp->ibcq; -err6: +err_free_mm2: kfree(mm2); -err5: +err_free_mm: kfree(mm); -err4: +err_remove_handle: remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); -err3: +err_destroy_cq: destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, - chp->destroy_skb); -err2: + chp->destroy_skb, chp->wr_waitp); +err_free_skb: kfree_skb(chp->destroy_skb); -err1: +err_free_wr_wait: + kfree(chp->wr_waitp); +err_free_chp: kfree(chp); return ERR_PTR(ret); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 0fabe82f7b6d..3e0ac5648657 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -425,6 +425,7 @@ struct c4iw_cq { spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; + struct c4iw_wr_wait *wr_waitp; }; static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq) From 7088a9ba624599696f920eac552eac69366c8440 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 26 Sep 2017 13:11:36 -0700 Subject: [PATCH 079/296] iw_cxgb4: allocate wait object for each qp object Remove the local stack allocated c4iw_wr_wait object in preparation for correctly handling timeouts. Also cleaned up some error path unwind logic to make it more readable. 
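(The error-path cleanup mentioned above follows a common kernel idiom: name each unwind label after the action it performs, in reverse order of setup, instead of numbering them err1..errN. A self-contained sketch of that idiom, with all names invented:)

#include <stdlib.h>

struct thing {
        void *a;
        void *b;
};

static struct thing *thing_create(void)
{
        struct thing *t = calloc(1, sizeof(*t));

        if (!t)
                return NULL;
        t->a = malloc(64);
        if (!t->a)
                goto err_free_thing;
        t->b = malloc(64);
        if (!t->b)
                goto err_free_a;
        return t;

        /* Each label undoes exactly one prior step, so a reader can
         * verify the unwind without counting numeric labels. */
err_free_a:
        free(t->a);
err_free_thing:
        free(t);
        return NULL;
}

int main(void)
{
        struct thing *t = thing_create();

        if (t) {
                free(t->b);
                free(t->a);
                free(t);
        }
        return 0;
}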
Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/qp.c | 55 +++++++++++++++----------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 3e0ac5648657..f143c00731ad 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -487,6 +487,7 @@ struct c4iw_qp { int sq_sig_all; struct work_struct free_work; struct c4iw_ucontext *ucontext; + struct c4iw_wr_wait *wr_waitp; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 88e045b94773..a354001350ec 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -194,13 +194,13 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) { int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; - struct c4iw_wr_wait wr_wait; struct sk_buff *skb; int ret = 0; int eqsize; @@ -299,7 +299,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_NRES_V(2) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (uintptr_t)&wr_wait; + res_wr->cookie = (uintptr_t)wr_waitp; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; @@ -352,12 +352,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(wr_waitp); ret = c4iw_ofld_send(rdev, skb); if (ret) goto free_dma; - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); + ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, wq->sq.qid, __func__); if (ret) goto free_dma; @@ -730,6 +730,7 @@ static void free_qp_work(struct work_struct *work) if (ucontext) c4iw_put_ucontext(ucontext); + kfree(qhp->wr_waitp); kfree(qhp); } @@ -1794,6 +1795,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) return ERR_PTR(-ENOMEM); + + qhp->wr_waitp = kzalloc(sizeof(*qhp), GFP_KERNEL); + if (!qhp->wr_waitp) { + ret = -ENOMEM; + goto err_free_qhp; + } + qhp->wq.sq.size = sqsize; qhp->wq.sq.memsize = (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * @@ -1810,9 +1818,10 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, } ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, + qhp->wr_waitp); if (ret) - goto err1; + goto err_free_wr_wait; attrs->cap.max_recv_wr = rqsize - 1; attrs->cap.max_send_wr = sqsize - 1; @@ -1843,35 +1852,35 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) - goto err2; + goto err_destroy_qp; if (udata) { sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL); if (!sq_key_mm) { ret = -ENOMEM; - goto err3; + goto err_remove_handle; } rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); if (!rq_key_mm) { ret = -ENOMEM; - goto err4; + goto err_free_sq_key; } sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); if (!sq_db_key_mm) { ret = -ENOMEM; - goto err5; + goto err_free_rq_key; } rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); if (!rq_db_key_mm) { ret = -ENOMEM; - goto err6; + goto err_free_sq_db_key; } if (t4_sq_onchip(&qhp->wq.sq)) { ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), GFP_KERNEL); if (!ma_sync_key_mm) { ret = -ENOMEM; - goto err7; + goto err_free_rq_db_key; } uresp.flags = C4IW_QPF_ONCHIP; } else @@ -1901,7 +1910,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) - goto err8; + goto err_free_ma_sync_key; sq_key_mm->key = uresp.sq_key; sq_key_mm->addr = qhp->wq.sq.phys_addr; sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); @@ -1938,22 +1947,24 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size, qhp->wq.rq.memsize, attrs->cap.max_recv_wr); return &qhp->ibqp; -err8: +err_free_ma_sync_key: kfree(ma_sync_key_mm); -err7: +err_free_rq_db_key: kfree(rq_db_key_mm); -err6: +err_free_sq_db_key: kfree(sq_db_key_mm); -err5: +err_free_rq_key: kfree(rq_key_mm); -err4: +err_free_sq_key: kfree(sq_key_mm); -err3: +err_remove_handle: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); -err2: +err_destroy_qp: destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); -err1: +err_free_wr_wait: + kfree(qhp->wr_waitp); +err_free_qhp: kfree(qhp); return ERR_PTR(ret); } From ef885dc66c29dd8e6f6a12f164ed11237323c234 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 26 Sep 2017 13:12:16 -0700 Subject: [PATCH 080/296] iw_cxgb4: allocate wait object for each ep object Remove the embedded c4iw_wr_wait object in preparation for correctly handling timeouts. 
Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 38 ++++++++++++++++---------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/qp.c | 8 +++--- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 01e3aba19fe9..72a2d349c7fa 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -318,11 +318,18 @@ static void *alloc_ep(int size, gfp_t gfp) epc = kzalloc(size, gfp); if (epc) { + epc->wr_waitp = kzalloc(sizeof(*epc->wr_waitp), gfp); + if (!epc->wr_waitp) { + kfree(epc); + epc = NULL; + goto out; + } kref_init(&epc->kref); mutex_init(&epc->mutex); - c4iw_init_wr_wait(&epc->wr_wait); + c4iw_init_wr_wait(epc->wr_waitp); } pr_debug("alloc ep %p\n", epc); +out: return epc; } @@ -407,6 +414,7 @@ void _c4iw_free_ep(struct kref *kref) } if (!skb_queue_empty(&ep->com.ep_skb_list)) skb_queue_purge(&ep->com.ep_skb_list); + kfree(ep->com.wr_waitp); kfree(ep); } @@ -1872,7 +1880,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); __state_set(&ep->com, DEAD); release = 1; break; @@ -2100,7 +2108,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id); init_timer(&ep->timer); - c4iw_init_wr_wait(&ep->com.wr_wait); + c4iw_init_wr_wait(ep->com.wr_waitp); /* When MPA revision is different on nodes, the node with MPA_rev=2 * tries to reconnect with MPA_rev 1 for the same EP through @@ -2319,7 +2327,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) } pr_debug("ep %p status %d error %d\n", ep, rpl->status, status2errno(rpl->status)); - c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); + c4iw_wake_up(ep->com.wr_waitp, status2errno(rpl->status)); c4iw_put_ep(&ep->com); out: return 0; @@ -2336,7 +2344,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) goto out; } pr_debug("ep %p\n", ep); - c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); + c4iw_wake_up(ep->com.wr_waitp, status2errno(rpl->status)); c4iw_put_ep(&ep->com); out: return 0; @@ -2671,12 +2679,12 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) */ __state_set(&ep->com, CLOSING); pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); break; case MPA_REP_SENT: __state_set(&ep->com, CLOSING); pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); break; case FPDU_MODE: start_ep_timer(ep); @@ -2758,7 +2766,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) * MPA_REQ_SENT */ if (ep->com.state != MPA_REQ_SENT) - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); mutex_lock(&ep->com.mutex); switch (ep->com.state) { @@ -3346,14 +3354,14 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) if (err) return err; } - c4iw_init_wr_wait(&ep->com.wr_wait); + c4iw_init_wr_wait(ep->com.wr_waitp); err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], ep->stid, &sin6->sin6_addr, sin6->sin6_port, ep->com.dev->rdev.lldi.rxq_ids[0]); if (!err) err = c4iw_wait_for_reply(&ep->com.dev->rdev, - &ep->com.wr_wait, + 
ep->com.wr_waitp, 0, 0, __func__); else if (err > 0) err = net_xmit_errno(err); @@ -3389,13 +3397,13 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) } } while (err == -EBUSY); } else { - c4iw_init_wr_wait(&ep->com.wr_wait); + c4iw_init_wr_wait(ep->com.wr_waitp); err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, sin->sin_addr.s_addr, sin->sin_port, 0, ep->com.dev->rdev.lldi.rxq_ids[0]); if (!err) err = c4iw_wait_for_reply(&ep->com.dev->rdev, - &ep->com.wr_wait, + ep->com.wr_waitp, 0, 0, __func__); else if (err > 0) err = net_xmit_errno(err); @@ -3487,13 +3495,13 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) ep->com.dev->rdev.lldi.rxq_ids[0], 0); } else { struct sockaddr_in6 *sin6; - c4iw_init_wr_wait(&ep->com.wr_wait); + c4iw_init_wr_wait(ep->com.wr_waitp); err = cxgb4_remove_server( ep->com.dev->rdev.lldi.ports[0], ep->stid, ep->com.dev->rdev.lldi.rxq_ids[0], 0); if (err) goto done; - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, + err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp, 0, 0, __func__); sin6 = (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], @@ -4216,7 +4224,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) } pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); out: sched(dev, skb); return 0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f143c00731ad..12c3583fd1f1 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -842,7 +842,7 @@ struct c4iw_ep_common { struct mutex mutex; struct sockaddr_storage local_addr; struct sockaddr_storage remote_addr; - struct c4iw_wr_wait wr_wait; + struct c4iw_wr_wait *wr_waitp; unsigned long flags; unsigned long history; }; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index a354001350ec..cec2be552d88 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1355,14 +1355,14 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, wqe->flowid_len16 = cpu_to_be32( FW_WR_FLOWID_V(ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (uintptr_t)&ep->com.wr_wait; + wqe->cookie = (uintptr_t)ep->com.wr_waitp; wqe->u.fini.type = FW_RI_TYPE_FINI; ret = c4iw_ofld_send(&rhp->rdev, skb); if (ret) goto out; - ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid, + ret = c4iw_wait_for_reply(&rhp->rdev, ep->com.wr_waitp, qhp->ep->hwtid, qhp->wq.sq.qid, __func__); out: pr_debug("ret %d\n", ret); @@ -1425,7 +1425,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) FW_WR_FLOWID_V(qhp->ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait; + wqe->cookie = (uintptr_t)qhp->ep->com.wr_waitp; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = @@ -1466,7 +1466,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) if (ret) goto err1; - ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait, + ret = c4iw_wait_for_reply(&rhp->rdev, qhp->ep->com.wr_waitp, qhp->ep->hwtid, qhp->wq.sq.qid, __func__); if (!ret) goto out; From 2015f26cfadec126265fabfbb0e6566e2cca94b4 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 26 Sep 2017 13:13:17 -0700 Subject: [PATCH 081/296] iw_cxgb4: add referencing 
to wait objects For messages sent from the host to fw that solicit a reply from fw, the c4iw_wr_wait struct pointer is passed in the host->fw message, and included in the fw->host fw6_msg reply. This allows the sender to wait until the reply is received, and the code processing the ingress reply to wake up the sender. If c4iw_wait_for_reply() times out, however, we need to keep the c4iw_wr_wait object around in case the reply eventually does arrive. Otherwise we have touch-after-free bugs in the wake_up paths. This was hit due to a bad kernel driver that blocked ingress processing of cxgb4 for a long time, causing iw_cxgb4 timeouts; when ingress processing eventually resumed, the touch-after-free bug was hit. So I want to fix iw_cxgb4 such that we'll at least keep the wait object around until the reply comes. If it never comes we leak a small amount of memory, but if it does come late, we avoid potentially crashing the system. So add a kref struct to the c4iw_wr_wait struct, and take a reference before sending a message to FW that will generate a FW6 reply. And remove the reference (and potentially free the wait object) when the reply is processed. The ep code also uses the wr_wait for non FW6 CPL messages and doesn't embed the c4iw_wr_wait object in the message sent to firmware. So for those cases we add c4iw_wake_up_noref(). The mr/mw, cq, and qp object create/destroy paths do need this reference logic. For these paths, c4iw_ref_send_wait() is introduced to take the wr_wait reference, send the message to fw, and then wait for the reply. So going forward, iw_cxgb4 either uses c4iw_ofld_send(), c4iw_wait_for_reply() and c4iw_wake_up_noref() as is done in some of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref() (formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait object pointer embedded in the message and resulting FW6 reply.
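(A compact userspace model of the lifetime rule described above, with stand-in types rather than the driver's: the sender takes an extra reference before posting, the reply path drops its reference after the wake-up, and whichever side runs last frees the object, so even a reply that arrives after the waiter timed out never touches freed memory. Build with cc -pthread.)

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

/* Models c4iw_wr_wait with a kref-style refcount. */
struct wr_wait {
        atomic_int kref;
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
        int ret;
};

static struct wr_wait *wr_wait_alloc(void)
{
        struct wr_wait *w = calloc(1, sizeof(*w));

        if (w) {
                atomic_init(&w->kref, 1); /* owner's reference */
                pthread_mutex_init(&w->lock, NULL);
                pthread_cond_init(&w->cond, NULL);
        }
        return w;
}

static void wr_wait_put(struct wr_wait *w)
{
        if (atomic_fetch_sub(&w->kref, 1) == 1)
                free(w); /* last reference out frees the object */
}

/* Reply path: record status, wake the waiter, drop the send reference. */
static void wake_up_deref(struct wr_wait *w, int ret)
{
        pthread_mutex_lock(&w->lock);
        w->ret = ret;
        w->done = 1;
        pthread_cond_signal(&w->cond);
        pthread_mutex_unlock(&w->lock);
        wr_wait_put(w);
}

static void *fw_reply(void *arg)
{
        wake_up_deref(arg, 0); /* may run long after the waiter gave up */
        return NULL;
}

int main(void)
{
        struct wr_wait *w = wr_wait_alloc();
        pthread_t t;

        if (!w)
                return 1;
        atomic_fetch_add(&w->kref, 1);         /* reference for the message */
        pthread_create(&t, NULL, fw_reply, w); /* models posting to fw */

        pthread_mutex_lock(&w->lock);
        while (!w->done) /* the real code waits with a timeout */
                pthread_cond_wait(&w->cond, &w->lock);
        pthread_mutex_unlock(&w->lock);

        pthread_join(t, NULL);
        wr_wait_put(w); /* owner's reference; frees if reply already put */
        return 0;
}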
Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 20 ++++----- drivers/infiniband/hw/cxgb4/cq.c | 18 +++----- drivers/infiniband/hw/cxgb4/device.c | 21 +++++++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 60 ++++++++++++++++++++++++-- drivers/infiniband/hw/cxgb4/mem.c | 38 ++++++++-------- drivers/infiniband/hw/cxgb4/qp.c | 31 +++++-------- 6 files changed, 123 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 72a2d349c7fa..e395afcd7d33 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -318,7 +318,7 @@ static void *alloc_ep(int size, gfp_t gfp) epc = kzalloc(size, gfp); if (epc) { - epc->wr_waitp = kzalloc(sizeof(*epc->wr_waitp), gfp); + epc->wr_waitp = c4iw_alloc_wr_wait(gfp); if (!epc->wr_waitp) { kfree(epc); epc = NULL; @@ -414,7 +414,7 @@ void _c4iw_free_ep(struct kref *kref) } if (!skb_queue_empty(&ep->com.ep_skb_list)) skb_queue_purge(&ep->com.ep_skb_list); - kfree(ep->com.wr_waitp); + c4iw_put_wr_wait(ep->com.wr_waitp); kfree(ep); } @@ -1880,7 +1880,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: - c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); __state_set(&ep->com, DEAD); release = 1; break; @@ -2327,7 +2327,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) } pr_debug("ep %p status %d error %d\n", ep, rpl->status, status2errno(rpl->status)); - c4iw_wake_up(ep->com.wr_waitp, status2errno(rpl->status)); + c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); c4iw_put_ep(&ep->com); out: return 0; @@ -2344,7 +2344,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) goto out; } pr_debug("ep %p\n", ep); - c4iw_wake_up(ep->com.wr_waitp, status2errno(rpl->status)); + c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); c4iw_put_ep(&ep->com); out: return 0; @@ -2679,12 +2679,12 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) */ __state_set(&ep->com, CLOSING); pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); - c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); break; case MPA_REP_SENT: __state_set(&ep->com, CLOSING); pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); - c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); break; case FPDU_MODE: start_ep_timer(ep); @@ -2766,7 +2766,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) * MPA_REQ_SENT */ if (ep->com.state != MPA_REQ_SENT) - c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); mutex_lock(&ep->com.mutex); switch (ep->com.state) { @@ -4187,7 +4187,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret); if (wr_waitp) - c4iw_wake_up(wr_waitp, ret ? -ret : 0); + c4iw_wake_up_deref(wr_waitp, ret ? 
-ret : 0); kfree_skb(skb); break; case FW6_TYPE_CQE: @@ -4224,7 +4224,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) } pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); - c4iw_wake_up(ep->com.wr_waitp, -ECONNRESET); + c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); out: sched(dev, skb); return 0; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 020216f5c37f..8e2d490e757a 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -57,10 +57,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, res->u.cq.iqid = cpu_to_be32(cq->cqid); c4iw_init_wr_wait(wr_waitp); - ret = c4iw_ofld_send(rdev, skb); - if (!ret) { - ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); - } + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); kfree(cq->sw_queue); dma_free_coherent(&(rdev->lldi.pdev->dev), @@ -140,12 +137,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); c4iw_init_wr_wait(wr_waitp); - - ret = c4iw_ofld_send(rdev, skb); - if (ret) - goto err4; - pr_debug("wait_event wr_wait %p\n", wr_waitp); - ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); if (ret) goto err4; @@ -869,7 +861,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, chp->destroy_skb, chp->wr_waitp); - kfree(chp->wr_waitp); + c4iw_put_wr_wait(chp->wr_waitp); kfree(chp); return 0; } @@ -901,7 +893,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); - chp->wr_waitp = kzalloc(sizeof(*chp->wr_waitp), GFP_KERNEL); + chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!chp->wr_waitp) { ret = -ENOMEM; goto err_free_chp; @@ -1020,7 +1012,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, err_free_skb: kfree_skb(chp->destroy_skb); err_free_wr_wait: - kfree(chp->wr_waitp); + c4iw_put_wr_wait(chp->wr_waitp); err_free_chp: kfree(chp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index d19f7fb2966c..fa16a4a8c9a4 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1518,6 +1518,27 @@ static struct cxgb4_uld_info c4iw_uld_info = { .control = c4iw_uld_control, }; +void _c4iw_free_wr_wait(struct kref *kref) +{ + struct c4iw_wr_wait *wr_waitp; + + wr_waitp = container_of(kref, struct c4iw_wr_wait, kref); + pr_debug("Free wr_wait %p\n", wr_waitp); + kfree(wr_waitp); +} + +struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp) +{ + struct c4iw_wr_wait *wr_waitp; + + wr_waitp = kzalloc(sizeof(*wr_waitp), gfp); + if (wr_waitp) { + kref_init(&wr_waitp->kref); + pr_debug("wr_wait %p\n", wr_waitp); + } + return wr_waitp; +} + static int __init c4iw_init_module(void) { int err; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 12c3583fd1f1..d730bb735d74 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -202,18 +202,50 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) struct c4iw_wr_wait { struct completion completion; int ret; + struct kref kref; }; +void _c4iw_free_wr_wait(struct kref *kref); + +static inline void c4iw_put_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + pr_debug("wr_wait %p ref before put 
%u\n", wr_waitp, + kref_read(&wr_waitp->kref)); + WARN_ON(kref_read(&wr_waitp->kref) == 0); + kref_put(&wr_waitp->kref, _c4iw_free_wr_wait); +} + +static inline void c4iw_get_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + pr_debug("wr_wait %p ref before get %u\n", wr_waitp, + kref_read(&wr_waitp->kref)); + WARN_ON(kref_read(&wr_waitp->kref) == 0); + kref_get(&wr_waitp->kref); +} + static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) { wr_waitp->ret = 0; init_completion(&wr_waitp->completion); } -static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) +static inline void _c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret, + bool deref) { wr_waitp->ret = ret; complete(&wr_waitp->completion); + if (deref) + c4iw_put_wr_wait(wr_waitp); +} + +static inline void c4iw_wake_up_noref(struct c4iw_wr_wait *wr_waitp, int ret) +{ + _c4iw_wake_up(wr_waitp, ret, false); +} + +static inline void c4iw_wake_up_deref(struct c4iw_wr_wait *wr_waitp, int ret) +{ + _c4iw_wake_up(wr_waitp, ret, true); } static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, @@ -234,14 +266,36 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, func, pci_name(rdev->lldi.pdev), hwtid, qpid); rdev->flags |= T4_FATAL_ERROR; wr_waitp->ret = -EIO; + goto out; } -out: if (wr_waitp->ret) pr_debug("%s: FW reply %d tid %u qpid %u\n", pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid); +out: return wr_waitp->ret; } +int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); + +static inline int c4iw_ref_send_wait(struct c4iw_rdev *rdev, + struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp, + u32 hwtid, u32 qpid, + const char *func) +{ + int ret; + + pr_debug("%s wr_wait %p hwtid %u qpid %u\n", func, wr_waitp, hwtid, + qpid); + c4iw_get_wr_wait(wr_waitp); + ret = c4iw_ofld_send(rdev, skb); + if (ret) { + c4iw_put_wr_wait(wr_waitp); + return ret; + } + return c4iw_wait_for_reply(rdev, wr_waitp, hwtid, qpid, func); +} + enum db_state { NORMAL = 0, FLOW_CONTROL = 1, @@ -991,7 +1045,6 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); -int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); void c4iw_flush_hw_cq(struct c4iw_cq *chp); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); @@ -1019,5 +1072,6 @@ extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); +struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index b2523b213c86..7e0eb201cc26 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -100,11 +100,10 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, sgl->len0 = cpu_to_be32(len); sgl->addr0 = cpu_to_be64(data); - ret = c4iw_ofld_send(rdev, skb); - if (ret) - return ret; if (wr_waitp) - ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + else + ret = c4iw_ofld_send(rdev, skb); return ret; } @@ -173,14 +172,17 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (copy_len % T4_ULPTX_MIN_IO) memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - 
(copy_len % T4_ULPTX_MIN_IO)); - ret = c4iw_ofld_send(rdev, skb); - skb = NULL; + if (i == (num_wqe-1)) + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, + __func__); + else + ret = c4iw_ofld_send(rdev, skb); if (ret) - return ret; + break; + skb = NULL; len -= C4IW_MAX_INLINE_SIZE; } - ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, 0, __func__); return ret; } @@ -447,7 +449,7 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); - mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!mhp->wr_waitp) { ret = -ENOMEM; goto err_free_mhp; @@ -485,7 +487,7 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); err_free_wr_wait: - kfree(mhp->wr_waitp); + c4iw_put_wr_wait(mhp->wr_waitp); err_free_skb: kfree_skb(mhp->dereg_skb); err_free_mhp: @@ -522,7 +524,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); - mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!mhp->wr_waitp) goto err_free_mhp; @@ -600,7 +602,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, err_free_skb: kfree_skb(mhp->dereg_skb); err_free_wr_wait: - kfree(mhp->wr_waitp); + c4iw_put_wr_wait(mhp->wr_waitp); err_free_mhp: kfree(mhp); return ERR_PTR(err); @@ -625,7 +627,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, if (!mhp) return ERR_PTR(-ENOMEM); - mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!mhp->wr_waitp) { ret = -ENOMEM; goto free_mhp; @@ -659,7 +661,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, free_skb: kfree_skb(mhp->dereg_skb); free_wr_wait: - kfree(mhp->wr_waitp); + c4iw_put_wr_wait(mhp->wr_waitp); free_mhp: kfree(mhp); return ERR_PTR(ret); @@ -678,7 +680,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, mhp->wr_waitp); kfree_skb(mhp->dereg_skb); - kfree(mhp->wr_waitp); + c4iw_put_wr_wait(mhp->wr_waitp); kfree(mhp); pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp); return 0; @@ -710,7 +712,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, goto err; } - mhp->wr_waitp = kzalloc(sizeof(*mhp->wr_waitp), GFP_KERNEL); + mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!mhp->wr_waitp) { ret = -ENOMEM; goto err_free_mhp; @@ -758,7 +760,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); err_free_wr_wait: - kfree(mhp->wr_waitp); + c4iw_put_wr_wait(mhp->wr_waitp); err_free_mhp: kfree(mhp); err: @@ -812,7 +814,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) if (mhp->umem) ib_umem_release(mhp->umem); pr_debug("mmid 0x%x ptr %p\n", mmid, mhp); - kfree(mhp->wr_waitp); + c4iw_put_wr_wait(mhp->wr_waitp); kfree(mhp); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cec2be552d88..56655a000121 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -353,11 +353,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); c4iw_init_wr_wait(wr_waitp); - - ret = c4iw_ofld_send(rdev, skb); 
- if (ret) - goto free_dma; - ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, wq->sq.qid, __func__); + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__); if (ret) goto free_dma;
@@ -730,7 +726,7 @@ static void free_qp_work(struct work_struct *work) if (ucontext) c4iw_put_ucontext(ucontext); - kfree(qhp->wr_waitp); + c4iw_put_wr_wait(qhp->wr_waitp); kfree(qhp); }
@@ -1358,13 +1354,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, wqe->cookie = (uintptr_t)ep->com.wr_waitp; wqe->u.fini.type = FW_RI_TYPE_FINI; - ret = c4iw_ofld_send(&rhp->rdev, skb); - if (ret) - goto out; - ret = c4iw_wait_for_reply(&rhp->rdev, ep->com.wr_waitp, qhp->ep->hwtid, - qhp->wq.sq.qid, __func__); -out: + ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); + pr_debug("ret %d\n", ret); return ret; }
@@ -1462,15 +1455,11 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); - ret = c4iw_ofld_send(&rhp->rdev, skb); - if (ret) - goto err1; - - ret = c4iw_wait_for_reply(&rhp->rdev, qhp->ep->com.wr_waitp, - qhp->ep->hwtid, qhp->wq.sq.qid, __func__); + ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); if (!ret) goto out; -err1: + free_ird(rhp, qhp->attr.max_ird); out: pr_debug("ret %d\n", ret);
@@ -1796,7 +1785,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (!qhp) return ERR_PTR(-ENOMEM); - qhp->wr_waitp = kzalloc(sizeof(*qhp), GFP_KERNEL); + qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!qhp->wr_waitp) { ret = -ENOMEM; goto err_free_qhp;
@@ -1963,7 +1952,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); err_free_wr_wait: - kfree(qhp->wr_waitp); + c4iw_put_wr_wait(qhp->wr_waitp); err_free_qhp: kfree(qhp); return ERR_PTR(ret);
From be7acd9d01c65a5d947ecd36d7b78b838b7313e5 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 29 Sep 2017 23:10:07 +0800 Subject: [PATCH 082/296] RDMA/hns: Modify the value with rd&dest_rd of qp_attr
The values of max_rd_atomic and max_dest_rd_atomic returned by query_qp are incorrect. On the hip06 SoC the QP context stores these attributes as log2 of the value, so each attribute must be recovered by left shifting 1 by the stored field.
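The decoding convention is worth spelling out: the context field holds log2 of the depth, and the user-visible attribute is 1 << field. A minimal standalone sketch of that convention (the helper name is illustrative, not driver code):

#include <stdio.h>

/* Hypothetical helper: hip06 stores the initiator depth and responder
 * resources as log2(value), so a query must undo the encoding with a
 * left shift, which is what the hunk below adds. */
static unsigned int decode_rd_atomic(unsigned int log2_field)
{
	return 1U << log2_field;
}

int main(void)
{
	/* a stored field of 3 corresponds to a queried depth of 8 */
	printf("%u\n", decode_rd_atomic(3));
	return 0;
}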
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 290fcc2d8b98..2a3c8bf2dd93 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3484,10 +3484,10 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S); qp_attr->port_num = hr_qp->port + 1; qp_attr->sq_draining = 0; - qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156, + qp_attr->max_rd_atomic = 1 << roce_get_field(context->qpc_bytes_156, QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S); - qp_attr->max_dest_rd_atomic = roce_get_field(context->qpc_bytes_32, + qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M, QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S); qp_attr->min_rnr_timer = (u8)(roce_get_field(context->qpc_bytes_24, From f44c863be282f575becb1aacb69fd42f8ea53c6d Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 29 Sep 2017 23:10:08 +0800 Subject: [PATCH 083/296] RDMA/hns: Refactor code for readability Put the code for checking the send doorbell status into a separate function and call it from check_qp_db_process_status to improve indenting and readability. It fixes the warning from static checker: drivers/infiniband/hw/hns/hns_roce_hw_v1.c:3562 check_qp_db_process_status() warn: inconsistent indenting. Fixes: 5f110ac4bed8 ("IB/hns: Fix for checkpatch.pl comment style) Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 95 ++++++++++++---------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 2a3c8bf2dd93..5d232e3e0daa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3532,6 +3532,53 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); } +static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, + u32 *old_send, u32 *old_retry, + u32 *tsp_st, u32 *success_flags) +{ + u32 sdb_retry_cnt; + u32 sdb_send_ptr; + u32 cur_cnt, old_cnt; + u32 send_ptr; + + sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); + sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); + cur_cnt = roce_get_field(sdb_send_ptr, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(sdb_retry_cnt, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { + old_cnt = roce_get_field(*old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(*old_retry, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) + *success_flags = 1; + } else { + old_cnt = roce_get_field(*old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); + if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { + *success_flags = 1; + } else { + send_ptr = roce_get_field(*old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + 
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + + roce_get_field(sdb_retry_cnt, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + roce_set_field(*old_send, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, + send_ptr); + } + } +} + static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, u32 sdb_issue_ptr, @@ -3539,12 +3586,10 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, u32 *wait_stage) { struct device *dev = &hr_dev->pdev->dev; - u32 sdb_retry_cnt, old_retry; u32 sdb_send_ptr, old_send; u32 success_flags = 0; - u32 cur_cnt, old_cnt; unsigned long end; - u32 send_ptr; + u32 old_retry; u32 inv_cnt; u32 tsp_st; @@ -3602,47 +3647,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - sdb_send_ptr = roce_read(hr_dev, - ROCEE_SDB_SEND_PTR_REG); - sdb_retry_cnt = roce_read(hr_dev, - ROCEE_SDB_RETRY_CNT_REG); - cur_cnt = roce_get_field(sdb_send_ptr, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - if (!roce_get_bit(tsp_st, - ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(old_send, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(old_retry, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) - success_flags = 1; - } else { - old_cnt = roce_get_field(old_send, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); - if (cur_cnt - old_cnt > - SDB_ST_CMP_VAL) { - success_flags = 1; - } else { - send_ptr = - roce_get_field(old_send, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(old_send, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, - send_ptr); - } - } + hns_roce_check_sdb_status(hr_dev, &old_send, + &old_retry, &tsp_st, + &success_flags); } while (!success_flags); } From a74dc41d49959e46ca356a388fab3a23a4b593cd Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Fri, 29 Sep 2017 23:10:09 +0800 Subject: [PATCH 084/296] RDMA/hns: Check return value of kzalloc When lp_qp_work is NULL, we should return ENOMEM. In order to do so, we had to make some upper layer functions return a value instead of being void type so we can propagate the error up the stack. This patch fixes the smatch error as below: drivers/infiniband/hw/hns/hns_roce_hw_v1.c:918 hns_roce_v1_recreate_lp_qp() error: potential null dereference 'lp_qp_work'. 
(kzalloc returns null) Signed-off-by: Wei Hu (Xavier) Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 16 +++++++++++++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++-- drivers/infiniband/hw/hns/hns_roce_main.c | 17 +++++++++++------ 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4f43c91db01a..4d9d5d7da60a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -581,7 +581,7 @@ struct hns_roce_hw { int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); void (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, union ib_gid *gid); - void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); + int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5d232e3e0daa..93010a5d0cbf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -912,6 +912,8 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work), GFP_KERNEL); + if (!lp_qp_work) + return -ENOMEM; INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn); @@ -1719,7 +1721,8 @@ void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, (HNS_ROCE_V1_GID_NUM * gid_idx)); } -void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) +static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, + u8 *addr) { u32 reg_smac_l; u16 reg_smac_h; @@ -1732,8 +1735,13 @@ void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) * because of smac not equal to dmac. * We Need to release and create reserved qp again. 
*/ - if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev)) - dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n"); + if (hr_dev->hw->dereg_mr) { + int ret; + + ret = hns_roce_v1_recreate_lp_qp(hr_dev); + if (ret && ret != -ETIMEDOUT) + return ret; + } p = (u32 *)(&addr[0]); reg_smac_l = *p; @@ -1748,6 +1756,8 @@ void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); + + return 0; } void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0eab9cedc54b..fa60eb3d0248 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1068,8 +1068,8 @@ static void hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val); } -static void hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, - u8 *addr) +static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, + u8 *addr) { u16 reg_smac_h; u32 reg_smac_l; @@ -1084,6 +1084,8 @@ static void hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h); roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); + + return 0; } static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 7a0c1e8f45dd..6f2d57206c08 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -59,19 +59,19 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) } EXPORT_SYMBOL_GPL(hns_get_gid_index); -static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) +static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; u32 i = 0; if (!memcmp(hr_dev->dev_addr[port], addr, MAC_ADDR_OCTET_NUM)) - return; + return 0; for (i = 0; i < MAC_ADDR_OCTET_NUM; i++) hr_dev->dev_addr[port][i] = addr[i]; phy_port = hr_dev->iboe.phy_port[port]; - hr_dev->hw->set_mac(hr_dev, phy_port, addr); + return hr_dev->hw->set_mac(hr_dev, phy_port, addr); } static int hns_roce_add_gid(struct ib_device *device, u8 port_num, @@ -119,6 +119,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, { struct device *dev = hr_dev->dev; struct net_device *netdev; + int ret = 0; netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { @@ -131,7 +132,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, case NETDEV_CHANGE: case NETDEV_REGISTER: case NETDEV_CHANGEADDR: - hns_roce_set_mac(hr_dev, port, netdev->dev_addr); + ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr); break; case NETDEV_DOWN: /* @@ -143,7 +144,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, break; } - return 0; + return ret; } static int hns_roce_netdev_event(struct notifier_block *self, @@ -172,13 +173,17 @@ static int hns_roce_netdev_event(struct notifier_block *self, static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) { + int ret; u8 i; for (i = 0; i < hr_dev->caps.num_ports; i++) { if (hr_dev->hw->set_mtu) hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i], hr_dev->caps.max_mtu); - hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr); 
+ ret = hns_roce_set_mac(hr_dev, i, + hr_dev->iboe.netdevs[i]->dev_addr); + if (ret) + return ret; } return 0;
From 512f4f1653e63b6efbdccb296b812fa809b7c95c Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 29 Sep 2017 23:10:10 +0800 Subject: [PATCH 085/296] RDMA/hns: Only assign dest_qp if IB_QP_DEST_QPN bit is set
It is only valid to assign dest_qp_num into the dest_qp field of the QP context when the IB_QP_DEST_QPN flag of attr_mask is set.
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 93010a5d0cbf..55e6daabcde2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2888,10 +2888,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S, ilog2((unsigned int)attr->max_dest_rd_atomic)); - roce_set_field(context->qpc_bytes_36, - QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, - QP_CONTEXT_QPC_BYTES_36_DEST_QP_S, - attr->dest_qp_num); + if (attr_mask & IB_QP_DEST_QPN) + roce_set_field(context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_S, + attr->dest_qp_num); /* Configure GID index */ port_num = rdma_ah_get_port_num(&attr->ah_attr);
From 2bf910d4a23e94a3e2528b65365a5c4a102a3c8e Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 29 Sep 2017 23:10:11 +0800 Subject: [PATCH 086/296] RDMA/hns: Set rdma_ah_attr type for querying qp
When querying a QP, the driver needs to return the RoCE ah_attr type, since the address handle attribute may be specific to RoCE devices.
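rdma_ah_attr behaves as a tagged union: consumers dispatch on the type member before touching transport-specific members, so a query that leaves the type unset hands back an attribute that cannot be safely interpreted. A reduced sketch of the consumer-side pattern (toy types, not the kernel definitions):

#include <stdio.h>

/* Toy stand-ins for the rdma_ah_attr tagged union. */
enum toy_ah_type { TOY_AH_UNDEFINED, TOY_AH_IB, TOY_AH_ROCE };

struct toy_ah_attr {
	enum toy_ah_type type;	/* must be set by the driver on query */
};

static void consume(const struct toy_ah_attr *attr)
{
	switch (attr->type) {
	case TOY_AH_ROCE:
		printf("interpret RoCE-specific members\n");
		break;
	case TOY_AH_IB:
		printf("interpret IB-specific members\n");
		break;
	default:
		printf("type unset: union cannot be interpreted\n");
	}
}

int main(void)
{
	struct toy_ah_attr attr = { .type = TOY_AH_ROCE };

	consume(&attr);
	return 0;
}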
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 55e6daabcde2..98887dd8ccb3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3359,6 +3359,7 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->path_mtu = IB_MTU_256; qp_attr->path_mig_state = IB_MIG_ARMED; qp_attr->qkey = QKEY_VAL; + qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; qp_attr->rq_psn = 0; qp_attr->sq_psn = 0; qp_attr->dest_qp_num = 1; @@ -3440,6 +3441,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, QP_CONTEXT_QPC_BYTES_48_MTU_M, QP_CONTEXT_QPC_BYTES_48_MTU_S); qp_attr->path_mig_state = IB_MIG_ARMED; + qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; if (hr_qp->ibqp.qp_type == IB_QPT_UD) qp_attr->qkey = QKEY_VAL; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index fa60eb3d0248..95678b7d2344 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2831,6 +2831,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S); qp_attr->path_mig_state = IB_MIG_ARMED; + qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; if (hr_qp->ibqp.qp_type == IB_QPT_UD) qp_attr->qkey = V2_QKEY_VAL; From 5e437b1d7e8d31ff9a4b8e898eb3a6cee309edd9 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Fri, 29 Sep 2017 23:10:12 +0800 Subject: [PATCH 087/296] RDMA/hns: Avoid NULL pointer exception After the loop in hns_roce_v1_mr_free_work_fn function, it is possible that all qps will have been freed (in which case ne will be 0). If that happens, then later in the function when we dereference hr_qp we will get an exception. Check ne is not 0 to make sure we actually have an hr_qp left to work on. This patch fixes the smatch error as below: drivers/infiniband/hw/hns/hns_roce_hw_v1.c:1009 hns_roce_v1_mr_free_work_fn() error: we previously assumed 'hr_qp' could be null Signed-off-by: Wei Hu (Xavier) Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 98887dd8ccb3..852db18ec128 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1004,6 +1004,11 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) } } + if (!ne) { + dev_err(dev, "Reseved loop qp is absent!\n"); + goto free_work; + } + do { ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); if (ret < 0) { From 7944ff4e5d8a457c1d1b7fe063b6b774c47dea86 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 29 Sep 2017 23:10:13 +0800 Subject: [PATCH 088/296] RDMA/hns: Don't unregister a callback we didn't register The driver doesn't actually register an inetaddr notifier function, so there is no need to unregister it on shutdown. 
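The general rule the patch enforces is that notifier registration and unregistration must be strictly paired: unregistering a notifier_block that was never linked into a chain walks a list it was never part of. A toy sketch of the pairing discipline (illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Track which callbacks were actually registered so that teardown
 * never tries to remove one that was never added. */
struct toy_notifier_state {
	bool netdev_registered;
	bool inet_registered;	/* never set: no inetaddr hook exists */
};

static void toy_teardown(struct toy_notifier_state *s)
{
	if (s->netdev_registered)
		printf("unregister netdevice notifier\n");
	if (s->inet_registered)	/* stays false, so no bogus unregister */
		printf("unregister inetaddr notifier\n");
}

int main(void)
{
	struct toy_notifier_state s = { .netdev_registered = true };

	toy_teardown(&s);
	return 0;
}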
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_main.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4d9d5d7da60a..b314ac0451af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -474,7 +474,6 @@ struct hns_roce_ib_iboe { spinlock_t lock; struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; struct notifier_block nb; - struct notifier_block nb_inet; u8 phy_port[HNS_ROCE_MAX_PORTS]; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 6f2d57206c08..3dcb2df20ba3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -424,7 +424,6 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) { struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; - unregister_inetaddr_notifier(&iboe->nb_inet); unregister_netdevice_notifier(&iboe->nb); ib_unregister_device(&hr_dev->ib_dev); } From 988e175b9df683ac8c465fca60d64dbe9e4dd81e Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 29 Sep 2017 23:10:14 +0800 Subject: [PATCH 089/296] RDMA/hns: Fix calltrace for sleeping in atomic We replace usleep_range that was excessively long anyway with udelay to avoid using usleep_range function in spin_lock_bh spin region, thereby avoiding this calltrace: BUG: scheduling while atomic: insmod/1428/0x00000002 Modules linked in: hns-roce-hw-v2(+) hns_roce rdma_ucm rdma_cm iw_cm ib_uverbs ib_cm ib_core CPU: 0 PID: 1428 Comm: insmod Not tainted 4.12.0-rc1-00677-g252e8fd-dirty #43 Hardware name: (null) (DT) Call trace: [] dump_backtrace+0x0/0x274 [] show_stack+0x20/0x28 [] dump_stack+0x94/0xb4 [] __schedule_bug+0x68/0x84 [] __schedule+0x5fc/0x70c [] schedule+0x40/0xa4 [] schedule_hrtimeout_range_clock+0x98/0xfc [] schedule_hrtimeout_range+0x34/0x40 [] usleep_range+0x6c/0x80 [] hns_roce_cmd_send+0xe4/0x264 [hns-roce-hw-v2] [] hns_roce_cmd_query_hw_info+0x40/0x60 [hns-roce-hw-v2] [] hns_roce_v2_profile+0x28/0x668 [hns-roce-hw-v2] [] hns_roce_init+0x6c/0x948 [hns-roce-hw-v2] Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 95678b7d2344..8378ee118ba6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -589,7 +589,7 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, do { if (hns_roce_cmq_csq_done(hr_dev)) break; - usleep_range(1000, 2000); + udelay(1); timeout++; } while (timeout < priv->cmq.tx_timeout); } From 63ea641f496ffe08b7d81589a1e1e1c43a6af9bd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 21:13:08 +0100 Subject: [PATCH 090/296] RDMA/hns: return 0 rather than return a garbage status value For the case where hr_qp->state == IB_QPS_RESET, an uninitialized value in ret is being returned by function hns_roce_v2_query_qp. Fix this by setting ret to 0 for this specific return condition. 
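The bug class is a local status variable that is assigned on some paths but not on the path that jumps straight to the common exit label. A minimal sketch (hypothetical names, reduced from the driver logic):

#include <stdio.h>

/* 'ret' starts out uninitialized; the fix is to assign it on the
 * early-exit path too, so every route to 'done' returns a defined
 * status. */
static int toy_query(int state_is_reset)
{
	int ret;

	if (state_is_reset) {
		ret = 0;	/* the added assignment */
		goto done;
	}
	ret = -1;		/* stand-in for the normal query path */
done:
	return ret;
}

int main(void)
{
	printf("reset path returns %d\n", toy_query(1));
	return 0;
}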
Detected by CoverityScan, CID#1457203 ("Unitialized scalar variable") Signed-off-by: Colin Ian King Acked-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8378ee118ba6..b99d70aa1120 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2807,6 +2807,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (hr_qp->state == IB_QPS_RESET) { qp_attr->qp_state = IB_QPS_RESET; + ret = 0; goto done; } From 1660a26adbb3aab6af0bb067e15a831f926c163a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 3 Oct 2017 11:11:46 -0500 Subject: [PATCH 091/296] i40iw: Do not retransmit MPA request after it is ACKed The ACK packets for an MPA request are ignored and the MPA request is retransmitted if the MPA reply is late or missing. Fix this by checking ack_rcvd variable before retransmitting a packet. Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 13 ++++++++++--- drivers/infiniband/hw/i40iw/i40iw_cm.h | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 9017c1cc51d4..50c43766defd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1267,13 +1267,16 @@ static void i40iw_cm_timer_tick(unsigned long pass) spin_lock_irqsave(&cm_node->retrans_list_lock, flags); goto done; } - cm_node->cm_core->stats_pkt_retrans++; spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); vsi = &cm_node->iwdev->vsi; dev = cm_node->dev; - atomic_inc(&send_entry->sqbuf->refcount); - i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf); + + if (!cm_node->ack_rcvd) { + atomic_inc(&send_entry->sqbuf->refcount); + i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf); + cm_node->cm_core->stats_pkt_retrans++; + } spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (send_entry->send_retrans) { send_entry->retranscount--; @@ -2181,6 +2184,7 @@ static struct i40iw_cm_node *i40iw_make_cm_node( cm_node->cm_id = cm_info->cm_id; ether_addr_copy(cm_node->loc_mac, netdev->dev_addr); spin_lock_init(&cm_node->retrans_list_lock); + cm_node->ack_rcvd = false; atomic_set(&cm_node->ref_count, 1); /* associate our parent CM core */ @@ -2719,7 +2723,10 @@ static int i40iw_handle_ack_pkt(struct i40iw_cm_node *cm_node, cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + cm_node->ack_rcvd = false; i40iw_handle_rcv_mpa(cm_node, rbuf); + } else { + cm_node->ack_rcvd = true; } break; case I40IW_CM_STATE_LISTENING: diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index 45abef76295b..0d5840d2c4fc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -360,6 +360,7 @@ struct i40iw_cm_node { u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN]; struct i40iw_kmem_info mpa_hdr; + bool ack_rcvd; }; /* structure for client or CM to fill when making CM api calls. 
*/ From abae49e433c5b02b6ff583eee00321eee2bad18a Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 3 Oct 2017 11:11:47 -0500 Subject: [PATCH 092/296] i40iw: Do not generate CQE for RTR on QP flush If RTR WQE is posted and QP is flushed, a CQE is incorrectly generated for the RTR WQE. Add code to look for the RTR and not generate a CQE when QP is flushed. Fixes: 280cfc4b74e6 ("i40iw: user kernel shared files") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_uk.c | 13 +++++++++++++ drivers/infiniband/hw/i40iw/i40iw_user.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 0aadb7a0d1aa..42dde5924bb5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -821,6 +821,18 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, I40IW_RING_SET_TAIL(qp->rq_ring, array_idx + 1); pring = &qp->rq_ring; } else { + if (qp->first_sq_wq) { + qp->first_sq_wq = false; + if (!wqe_idx && (qp->sq_ring.head == qp->sq_ring.tail)) { + I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); + I40IW_RING_MOVE_TAIL(cq->cq_ring); + set_64bit_val(cq->shadow_area, 0, + I40IW_RING_GETCURRENT_HEAD(cq->cq_ring)); + memset(info, 0, sizeof(struct i40iw_cq_poll_info)); + return i40iw_cq_poll_completion(cq, info); + } + } + if (info->comp_status != I40IW_COMPL_STATUS_FLUSHED) { info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len; @@ -988,6 +1000,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, I40IW_RING_MOVE_TAIL(qp->sq_ring); I40IW_RING_MOVE_HEAD(qp->initial_ring, ret_code); qp->swqe_polarity = 1; + qp->first_sq_wq = true; qp->swqe_polarity_deferred = 1; qp->rwqe_polarity = 0; diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index 84be6f13b9c5..15e8f55983fd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -376,6 +376,7 @@ struct i40iw_qp_uk { u8 rwqe_polarity; u8 rq_wqe_size; u8 rq_wqe_size_multiplier; + bool first_sq_wq; bool deferred_flag; }; From 40837273d084ad1584dbaea5e8fbefe0a64f2a6b Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 3 Oct 2017 11:11:48 -0500 Subject: [PATCH 093/296] i40iw: Do not allow posting WR after QP is flushed A Work Request (WR) posted after QP is flushed will not get a flush completion. Correct this problem by not allowing posting of WRs after a QP is flushed. 
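The shape of the fix is a guard evaluated under the same lock that serializes posting, so no WR can slip in between the flush and the check. A hedged sketch with a pthread mutex standing in for the driver's spinlock (names are illustrative; compile with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Reject work requests once a flush has been issued, deciding under
 * the same lock that serializes posting. */
struct toy_qp {
	pthread_mutex_t lock;
	bool flush_issued;
};

static int toy_post_send(struct toy_qp *qp)
{
	int err = 0;

	pthread_mutex_lock(&qp->lock);
	if (qp->flush_issued)
		err = -EINVAL;	/* no flush completion would ever arrive */
	else
		printf("WR posted\n");
	pthread_mutex_unlock(&qp->lock);
	return err;
}

int main(void)
{
	struct toy_qp qp = { PTHREAD_MUTEX_INITIALIZER, true };

	printf("post after flush -> %d\n", toy_post_send(&qp));
	return 0;
}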
Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 28b3d02d511b..58d815f06e4e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2202,6 +2202,12 @@ static int i40iw_post_send(struct ib_qp *ibqp, ukqp = &iwqp->sc_qp.qp_uk; spin_lock_irqsave(&iwqp->lock, flags); + + if (iwqp->flush_issued) { + err = -EINVAL; + goto out; + } + while (ib_wr) { inv_stag = false; memset(&info, 0, sizeof(info)); @@ -2344,6 +2350,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, ib_wr = ib_wr->next; } +out: if (err) *bad_wr = ib_wr; else @@ -2376,6 +2383,12 @@ static int i40iw_post_recv(struct ib_qp *ibqp, memset(&post_recv, 0, sizeof(post_recv)); spin_lock_irqsave(&iwqp->lock, flags); + + if (iwqp->flush_issued) { + err = -EINVAL; + goto out; + } + while (ib_wr) { post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id;
From 9773afb97fc30d22db14a425fea519f4a658dc4e Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Mon, 2 Oct 2017 11:04:03 -0700 Subject: [PATCH 094/296] IB/hfi1: Add parsing for platform configuration format version 4
Platform configuration format version 4, which didn't use the file size field, is not parsed by the host driver; only version 5 is supported. Add logic to the parsing procedure to determine which format is being used, and allow data to be read from version 4 files.
Reviewed-by: Jan Sokolowski Reviewed-by: Ira Weiny Reviewed-by: Andrzej Kacprowski Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 5aea8f47e670..9471c4063ebe 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -113,6 +113,12 @@ struct css_header { #define MU_SIZE 8 #define EXPONENT_SIZE 4 +/* size of platform configuration partition */ +#define MAX_PLATFORM_CONFIG_FILE_SIZE 4096 + +/* size of file of plaform configuration encoded in format version 4 */ +#define PLATFORM_CONFIG_FORMAT_4_FILE_SIZE 528 + /* the file itself */ struct firmware_file { struct css_header css_header; @@ -1774,7 +1780,20 @@ int parse_platform_config(struct hfi1_devdata *dd) /* Field is file size in DWORDs */ file_length = (*ptr) * 4; - ptr++; + + /* + * Length can't be larger than partition size. Assume platform + * config format version 4 is being used. Interpret the file size + * field as header instead by not moving the pointer. + */ + if (file_length > MAX_PLATFORM_CONFIG_FILE_SIZE) { + dd_dev_info(dd, + "%s:File length out of bounds, using alternative format\n", + __func__); + file_length = PLATFORM_CONFIG_FORMAT_4_FILE_SIZE; + } else { + ptr++; + } if (file_length > dd->platform_config.size) { dd_dev_info(dd, "%s:File claims to be larger than read size\n", @@ -1789,7 +1808,8 @@ int parse_platform_config(struct hfi1_devdata *dd) /* * In both cases where we proceed, using the self-reported file length - * is the safer option + * is the safer option. In case of old format a predefined value is + * being used.
*/ while (ptr < (u32 *)(dd->platform_config.data + file_length)) { header1 = *ptr; From e08aa5947688d711d3a2db07604f3f8225d67180 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 2 Oct 2017 11:04:11 -0700 Subject: [PATCH 095/296] IB/hfi1: Fix output trace issues from 16B change The 16B changes to the output side of the header trace introduced two issues: 1. An uninitialized field "l4" for 9B packets This field needs to be given a value of 0 for 9B packets to insure a correct 9B trace. The fix adds a new define to insure that there is a dummy default for 9B packets to insure the correct string is decoded. 2. Use of entry vs. __entry in field references Fixes: Commit 863cf89d472f ("IB/hfi1: Add 16B trace support") Reported-by: Kaike Wan Reviewed-by: Don Hiatt Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cb74ae8b0991..44a2c8f86874 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -390,6 +390,7 @@ struct hfi1_packet { /* * OPA 16B L2/L4 Encodings */ +#define OPA_16B_L4_9B 0x00 #define OPA_16B_L2_TYPE 0x02 #define OPA_16B_L4_IB_LOCAL 0x09 #define OPA_16B_L4_IB_GLOBAL 0x0A diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 6dd0d639f27d..fb631278eccd 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -334,7 +334,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->dlid, &__entry->slid); - if (entry->l4 == OPA_16B_L4_IB_LOCAL) + if (__entry->l4 == OPA_16B_L4_IB_LOCAL) ohdr = &opah->opah.u.oth; else ohdr = &opah->opah.u.l.oth; @@ -348,6 +348,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->psn, &__entry->qpn); } else { + __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, &__entry->lnh, &__entry->lver, @@ -356,7 +357,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->len, &__entry->dlid, &__entry->slid); - if (entry->lnh == HFI1_LRH_BTH) + if (__entry->lnh == HFI1_LRH_BTH) ohdr = &opah->ibh.u.oth; else ohdr = &opah->ibh.u.l.oth; From d7d626179fb283aba73699071af0df6d00e32138 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 2 Oct 2017 11:04:19 -0700 Subject: [PATCH 096/296] IB/hfi1: Fix incorrect available receive user context count The addition of the VNIC contexts to num_rcv_contexts changes the meaning of the sysfs value nctxts from available user contexts, to user contexts + reserved VNIC contexts. User applications that use nctxts are now broken. Update the calculation so that VNIC contexts are used only if there are hardware contexts available, and do not silently affect nctxts. Update code to use the calculated VNIC context number. Update the sysfs value nctxts to be available user contexts only. Fixes: 2280740f01ae ("IB/hfi1: Virtual Network Interface Controller (VNIC) HW support") Reviewed-by: Ira Weiny Reviewed-by: Niranjana Vishwanathapura Reviewed-by: Mike Marciniszyn Cc: #v4.12+ Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 35 +++++++++++++++----------- drivers/infiniband/hw/hfi1/hfi.h | 2 ++ drivers/infiniband/hw/hfi1/sysfs.c | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 7 ++++-- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 091c735f5c9b..1660a641f4f7 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13021,7 +13021,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) first_sdma = last_general; last_sdma = first_sdma + dd->num_sdma; first_rx = last_sdma; - last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; + last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts; /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; @@ -13227,8 +13227,9 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * slow source, SDMACleanupDone) * N interrupts - one per used SDMA engine * M interrupt - one per kernel receive context + * V interrupt - one for each VNIC context */ - total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; + total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; /* ask for MSI-X interrupts */ request = request_msix(dd, total); @@ -13289,10 +13290,12 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * in array of contexts * freectxts - number of free user contexts * num_send_contexts - number of PIO send contexts being used + * num_vnic_contexts - number of contexts reserved for VNIC */ static int set_up_context_variables(struct hfi1_devdata *dd) { unsigned long num_kernel_contexts; + u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT; int total_contexts; int ret; unsigned ngroups; @@ -13326,6 +13329,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd) num_kernel_contexts); num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; } + + /* Accommodate VNIC contexts if possible */ + if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) { + dd_dev_err(dd, "No receive contexts available for VNIC\n"); + num_vnic_contexts = 0; + } + total_contexts = num_kernel_contexts + num_vnic_contexts; + /* * User contexts: * - default to 1 user context per real (non-HT) CPU core if @@ -13335,19 +13346,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd) num_user_contexts = cpumask_weight(&node_affinity.real_cpu_mask); - total_contexts = num_kernel_contexts + num_user_contexts; - /* * Adjust the counts given a global max. 
*/ - if (total_contexts > dd->chip_rcv_contexts) { + if (total_contexts + num_user_contexts > dd->chip_rcv_contexts) { dd_dev_err(dd, "Reducing # user receive contexts to: %d, from %d\n", - (int)(dd->chip_rcv_contexts - num_kernel_contexts), + (int)(dd->chip_rcv_contexts - total_contexts), (int)num_user_contexts); - num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts; /* recalculate */ - total_contexts = num_kernel_contexts + num_user_contexts; + num_user_contexts = dd->chip_rcv_contexts - total_contexts; } /* each user context requires an entry in the RMT */ @@ -13360,25 +13368,24 @@ static int set_up_context_variables(struct hfi1_devdata *dd) user_rmt_reduced); /* recalculate */ num_user_contexts = user_rmt_reduced; - total_contexts = num_kernel_contexts + num_user_contexts; } - /* Accommodate VNIC contexts */ - if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts) - total_contexts += HFI1_NUM_VNIC_CTXT; + total_contexts += num_user_contexts; /* the first N are kernel contexts, the rest are user/vnic contexts */ dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; dd->first_dyn_alloc_ctxt = num_kernel_contexts; + dd->num_vnic_contexts = num_vnic_contexts; dd->num_user_contexts = num_user_contexts; dd->freectxts = num_user_contexts; dd_dev_info(dd, - "rcv contexts: chip %d, used %d (kernel %d, user %d)\n", + "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", (int)dd->chip_rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, - (int)dd->num_rcv_contexts - dd->n_krcv_queues); + dd->num_vnic_contexts, + dd->num_user_contexts); /* * Receive array allocation: diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 44a2c8f86874..2f3c830669a9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1049,6 +1049,8 @@ struct hfi1_devdata { u64 z_send_schedule; u64 __percpu *send_schedule; + /* number of reserved contexts for VNIC usage */ + u16 num_vnic_contexts; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; /* number of pio send contexts in use by the driver */ diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 6d2702ef34ac..25e867393463 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -543,7 +543,7 @@ static ssize_t show_nctxts(struct device *device, * give a more accurate picture of total contexts available. 
*/ return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt, + min(dd->num_user_contexts, (u32)dd->sc_sizes[SC_USER].count)); } diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index f419cbb05928..1a17708be46a 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -840,6 +840,9 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, struct rdma_netdev *rn; int i, size, rc; + if (!dd->num_vnic_contexts) + return ERR_PTR(-ENOMEM); + if (!port_num || (port_num > dd->num_pports)) return ERR_PTR(-EINVAL); @@ -848,7 +851,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT); + dd->chip_sdma_engines, dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); @@ -856,7 +859,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; vinfo->num_tx_q = dd->chip_sdma_engines; - vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; + vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; rn->set_id = hfi1_vnic_set_vesw_id; From 9be6a5d788b0f236e3b30827187e5db33231fa74 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 2 Oct 2017 11:04:26 -0700 Subject: [PATCH 097/296] IB/hfi1: Prevent LNI out of sync by resetting host interface version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the link is disabled and re-enabled, the host version bit is not set again, so the firmware behaves as though it’s interacting with an old driver. This causes LNI to get out of sync. The host version bit needs to be set at load_8051_firmware() and _dc_start(). Currently, it's only set at load_8051_firmware(). Create a common function to set the bit with the intent to make the code more maintainable in the future, set the host version bit at _dc_start() and modify the 8051 command API to prevent a deadlock as _dc_start() is already holding the dc8051 lock. Fixes: 913cc67159bc ("IB/hfi1: Always perform offline transition") Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 77 ++++++++++++++++++--------- drivers/infiniband/hw/hfi1/chip.h | 1 + drivers/infiniband/hw/hfi1/firmware.c | 63 ++++++++++++++++------ 3 files changed, 101 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 1660a641f4f7..98bcd06c82d7 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6518,12 +6518,11 @@ static void _dc_start(struct hfi1_devdata *dd) if (!dd->dc_shutdown) return; - /* Take the 8051 out of reset */ - write_csr(dd, DC_DC8051_CFG_RST, 0ull); - /* Wait until 8051 is ready */ - if (wait_fm_ready(dd, TIMEOUT_8051_START)) - dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", - __func__); + /* + * Take the 8051 out of reset, wait until 8051 is ready, and set host + * version bit. + */ + release_and_wait_ready_8051_firmware(dd); /* Take away reset for LCB and RX FPE (set in lcb_shutdown). 
*/ write_csr(dd, DCC_CFG_RESET, 0x10); @@ -8591,30 +8590,23 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data) } /* + * If the 8051 is in reset mode (dd->dc_shutdown == 1), this function + * will still continue executing. + * * Returns: * < 0 = Linux error, not able to get access * > 0 = 8051 command RETURN_CODE */ -static int do_8051_command( - struct hfi1_devdata *dd, - u32 type, - u64 in_data, - u64 *out_data) +static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, + u64 *out_data) { u64 reg, completed; int return_code; unsigned long timeout; + lockdep_assert_held(&dd->dc8051_lock); hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); - mutex_lock(&dd->dc8051_lock); - - /* We can't send any commands to the 8051 if it's in reset */ - if (dd->dc_shutdown) { - return_code = -ENODEV; - goto fail; - } - /* * If an 8051 host command timed out previously, then the 8051 is * stuck. @@ -8714,6 +8706,29 @@ static int do_8051_command( */ write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); +fail: + return return_code; +} + +/* + * Returns: + * < 0 = Linux error, not able to get access + * > 0 = 8051 command RETURN_CODE + */ +static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, + u64 *out_data) +{ + int return_code; + + mutex_lock(&dd->dc8051_lock); + /* We can't send any commands to the 8051 if it's in reset */ + if (dd->dc_shutdown) { + return_code = -ENODEV; + goto fail; + } + + return_code = _do_8051_command(dd, type, in_data, out_data); + fail: mutex_unlock(&dd->dc8051_lock); return return_code; @@ -8724,16 +8739,17 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); } -int load_8051_config(struct hfi1_devdata *dd, u8 field_id, - u8 lane_id, u32 config_data) +int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) { u64 data; int ret; + lockdep_assert_held(&dd->dc8051_lock); data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT | (u64)config_data << LOAD_DATA_DATA_SHIFT; - ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); + ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "load 8051 config: field id %d, lane %d, err %d\n", @@ -8742,6 +8758,18 @@ int load_8051_config(struct hfi1_devdata *dd, u8 field_id, return ret; } +int load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) +{ + int return_code; + + mutex_lock(&dd->dc8051_lock); + return_code = _load_8051_config(dd, field_id, lane_id, config_data); + mutex_unlock(&dd->dc8051_lock); + + return return_code; +} + /* * Read the 8051 firmware "registers". Use the RAM directly. Always * set the result, even on error. 
@@ -8857,13 +8885,14 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version) u32 frame; u32 mask; + lockdep_assert_held(&dd->dc8051_lock); mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); /* Clear, then set field */ frame &= ~mask; frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); - return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, - frame); + return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, + frame); } void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b8345a60a0fb..579a6c70b93e 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -709,6 +709,7 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch); int write_host_interface_version(struct hfi1_devdata *dd, u8 version); void read_guid(struct hfi1_devdata *dd); +int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, u8 neigh_reason, u8 rem_reason); diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 9471c4063ebe..dee3c674f33d 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -970,6 +970,46 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout) } } +/* + * Clear all reset bits, releasing the 8051. + * Wait for firmware to be ready to accept host requests. + * Then, set host version bit. + * + * This function executes even if the 8051 is in reset mode when + * dd->dc_shutdown == 1. + * + * Expects dd->dc8051_lock to be held. + */ +int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd) +{ + int ret; + + lockdep_assert_held(&dd->dc8051_lock); + /* clear all reset bits, releasing the 8051 */ + write_csr(dd, DC_DC8051_CFG_RST, 0ull); + + /* + * Wait for firmware to be ready to accept host + * requests. + */ + ret = wait_fm_ready(dd, TIMEOUT_8051_START); + if (ret) { + dd_dev_err(dd, "8051 start timeout, current FW state 0x%x\n", + get_firmware_state(dd)); + return ret; + } + + ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to set host interface version, return 0x%x\n", + ret); + return -EIO; + } + + return 0; +} + /* * Load the 8051 firmware. */ @@ -1035,31 +1075,22 @@ static int load_8051_firmware(struct hfi1_devdata *dd, if (ret) return ret; - /* clear all reset bits, releasing the 8051 */ - write_csr(dd, DC_DC8051_CFG_RST, 0ull); - /* + * Clear all reset bits, releasing the 8051. * DC reset step 5. Wait for firmware to be ready to accept host * requests. + * Then, set host version bit. 
*/ - ret = wait_fm_ready(dd, TIMEOUT_8051_START); - if (ret) { /* timed out */ - dd_dev_err(dd, "8051 start timeout, current state 0x%x\n", - get_firmware_state(dd)); - return -ETIMEDOUT; - } + mutex_lock(&dd->dc8051_lock); + ret = release_and_wait_ready_8051_firmware(dd); + mutex_unlock(&dd->dc8051_lock); + if (ret) + return ret; read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", (int)ver_major, (int)ver_minor, (int)ver_patch); dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); - ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION); - if (ret != HCMD_SUCCESS) { - dd_dev_err(dd, - "Failed to set host interface version, return 0x%x\n", - ret); - return -EIO; - } return 0; } From a917374e8a20cdfbaaaaaa2b01331dc063d38848 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 2 Oct 2017 11:04:33 -0700 Subject: [PATCH 098/296] IB/core: Use __be32 for LIDs in opa_is_extended_lid The LIDs passed to opa_extended_lid are in __be32 format, change function signature accordingly. This fixes the following sparse warnings: drivers/infiniband/core/cm.c:1181:60: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:1182:60: warning: incorrect type in argument 2 (different ba drivers/infiniband/core/cm.c:1242:68: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:1243:68: warning: incorrect type in argument 2 (different ba drivers/infiniband/core/cm.c:2922:66: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:2923:66: warning: incorrect type in argument 2 (different ba include/rdma/opa_addr.h:102:14: warning: cast to restricted __be32 Fixes: e92aa00a5189 ("IB/CM: Add OPA Path record support to CM") Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/opa_addr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index e6e90f18e6d5..f68fca296631 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -97,15 +97,15 @@ static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) * @dlid: The DLID * @slid: The SLID */ -static inline bool opa_is_extended_lid(u32 dlid, u32 slid) +static inline bool opa_is_extended_lid(__be32 dlid, __be32 slid) { if ((be32_to_cpu(dlid) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || (be32_to_cpu(slid) >= be16_to_cpu(IB_MULTICAST_LID_BASE))) return true; - else - return false; + + return false; } /* Get multicast lid base */ From 7ebfc93edcefa0c4c09f194ebdace9ddae15efa6 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 2 Oct 2017 11:04:41 -0700 Subject: [PATCH 099/296] IB/rdmavt: Correct issues with read-mostly and send size cache lines The s_ahgpsn was incorrectly placed in the read-mostly section of the QP and the s_curr_size and s_hdrwords are oversized. The misplaced s_ahgpsn will cause the read-mostly cachelines to thrash. Place s_ahgpsn in the send side cache lines and correctly size and s_hdrwords and s_cur_size to keep the send side cachelines at the same size. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++-- include/rdma/rdmavt_qp.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 98bcd06c82d7..7befba8fceb2 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8739,8 +8739,8 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); } -int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, - u8 lane_id, u32 config_data) +static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) { u64 data; int ret;
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0eed3d8752fa..89ab88c342b6 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -282,7 +282,6 @@ struct rvt_qp { u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ u16 pmtu; /* decoded from path_mtu */ u8 log_pmtu; /* shift for pmtu */ @@ -344,7 +343,6 @@ struct rvt_qp { struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; - u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_last_psn; /* last response PSN processed */ @@ -358,8 +356,10 @@ struct rvt_qp { u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + u16 s_cur_size; /* size of send packet in bytes */ u16 s_rdma_ack_cnt; + u8 s_hdrwords; /* size of s_hdr in 32 bit words */ s8 s_ahgidx; u8 s_state; /* opcode of last packet sent */ u8 s_ack_state; /* opcode of packet to ACK */
From 6588e412fe872ed81f3fb8d9b4561a66ecb763d0 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 2 Oct 2017 11:04:48 -0700 Subject: [PATCH 100/296] IB/core: Do not warn on lid conversions for OPA
On OPA devices the user_mad recv_handler can receive 32-bit LIDs (e.g. OPA_PERMISSIVE_LID), and it is okay to lose the upper 16 bits of the LID as this information is obtained elsewhere. Do not issue a warning when calling ib_lid_be16() in this case; mask off the upper 16 bits instead.
[75667.310846] ------------[ cut here ]------------ [75667.316447] WARNING: CPU: 0 PID: 1718 at ./include/rdma/ib_verbs.h:3799 recv_handler+0x15a/0x170 [ib_umad] [75667.327640] Modules linked in: ib_ipoib hfi1(E) rdmavt(E) rdma_ucm(E) ib_ucm(E) rdma_cm(E) ib_cm(E) iw_cm(E) ib_umad(E) ib_uverbs(E) ib_core(E) libiscsi scsi_transport_iscsi dm_mirror dm_region_hash dm_log dm_mod dax x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel mei_me ipmi_si iTCO_wdt iTCO_vendor_support crypto_simd ipmi_devintf pcspkr mei sg i2c_i801 glue_helper lpc_ich shpchp ioatdma mfd_core wmi ipmi_msghandler cryptd acpi_power_meter acpi_pad nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ptp ahci libahci pps_core crc32c_intel libata dca i2c_algo_bit i2c_core [last unloaded: ib_core] [75667.407704] CPU: 0 PID: 1718 Comm: kworker/0:1H Tainted: G W I E 4.13.0-rc7+ #1 [75667.417310] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015 [75667.429555] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] [75667.436360] task: ffff88084a718000 task.stack: ffffc9000a424000 [75667.443549] RIP: 0010:recv_handler+0x15a/0x170 [ib_umad] [75667.450090] RSP: 0018:ffffc9000a427ce8 EFLAGS: 00010286 [75667.456508] RAX: 00000000ffffffff RBX: ffff88085159ce80 RCX: 0000000000000000 [75667.465094] RDX: ffff88085a47b068 RSI: 0000000000000000 RDI: ffff88085159cf00 [75667.473668] RBP: ffffc9000a427d38 R08: 000000000001efc0 R09: ffff88085159ce80 [75667.482228] R10: ffff88085f007480 R11: ffff88084acf20e8 R12: ffff88085a47b020 [75667.490824] R13: ffff881056842e10 R14: ffff881056840200 R15: ffff88104c8d0800 [75667.499390] FS: 0000000000000000(0000) GS:ffff88085f400000(0000) knlGS:0000000000000000 [75667.509028] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [75667.516080] CR2: 00007f9e4b3d9000 CR3: 0000000001c09000 CR4: 00000000001406f0 [75667.524664] Call Trace: [75667.528044] ? find_mad_agent+0x7c/0x1b0 [ib_core] [75667.534031] ? ib_mark_mad_done+0x73/0xa0 [ib_core] [75667.540142] ib_mad_recv_done+0x423/0x9b0 [ib_core] [75667.546215] __ib_process_cq+0x5d/0xb0 [ib_core] [75667.552007] ib_cq_poll_work+0x20/0x60 [ib_core] [75667.557766] process_one_work+0x149/0x360 [75667.562844] worker_thread+0x4d/0x3c0 [75667.567529] kthread+0x109/0x140 [75667.571713] ? rescuer_thread+0x380/0x380 [75667.576775] ? 
kthread_park+0x60/0x60 [75667.581447] ret_from_fork+0x25/0x30 [75667.586014] Code: 43 4a 0f b6 45 c6 88 43 4b 48 8b 45 b0 48 89 43 4c 48 8b 45 b8 48 89 43 54 8b 45 c0 0f c8 89 43 5c e9 79 ff ff ff e8 16 4e fa e0 <0f> ff e9 42 ff ff ff 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 [75667.608323] ---[ end trace cf26df27c9597264 ]--- Fixes: 62ede7779904 ("Add OPA extended LID support") Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/user_mad.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c1696e6084b2..603acaf91828 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -229,7 +229,16 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = ib_lid_be16(mad_recv_wc->wc->slid); + /* + * On OPA devices it is okay to lose the upper 16 bits of LID as this + * information is obtained elsewhere. Mask off the upper 16 bits. + */ + if (agent->device->port_immutable[agent->port_num].core_cap_flags & + RDMA_CORE_PORT_INTEL_OPA) + packet->mad.hdr.lid = ib_lid_be16(0xFFFF & + mad_recv_wc->wc->slid); + else + packet->mad.hdr.lid = ib_lid_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; From 4988be5813ff2afdc0d8bfa315ef34a577d3efbf Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 2 Oct 2017 11:04:55 -0700 Subject: [PATCH 101/296] IB/hfi1: Do not warn on lid conversions for OPA On OPA devices opa_local_smp_check() will receive 32-bit LIDs when the LID is extended. In such cases, it is okay to lose the upper 16 bits of the LID as this information is obtained elsewhere. Do not issue a warning when calling ib_lid_cpu16() in this case by masking out the upper 16 bits.
[75920.148985] ------------[ cut here ]------------ [75920.154651] WARNING: CPU: 0 PID: 1718 at ./include/rdma/ib_verbs.h:3788 hfi1_process_mad+0x1c1f/0x1c80 [hfi1] [75920.166192] Modules linked in: ib_ipoib hfi1(E) rdmavt(E) rdma_ucm(E) ib_ucm(E) rdma_cm(E) ib_cm(E) iw_cm(E) ib_umad(E) ib_uverbs(E) ib_core(E) libiscsi scsi_transport_iscsi dm_mirror dm_region_hash dm_log dm_mod dax x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel mei_me ipmi_si iTCO_wdt iTCO_vendor_support crypto_simd ipmi_devintf pcspkr mei sg i2c_i801 glue_helper lpc_ich shpchp ioatdma mfd_core wmi ipmi_msghandler cryptd acpi_power_meter acpi_pad nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ptp ahci libahci pps_core crc32c_intel libata dca i2c_algo_bit i2c_core [last unloaded: ib_core] [75920.246331] CPU: 0 PID: 1718 Comm: kworker/0:1H Tainted: G W I E 4.13.0-rc7+ #1 [75920.255907] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015 [75920.268158] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] [75920.274934] task: ffff88084a718000 task.stack: ffffc9000a424000 [75920.282123] RIP: 0010:hfi1_process_mad+0x1c1f/0x1c80 [hfi1] [75920.288881] RSP: 0018:ffffc9000a427c38 EFLAGS: 00010206 [75920.295265] RAX: 0000000000010001 RBX: ffff8808361420e8 RCX: ffff880837811d80 [75920.303784] RDX: 0000000000000002 RSI: 0000000000007fff RDI: ffff880837811d80 [75920.312302] RBP: ffffc9000a427d38 R08: 0000000000000000 R09: ffff8808361420e8 [75920.320819] R10: ffff88083841f0e8 R11: ffffc9000a427da8 R12: 0000000000000001 [75920.329335] R13: ffff880837810000 R14: 0000000000000000 R15: ffff88084f1a4800 [75920.337849] FS: 0000000000000000(0000) GS:ffff88085f400000(0000) knlGS:0000000000000000 [75920.347450] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [75920.354405] CR2: 00007f9e4b3d9000 CR3: 0000000001c09000 CR4: 00000000001406f0 [75920.362947] Call Trace: [75920.366257] ? ib_mad_recv_done+0x258/0x9b0 [ib_core] [75920.372457] ? ib_mad_recv_done+0x258/0x9b0 [ib_core] [75920.378652] ? __kmalloc+0x1df/0x210 [75920.383229] ib_mad_recv_done+0x305/0x9b0 [ib_core] [75920.389270] __ib_process_cq+0x5d/0xb0 [ib_core] [75920.395032] ib_cq_poll_work+0x20/0x60 [ib_core] [75920.400777] process_one_work+0x149/0x360 [75920.405836] worker_thread+0x4d/0x3c0 [75920.410505] kthread+0x109/0x140 [75920.414681] ? rescuer_thread+0x380/0x380 [75920.419731] ? 
kthread_park+0x60/0x60 [75920.424406] ret_from_fork+0x25/0x30 [75920.428972] Code: 4c 89 9d 58 ff ff ff 49 89 45 00 66 b8 00 02 49 89 45 08 e8 44 27 89 e0 4c 8b 9d 58 ff ff ff e9 d8 f6 ff ff 0f ff e9 55 e7 ff ff <0f> ff e9 3b e5 ff ff 0f ff 0f 1f 84 00 00 00 00 00 e9 4b e9 ff [75921.451269] ---[ end trace cf26df27c9597265 ]--- Fixes: 62ede7779904 ("Add OPA extended LID support") Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index f4c0ffc040cc..07b80faf1675 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4293,7 +4293,6 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, const struct ib_wc *in_wc) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid = ib_lid_cpu16(in_wc->slid); u16 pkey; if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys)) @@ -4320,7 +4319,11 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, */ if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) return 0; - ingress_pkey_table_fail(ppd, pkey, slid); + /* + * On OPA devices it is okay to lose the upper 16 bits of LID as this + * information is obtained elsewhere. Mask off the upper 16 bits. + */ + ingress_pkey_table_fail(ppd, pkey, ib_lid_cpu16(0xFFFF & in_wc->slid)); return 1; } From 4831ca9e4a8e48cb27e0a792f73250390827a228 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Sep 2017 15:37:45 +0100 Subject: [PATCH 102/296] IB/rxe: check for allocation failure on elem The allocation for elem may fail (especially because we're using GFP_ATOMIC) so best to check for a null return. This fixes a potential null pointer dereference when assigning elem->pool. Detected by CoverityScan CID#1357507 ("Dereference null return value") Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_pool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index c1b5f38f31a5..3b4916680018 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -404,6 +404,8 @@ void *rxe_alloc(struct rxe_pool *pool) elem = kmem_cache_zalloc(pool_cache(pool), (pool->flags & RXE_POOL_ATOMIC) ? GFP_ATOMIC : GFP_KERNEL); + if (!elem) + return NULL; elem->pool = pool; kref_init(&elem->ref_cnt); From 6b9f8970cd30929cb6b372fa44fa66da9e59c650 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 9 Oct 2017 09:11:32 -0400 Subject: [PATCH 103/296] IB/rxe: put the pool on allocation failure If the allocation of elem fails, it is not sufficient to simply check for NULL and return. We need to also put our reference on the pool or else we will leave the pool with a permanent ref count and we will never be able to free it. 
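The fix takes the standard kernel unwind shape: acquire the references first, then undo them all at a single label on any failure path. A compilable userspace sketch of that shape follows, with illustrative names rather than the rxe ones:

#include <stdlib.h>

struct pool {
	int num_elem;
	int max_elem;
	int refs;
};

static void *pool_alloc(struct pool *p)
{
	void *elem;

	p->refs++;			/* reference taken up front */

	if (++p->num_elem > p->max_elem)
		goto out_put_pool;	/* over the limit: unwind */

	elem = calloc(1, 64);
	if (!elem)
		goto out_put_pool;	/* allocation failed: unwind too */

	return elem;

out_put_pool:				/* single undo point */
	p->num_elem--;
	p->refs--;
	return NULL;
}

int main(void)
{
	struct pool p = { 0, 1, 0 };

	free(pool_alloc(&p));
	return p.refs;			/* 0: references balanced */
}

The earlier patch returned NULL directly from the failed allocation, skipping the undo label, which is exactly the leak this follow-up closes.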
Fixes: 4831ca9e4a8e ("IB/rxe: check for allocation failure on elem") Suggested-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_pool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 3b4916680018..b4a8acc7bb7d 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -394,23 +394,25 @@ void *rxe_alloc(struct rxe_pool *pool) kref_get(&pool->rxe->ref_cnt); - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) { - atomic_dec(&pool->num_elem); - rxe_dev_put(pool->rxe); - rxe_pool_put(pool); - return NULL; - } + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_put_pool; elem = kmem_cache_zalloc(pool_cache(pool), (pool->flags & RXE_POOL_ATOMIC) ? GFP_ATOMIC : GFP_KERNEL); if (!elem) - return NULL; + goto out_put_pool; elem->pool = pool; kref_init(&elem->ref_cnt); return elem; + +out_put_pool: + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); + return NULL; } void rxe_elem_release(struct kref *kref) From f0309de91aeb5271c920e768f56e4d2a12be30f5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 17:45:27 -0700 Subject: [PATCH 104/296] RDMA/nes: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. A pointer from nesadapter back to nesdev was added. Cc: Faisal Latif Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Kees Cook Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.h | 6 +++--- drivers/infiniband/hw/nes/nes_cm.c | 5 ++--- drivers/infiniband/hw/nes/nes_hw.c | 20 ++++++++------------ drivers/infiniband/hw/nes/nes_hw.h | 1 + drivers/infiniband/hw/nes/nes_mgt.c | 9 +++++---- drivers/infiniband/hw/nes/nes_nic.c | 3 +-- drivers/infiniband/hw/nes/nes_utils.c | 11 +++++------ drivers/infiniband/hw/nes/nes_verbs.c | 10 ++++------ 8 files changed, 29 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 3f9e56e8b379..00c27291dc26 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -536,7 +536,7 @@ void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *); int nes_destroy_cqp(struct nes_device *); int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); void nes_recheck_link_status(struct work_struct *work); -void nes_terminate_timeout(unsigned long context); +void nes_terminate_timeout(struct timer_list *t); /* nes_nic.c */ struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); @@ -575,8 +575,8 @@ void nes_put_cqp_request(struct nes_device *nesdev, struct nes_cqp_request *cqp_request); void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *); int nes_arp_table(struct nes_device *, u32, u8 *, u32); -void nes_mh_fix(unsigned long); -void nes_clc(unsigned long); +void nes_mh_fix(struct timer_list *t); +void nes_clc(struct timer_list *t); void nes_dump_mem(unsigned int, void *, int); u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index de4025deaa4a..4961e80dcf95 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -840,7 +840,7 @@ static 
void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node) /** * nes_cm_timer_tick */ -static void nes_cm_timer_tick(unsigned long pass) +static void nes_cm_timer_tick(struct timer_list *unused) { unsigned long flags; unsigned long nexttimeout = jiffies + NES_LONG_TIME; @@ -2670,8 +2670,7 @@ static struct nes_cm_core *nes_cm_alloc_core(void) return NULL; INIT_LIST_HEAD(&cm_core->connected_nodes); - init_timer(&cm_core->tcp_timer); - cm_core->tcp_timer.function = nes_cm_timer_tick; + timer_setup(&cm_core->tcp_timer, nes_cm_timer_tick, 0); cm_core->mtu = NES_CM_DEFAULT_MTU; cm_core->state = NES_CM_STATE_INITED; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index b0adf65e4bdb..48398b460d5c 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -381,6 +381,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { sizeof nesadapter->pft_mcast_map); /* populate the new nesadapter */ + nesadapter->nesdev = nesdev; nesadapter->devfn = nesdev->pcidev->devfn; nesadapter->bus_number = nesdev->pcidev->bus->number; nesadapter->ref_count = 1; @@ -598,19 +599,15 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { } if (nesadapter->hw_rev == NE020_REV) { - init_timer(&nesadapter->mh_timer); - nesadapter->mh_timer.function = nes_mh_fix; + timer_setup(&nesadapter->mh_timer, nes_mh_fix, 0); nesadapter->mh_timer.expires = jiffies + (HZ/5); /* 1 second */ - nesadapter->mh_timer.data = (unsigned long)nesdev; add_timer(&nesadapter->mh_timer); } else { nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000); } - init_timer(&nesadapter->lc_timer); - nesadapter->lc_timer.function = nes_clc; + timer_setup(&nesadapter->lc_timer, nes_clc, 0); nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ - nesadapter->lc_timer.data = (unsigned long)nesdev; add_timer(&nesadapter->lc_timer); list_add_tail(&nesadapter->list, &nes_adapter_list); @@ -1623,9 +1620,9 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) /** * nes_rq_wqes_timeout */ -static void nes_rq_wqes_timeout(unsigned long parm) +static void nes_rq_wqes_timeout(struct timer_list *t) { - struct nes_vnic *nesvnic = (struct nes_vnic *)parm; + struct nes_vnic *nesvnic = from_timer(nesvnic, t, rq_wqes_timer); printk("%s: Timer fired.\n", __func__); atomic_set(&nesvnic->rx_skb_timer_running, 0); if (atomic_read(&nesvnic->rx_skbs_needed)) @@ -1849,8 +1846,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) wqe_count -= counter; nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id); } while (wqe_count); - setup_timer(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout, - (unsigned long)nesvnic); + timer_setup(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout, 0); nes_debug(NES_DBG_INIT, "NAPI support Enabled\n"); if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) { @@ -3474,9 +3470,9 @@ static void nes_terminate_received(struct nes_device *nesdev, } /* Timeout routine in case terminate fails to complete */ -void nes_terminate_timeout(unsigned long context) +void nes_terminate_timeout(struct timer_list *t) { - struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context; + struct nes_qp *nesqp = from_timer(nesqp, t, terminate_timer); nes_terminate_done(nesqp, 1); } diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 1b66ef1e9937..3c56470816a8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ 
-1164,6 +1164,7 @@ struct nes_adapter { u8 log_port; /* PCI information */ + struct nes_device *nesdev; unsigned int devfn; unsigned char bus_number; unsigned char OneG_Mode; diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index 77226cf4ea02..21e0ebd39a05 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -122,9 +122,10 @@ static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic) /** * nes_mgt_rq_wqes_timeout */ -static void nes_mgt_rq_wqes_timeout(unsigned long parm) +static void nes_mgt_rq_wqes_timeout(struct timer_list *t) { - struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm; + struct nes_vnic_mgt *mgtvnic = from_timer(mgtvnic, t, + rq_wqes_timer); atomic_set(&mgtvnic->rx_skb_timer_running, 0); if (atomic_read(&mgtvnic->rx_skbs_needed)) @@ -1040,8 +1041,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct mgtvnic->mgt.rx_skb[counter] = skb; } - setup_timer(&mgtvnic->rq_wqes_timer, nes_mgt_rq_wqes_timeout, - (unsigned long)mgtvnic); + timer_setup(&mgtvnic->rq_wqes_timer, nes_mgt_rq_wqes_timeout, + 0); wqe_count = NES_MGT_WQ_COUNT - 1; mgtvnic->mgt.rq_head = wqe_count; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 5921ea3d50ae..448a8e7d0963 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1746,8 +1746,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->rdma_enabled = 0; } nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id; - init_timer(&nesvnic->event_timer); - nesvnic->event_timer.function = NULL; + timer_setup(&nesvnic->event_timer, NULL, 0); spin_lock_init(&nesvnic->tx_lock); spin_lock_init(&nesvnic->port_ibevent_lock); nesdev->netdev[nesdev->netdev_count] = netdev; diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 37331e2fdc5f..1dc0527e5a05 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -740,11 +740,11 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti /** * nes_mh_fix */ -void nes_mh_fix(unsigned long parm) +void nes_mh_fix(struct timer_list *t) { + struct nes_adapter *nesadapter = from_timer(nesadapter, t, mh_timer); + struct nes_device *nesdev = nesadapter->nesdev; unsigned long flags; - struct nes_device *nesdev = (struct nes_device *)parm; - struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_vnic *nesvnic; u32 used_chunks_tx; u32 temp_used_chunks_tx; @@ -881,11 +881,10 @@ void nes_mh_fix(unsigned long parm) /** * nes_clc */ -void nes_clc(unsigned long parm) +void nes_clc(struct timer_list *t) { + struct nes_adapter *nesadapter = from_timer(nesadapter, t, lc_timer); unsigned long flags; - struct nes_device *nesdev = (struct nes_device *)parm; - struct nes_adapter *nesadapter = nesdev->nesadapter; spin_lock_irqsave(&nesadapter->phy_lock, flags); nesadapter->link_interrupt_count[0] = 0; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 442b9bdc0f03..fac59cd1db71 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1304,8 +1304,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, init_completion(&nesqp->rq_drained); nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); - setup_timer(&nesqp->terminate_timer, nes_terminate_timeout, - (unsigned long)nesqp); + timer_setup(&nesqp->terminate_timer, 
nes_terminate_timeout, 0); /* update the QP table */ nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; @@ -3788,9 +3787,9 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) /** * nes_handle_delayed_event */ -static void nes_handle_delayed_event(unsigned long data) +static void nes_handle_delayed_event(struct timer_list *t) { - struct nes_vnic *nesvnic = (void *) data; + struct nes_vnic *nesvnic = from_timer(nesvnic, t, event_timer); if (nesvnic->delayed_event != nesvnic->last_dispatched_event) { struct ib_event event; @@ -3820,8 +3819,7 @@ void nes_port_ibevent(struct nes_vnic *nesvnic) if (!nesvnic->event_timer.function) { ib_dispatch_event(&event); nesvnic->last_dispatched_event = event.event; - nesvnic->event_timer.function = nes_handle_delayed_event; - nesvnic->event_timer.data = (unsigned long) nesvnic; + nesvnic->event_timer.function = (TIMER_FUNC_TYPE)nes_handle_delayed_event; nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY; add_timer(&nesvnic->event_timer); } else { From 4037c92f0a3022825e56f60d09f05e2ab37ccd7e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 17:45:35 -0700 Subject: [PATCH 105/296] IB/qib: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() helper to pass the timer pointer explicitly. Cc: Mike Marciniszyn Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Kees Cook Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 4 ++-- drivers/infiniband/hw/qib/qib_7220.h | 1 + drivers/infiniband/hw/qib/qib_driver.c | 9 ++++---- drivers/infiniband/hw/qib/qib_iba6120.c | 19 ++++++++-------- drivers/infiniband/hw/qib/qib_iba7220.c | 20 ++++++++--------- drivers/infiniband/hw/qib/qib_iba7322.c | 17 +++++++-------- drivers/infiniband/hw/qib/qib_init.c | 29 ++++++++++--------------- drivers/infiniband/hw/qib/qib_intr.c | 6 ++--- drivers/infiniband/hw/qib/qib_mad.c | 12 +++++----- drivers/infiniband/hw/qib/qib_sd7220.c | 10 ++++----- drivers/infiniband/hw/qib/qib_tx.c | 4 ++-- drivers/infiniband/hw/qib/qib_verbs.c | 6 ++--- 12 files changed, 62 insertions(+), 75 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index f9e1c69603a5..f9541a0ee5cd 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1188,7 +1188,7 @@ int qib_set_lid(struct qib_pportdata *, u32, u8); void qib_hol_down(struct qib_pportdata *); void qib_hol_init(struct qib_pportdata *); void qib_hol_up(struct qib_pportdata *); -void qib_hol_event(unsigned long); +void qib_hol_event(struct timer_list *); void qib_disable_after_error(struct qib_devdata *); int qib_set_uevent_bits(struct qib_pportdata *, const int); @@ -1302,7 +1302,7 @@ void qib_get_eeprom_info(struct qib_devdata *); #define qib_inc_eeprom_err(dd, eidx, incr) void qib_dump_lookup_output_queue(struct qib_devdata *); void qib_force_pio_avail_update(struct qib_devdata *); -void qib_clear_symerror_on_linkup(unsigned long opaque); +void qib_clear_symerror_on_linkup(struct timer_list *t); /* * Set LED override, only the two LSBs have "public" meaning, but diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h index a5356cb4252e..c467a6076aa8 100644 --- a/drivers/infiniband/hw/qib/qib_7220.h +++ b/drivers/infiniband/hw/qib/qib_7220.h @@ -75,6 +75,7 @@ struct qib_chip_specific { char 
bitsmsgbuf[64]; struct timer_list relock_timer; unsigned int relock_interval; /* in jiffies */ + struct qib_devdata *dd; }; struct qib_chippport_specific { diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 719906a9fd51..33d3335385e8 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -682,9 +682,10 @@ int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc) /* Below is "non-zero" to force override, but both actual LEDs are off */ #define LED_OVER_BOTH_OFF (8) -static void qib_run_led_override(unsigned long opaque) +static void qib_run_led_override(struct timer_list *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_pportdata *ppd = from_timer(ppd, t, + led_override_timer); struct qib_devdata *dd = ppd->dd; int timeoff; int ph_idx; @@ -735,9 +736,7 @@ void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val) */ if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { /* Need to start timer */ - init_timer(&ppd->led_override_timer); - ppd->led_override_timer.function = qib_run_led_override; - ppd->led_override_timer.data = (unsigned long) ppd; + timer_setup(&ppd->led_override_timer, qib_run_led_override, 0); ppd->led_override_timer.expires = jiffies + 1; add_timer(&ppd->led_override_timer); } else { diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 3259a60e4f4f..0113b4e60447 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -266,6 +266,7 @@ struct qib_chip_specific { u64 rpkts; /* total packets received (sample result) */ u64 xmit_wait; /* # of ticks no data sent (sample result) */ struct timer_list pma_timer; + struct qib_pportdata *ppd; char emsgbuf[128]; char bitsmsgbuf[64]; u8 pma_sample_status; @@ -2647,9 +2648,9 @@ static void qib_chk_6120_errormask(struct qib_devdata *dd) * need traffic_wds done the way it is * called from add_timer */ -static void qib_get_6120_faststats(unsigned long opaque) +static void qib_get_6120_faststats(struct timer_list *t) { - struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_devdata *dd = from_timer(dd, t, stats_timer); struct qib_pportdata *ppd = dd->pport; unsigned long flags; u64 traffic_wds; @@ -2937,10 +2938,10 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what) return ret; } -static void pma_6120_timer(unsigned long data) +static void pma_6120_timer(struct timer_list *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *)data; - struct qib_chip_specific *cs = ppd->dd->cspec; + struct qib_chip_specific *cs = from_timer(cs, t, pma_timer); + struct qib_pportdata *ppd = cs->ppd; struct qib_ibport *ibp = &ppd->ibport_data; unsigned long flags; @@ -3205,6 +3206,7 @@ static int init_6120_variables(struct qib_devdata *dd) dd->num_pports = 1; dd->cspec = (struct qib_chip_specific *)(ppd + dd->num_pports); + dd->cspec->ppd = ppd; ppd->cpspec = NULL; /* not used in this chip */ spin_lock_init(&dd->cspec->kernel_tid_lock); @@ -3289,11 +3291,8 @@ static int init_6120_variables(struct qib_devdata *dd) dd->rhdrhead_intr_off = 1ULL << 32; /* setup the stats timer; the add_timer is done at end of init */ - setup_timer(&dd->stats_timer, qib_get_6120_faststats, - (unsigned long)dd); - - setup_timer(&dd->cspec->pma_timer, pma_6120_timer, - (unsigned long)ppd); + timer_setup(&dd->stats_timer, qib_get_6120_faststats, 0); + timer_setup(&dd->cspec->pma_timer, pma_6120_timer, 
0); dd->ureg_align = qib_read_kreg32(dd, kr_palign); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 04bdd3d487b1..d400aa7c73b4 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1042,9 +1042,11 @@ static int qib_decode_7220_err(struct qib_devdata *dd, char *buf, size_t blen, return iserr; } -static void reenable_7220_chase(unsigned long opaque) +static void reenable_7220_chase(struct timer_list *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_chippport_specific *cpspec = from_timer(cpspec, t, + chase_timer); + struct qib_pportdata *ppd = &cpspec->pportdata; ppd->cpspec->chase_timer.expires = 0; qib_set_ib_7220_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, @@ -1663,7 +1665,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) dd->control | QLOGIC_IB_C_FREEZEMODE); ppd->cpspec->chase_end = 0; - if (ppd->cpspec->chase_timer.data) /* if initted */ + if (ppd->cpspec->chase_timer.function) /* if initted */ del_timer_sync(&ppd->cpspec->chase_timer); if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta || @@ -3263,9 +3265,9 @@ static u32 qib_read_7220portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, * need traffic_wds done the way it is * called from add_timer */ -static void qib_get_7220_faststats(unsigned long opaque) +static void qib_get_7220_faststats(struct timer_list *t) { - struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_devdata *dd = from_timer(dd, t, stats_timer); struct qib_pportdata *ppd = dd->pport; unsigned long flags; u64 traffic_wds; @@ -3997,6 +3999,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->num_pports = 1; dd->cspec = (struct qib_chip_specific *)(cpspec + dd->num_pports); + dd->cspec->dd = dd; ppd->cpspec = cpspec; spin_lock_init(&dd->cspec->sdepb_lock); @@ -4069,8 +4072,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd) if (!qib_mini_init) qib_write_kreg(dd, kr_rcvbthqp, QIB_KD_QP); - setup_timer(&ppd->cpspec->chase_timer, reenable_7220_chase, - (unsigned long)ppd); + timer_setup(&ppd->cpspec->chase_timer, reenable_7220_chase, 0); qib_num_cfg_vls = 1; /* if any 7220's, only one VL */ @@ -4095,9 +4097,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->rhdrhead_intr_off = 1ULL << 32; /* setup the stats timer; the add_timer is done at end of init */ - init_timer(&dd->stats_timer); - dd->stats_timer.function = qib_get_7220_faststats; - dd->stats_timer.data = (unsigned long) dd; + timer_setup(&dd->stats_timer, qib_get_7220_faststats, 0); dd->stats_timer.expires = jiffies + ACTIVITY_TIMER * HZ; /* diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 14cadf6d6214..a432d8e450b4 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1739,9 +1739,10 @@ static void qib_error_tasklet(unsigned long data) qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); } -static void reenable_chase(unsigned long opaque) +static void reenable_chase(struct timer_list *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_chippport_specific *cp = from_timer(cp, t, chase_timer); + struct qib_pportdata *ppd = cp->ppd; ppd->cpspec->chase_timer.expires = 0; qib_set_ib_7322_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, @@ -2531,7 +2532,7 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) cancel_delayed_work_sync(&ppd->cpspec->ipg_work); 
ppd->cpspec->chase_end = 0; - if (ppd->cpspec->chase_timer.data) /* if initted */ + if (ppd->cpspec->chase_timer.function) /* if initted */ del_timer_sync(&ppd->cpspec->chase_timer); /* @@ -5138,9 +5139,9 @@ static u32 qib_read_7322portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, * * called from add_timer */ -static void qib_get_7322_faststats(unsigned long opaque) +static void qib_get_7322_faststats(struct timer_list *t) { - struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_devdata *dd = from_timer(dd, t, stats_timer); struct qib_pportdata *ppd; unsigned long flags; u64 traffic_wds; @@ -6614,8 +6615,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) if (!qib_mini_init) write_7322_init_portregs(ppd); - setup_timer(&cp->chase_timer, reenable_chase, - (unsigned long)ppd); + timer_setup(&cp->chase_timer, reenable_chase, 0); ppd++; } @@ -6641,8 +6641,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) (u64) rcv_int_count << IBA7322_HDRHEAD_PKTINT_SHIFT; /* setup the stats timer; the add_timer is done at end of init */ - setup_timer(&dd->stats_timer, qib_get_7322_faststats, - (unsigned long)dd); + timer_setup(&dd->stats_timer, qib_get_7322_faststats, 0); dd->ureg_align = 0x10000; /* 64KB alignment */ diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index c5a4c65636d6..5243ad30dfc0 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -93,7 +93,7 @@ unsigned qib_cc_table_size; module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); -static void verify_interrupt(unsigned long); +static void verify_interrupt(struct timer_list *); static struct idr qib_unit_table; u32 qib_cpulist_count; @@ -233,8 +233,7 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, spin_lock_init(&ppd->cc_shadow_lock); init_waitqueue_head(&ppd->state_wait); - setup_timer(&ppd->symerr_clear_timer, qib_clear_symerror_on_linkup, - (unsigned long)ppd); + timer_setup(&ppd->symerr_clear_timer, qib_clear_symerror_on_linkup, 0); ppd->qib_wq = NULL; ppd->ibport_data.pmastats = @@ -428,8 +427,7 @@ static int loadtime_init(struct qib_devdata *dd) qib_get_eeprom_info(dd); /* setup time (don't start yet) to verify we got interrupt */ - setup_timer(&dd->intrchk_timer, verify_interrupt, - (unsigned long)dd); + timer_setup(&dd->intrchk_timer, verify_interrupt, 0); done: return ret; } @@ -493,9 +491,9 @@ static void enable_chip(struct qib_devdata *dd) } } -static void verify_interrupt(unsigned long opaque) +static void verify_interrupt(struct timer_list *t) { - struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_devdata *dd = from_timer(dd, t, intrchk_timer); u64 int_counter; if (!dd) @@ -753,8 +751,7 @@ int qib_init(struct qib_devdata *dd, int reinit) continue; if (dd->flags & QIB_HAS_SEND_DMA) ret = qib_setup_sdma(ppd); - setup_timer(&ppd->hol_timer, qib_hol_event, - (unsigned long)ppd); + timer_setup(&ppd->hol_timer, qib_hol_event, 0); ppd->hol_state = QIB_HOL_UP; } @@ -815,23 +812,19 @@ static void qib_stop_timers(struct qib_devdata *dd) struct qib_pportdata *ppd; int pidx; - if (dd->stats_timer.data) { + if (dd->stats_timer.function) del_timer_sync(&dd->stats_timer); - dd->stats_timer.data = 0; - } - if (dd->intrchk_timer.data) { + if (dd->intrchk_timer.function) del_timer_sync(&dd->intrchk_timer); - dd->intrchk_timer.data = 0; - } for 
(pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - if (ppd->hol_timer.data) + if (ppd->hol_timer.function) del_timer_sync(&ppd->hol_timer); - if (ppd->led_override_timer.data) { + if (ppd->led_override_timer.function) { del_timer_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } - if (ppd->symerr_clear_timer.data) + if (ppd->symerr_clear_timer.function) del_timer_sync(&ppd->symerr_clear_timer); } } diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index a014fd4cd076..65c3b964ad1b 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -141,7 +141,7 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs) qib_hol_up(ppd); /* useful only for 6120 now */ *ppd->statusp |= QIB_STATUS_IB_READY | QIB_STATUS_IB_CONF; - qib_clear_symerror_on_linkup((unsigned long)ppd); + qib_clear_symerror_on_linkup(&ppd->symerr_clear_timer); spin_lock_irqsave(&ppd->lflags_lock, flags); ppd->lflags |= QIBL_LINKACTIVE | QIBL_LINKV; ppd->lflags &= ~(QIBL_LINKINIT | @@ -170,9 +170,9 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs) signal_ib_event(ppd, ev); } -void qib_clear_symerror_on_linkup(unsigned long opaque) +void qib_clear_symerror_on_linkup(struct timer_list *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_pportdata *ppd = from_timer(ppd, t, symerr_clear_timer); if (ppd->lflags & QIBL_LINKACTIVE) return; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 82d9da9b6997..dd0992e9ab50 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2446,9 +2446,9 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, return ret; } -static void xmit_wait_timer_func(unsigned long opaque) +static void xmit_wait_timer_func(struct timer_list *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_pportdata *ppd = from_timer(ppd, t, cong_stats.timer); struct qib_devdata *dd = dd_from_ppd(ppd); unsigned long flags; u8 status; @@ -2478,10 +2478,8 @@ void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx) /* Initialize xmit_wait structure */ dd->pport[port_idx].cong_stats.counter = 0; - init_timer(&dd->pport[port_idx].cong_stats.timer); - dd->pport[port_idx].cong_stats.timer.function = xmit_wait_timer_func; - dd->pport[port_idx].cong_stats.timer.data = - (unsigned long)(&dd->pport[port_idx]); + timer_setup(&dd->pport[port_idx].cong_stats.timer, + xmit_wait_timer_func, 0); dd->pport[port_idx].cong_stats.timer.expires = 0; add_timer(&dd->pport[port_idx].cong_stats.timer); } @@ -2492,7 +2490,7 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) struct qib_devdata *dd = container_of(ibdev, struct qib_devdata, verbs_dev); - if (dd->pport[port_idx].cong_stats.timer.data) + if (dd->pport[port_idx].cong_stats.timer.function) del_timer_sync(&dd->pport[port_idx].cong_stats.timer); if (dd->pport[port_idx].ibport_data.smi_ah) diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index c72775f27212..c06bcb1a208d 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1390,11 +1390,11 @@ module_param_named(relock_by_timer, qib_relock_by_timer, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up"); -static void qib_run_relock(unsigned long opaque) +static void 
qib_run_relock(struct timer_list *t) { - struct qib_devdata *dd = (struct qib_devdata *)opaque; + struct qib_chip_specific *cs = from_timer(cs, t, relock_timer); + struct qib_devdata *dd = cs->dd; struct qib_pportdata *ppd = dd->pport; - struct qib_chip_specific *cs = dd->cspec; int timeoff; /* @@ -1440,9 +1440,7 @@ void set_7220_relock_poll(struct qib_devdata *dd, int ibup) /* If timer has not yet been started, do so. */ if (!cs->relock_timer_active) { cs->relock_timer_active = 1; - init_timer(&cs->relock_timer); - cs->relock_timer.function = qib_run_relock; - cs->relock_timer.data = (unsigned long) dd; + timer_setup(&cs->relock_timer, qib_run_relock, 0); cs->relock_interval = timeout; cs->relock_timer.expires = jiffies + timeout; add_timer(&cs->relock_timer); diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index eface3b3dacf..db6e925058b7 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -552,9 +552,9 @@ void qib_hol_up(struct qib_pportdata *ppd) /* * This is only called via the timer. */ -void qib_hol_event(unsigned long opaque) +void qib_hol_event(struct timer_list *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_pportdata *ppd = from_timer(ppd, t, hol_timer); /* If hardware error, etc, skip. */ if (!(ppd->dd->flags & QIB_INITTED)) diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 9d92aeb8d9a1..db958b1f60b1 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -389,9 +389,9 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) * This is called from a timer to check for QPs * which need kernel memory in order to send a packet. */ -static void mem_timer(unsigned long data) +static void mem_timer(struct timer_list *t) { - struct qib_ibdev *dev = (struct qib_ibdev *) data; + struct qib_ibdev *dev = from_timer(dev, t, mem_timer); struct list_head *list = &dev->memwait; struct rvt_qp *qp = NULL; struct qib_qp_priv *priv = NULL; @@ -1532,7 +1532,7 @@ int qib_register_ib_device(struct qib_devdata *dd) init_ibport(ppd + i); /* Only need to initialize non-zero fields. */ - setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); + timer_setup(&dev->mem_timer, mem_timer, 0); INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); From 605cbb2c443f68b1eb74e4cfdf6c15d7d965f00a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 17:45:41 -0700 Subject: [PATCH 106/296] RDMA/i40iw: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
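All of these conversions rest on one trick: because the timer_list is embedded in the structure that owns it, the callback can recover its owner with container_of() arithmetic instead of a stashed unsigned long. Here is a self-contained userspace sketch of the pattern, with deliberately simplified stand-ins for the kernel's timer_list and from_timer() (it relies on GCC/Clang typeof):

#include <stdio.h>
#include <stddef.h>

struct timer_list {
	void (*function)(struct timer_list *);
};

/* Stand-in for the kernel's from_timer(): container_of in disguise. */
#define from_timer(var, timer, field) \
	((typeof(var))((char *)(timer) - offsetof(typeof(*(var)), field)))

struct my_dev {
	int id;
	struct timer_list poll_timer;	/* embedded, as in the drivers above */
};

static void poll_cb(struct timer_list *t)
{
	struct my_dev *dev = from_timer(dev, t, poll_timer);

	printf("timer fired for dev %d\n", dev->id);
}

int main(void)
{
	struct my_dev dev = { .id = 7 };

	dev.poll_timer.function = poll_cb;		/* what timer_setup() records */
	dev.poll_timer.function(&dev.poll_timer);	/* simulate expiry */
	return 0;
}

This is also why several of the patches add a back-pointer (nesadapter->nesdev, the qib cspec->dd, the i40iw pestat->vsi): when the callback needs a structure other than the one embedding the timer, container_of() alone cannot reach it.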
Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Kees Cook Acked-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 1 + drivers/infiniband/hw/i40iw/i40iw_type.h | 1 + drivers/infiniband/hw/i40iw/i40iw_utils.c | 10 +++++----- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 42ca5346777d..1e9f18c86e2b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -4873,6 +4873,7 @@ enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40 vsi->pestat = info->pestat; vsi->pestat->hw = vsi->dev->hw; + vsi->pestat->vsi = vsi; if (info->stats_initialize) { i40iw_hw_stats_init(vsi->pestat, fcn_id, true); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 63118f6d5ab4..7798e8e2f9bd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -250,6 +250,7 @@ struct i40iw_vsi_pestat { struct i40iw_dev_hw_stats last_read_hw_stats; struct i40iw_dev_hw_stats_offsets hw_stats_offsets; struct timer_list stats_timer; + struct i40iw_sc_vsi *vsi; spinlock_t lock; /* rdma stats lock */ }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index e52dbbb4165e..f6c76595e834 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -1445,11 +1445,12 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats * @vsi: pointer to the vsi structure */ -static void i40iw_hw_stats_timeout(unsigned long vsi) +static void i40iw_hw_stats_timeout(struct timer_list *t) { - struct i40iw_sc_vsi *sc_vsi = (struct i40iw_sc_vsi *)vsi; + struct i40iw_vsi_pestat *pf_devstat = from_timer(pf_devstat, t, + stats_timer); + struct i40iw_sc_vsi *sc_vsi = pf_devstat->vsi; struct i40iw_sc_dev *pf_dev = sc_vsi->dev; - struct i40iw_vsi_pestat *pf_devstat = sc_vsi->pestat; struct i40iw_vsi_pestat *vf_devstat = NULL; u16 iw_vf_idx; unsigned long flags; @@ -1480,8 +1481,7 @@ void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi) { struct i40iw_vsi_pestat *devstat = vsi->pestat; - setup_timer(&devstat->stats_timer, i40iw_hw_stats_timeout, - (unsigned long)vsi); + timer_setup(&devstat->stats_timer, i40iw_hw_stats_timeout, 0); mod_timer(&devstat->stats_timer, jiffies + msecs_to_jiffies(STATS_TIMER_DELAY)); } From 6d290d69ac52b3b311d2ef0da4825dc6f1ea8997 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 17:45:54 -0700 Subject: [PATCH 107/296] IB/ipoib: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Leon Romanovsky Cc: Alex Vesker Cc: Erez Shitrit Cc: Zhu Yanjun Cc: Dasaratharaman Chandramouli Cc: Paolo Abeni Cc: Ira Weiny Cc: Yuval Shaia Cc: linux-rdma@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Kees Cook Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 6 ++++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4a5c7a07a631..7cc2b755413d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -500,7 +500,7 @@ void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, const char *format); -void ipoib_ib_tx_timer_func(unsigned long ctx); +void ipoib_ib_tx_timer_func(struct timer_list *t); void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_heavy(struct work_struct *work); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 6cd61638b441..918930a0bc70 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -821,9 +821,11 @@ int ipoib_ib_dev_stop(struct net_device *dev) return 0; } -void ipoib_ib_tx_timer_func(unsigned long ctx) +void ipoib_ib_tx_timer_func(struct timer_list *t) { - drain_tx_cq((struct net_device *)ctx); + struct ipoib_dev_priv *priv = from_timer(priv, t, poll_timer); + + drain_tx_cq(priv->dev); } int ipoib_ib_dev_open_default(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index dcc77014018d..77c1b7adbb40 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1666,8 +1666,7 @@ static int ipoib_dev_init_default(struct net_device *dev) priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; - setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func, - (unsigned long)dev); + timer_setup(&priv->poll_timer, ipoib_ib_tx_timer_func, 0); return 0; From 55c0fcc3de4605d7fd837ba3decbb4b2eaa8dc81 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 17:51:54 -0700 Subject: [PATCH 108/296] net/mlx4_core: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
Cc: Tariq Toukan Cc: netdev@vger.kernel.org Cc: linux-rdma@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Kees Cook Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/catas.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 53daa6ca5d83..e2b6b0cac1ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -231,10 +231,10 @@ static void dump_err_buf(struct mlx4_dev *dev) i, swab32(readl(priv->catas_err.map + i))); } -static void poll_catas(unsigned long dev_ptr) +static void poll_catas(struct timer_list *t) { - struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; - struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer); + struct mlx4_dev *dev = &priv->dev; u32 slave_read; if (mlx4_is_slave(dev)) { @@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) phys_addr_t addr; INIT_LIST_HEAD(&priv->catas_err.list); - init_timer(&priv->catas_err.timer); + timer_setup(&priv->catas_err.timer, poll_catas, 0); priv->catas_err.map = NULL; if (!mlx4_is_slave(dev)) { @@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) } } - priv->catas_err.timer.data = (unsigned long) dev; - priv->catas_err.timer.function = poll_catas; priv->catas_err.timer.expires = round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); add_timer(&priv->catas_err.timer); From 9cc12ad6db55d7d902260fead28a91710dd5dbe5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 6 Oct 2017 09:13:46 +0200 Subject: [PATCH 109/296] infiniband: add MMU dependency for user_mem The infiniband subsystem causes a link failure when the umem driver is built on MMU-less systems: mm/mmu_notifier.o: In function `do_mmu_notifier_register': mmu_notifier.c:(.text+0x32): undefined reference to `mm_take_all_locks' drivers/infiniband/core/umem.o: In function `ib_umem_get': umem.c:(.text+0x132): undefined reference to `can_do_mlock' drivers/infiniband/core/umem_odp.o: In function `ib_umem_odp_map_dma_pages': umem_odp.c:(.text+0x766): undefined reference to `get_user_pages_remote' This bug has existed for a while but only became apparent in ARM randconfig builds when the dependency on PCI was lifted, as none of the ARM-NOMMU targets support PCI at the moment. We could probably get the umem driver to build by providing an alternative implementation of can_do_mlock() that returns false on NOMMU systems, but then we'd still have a problem with the mmu-notifiers required by CONFIG_INFINIBAND_ON_DEMAND_PAGING, so simply forbidding umem with NOMMU seems like the simplest workaround.
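For reference, the rejected alternative would have looked roughly like the stub below. This is only a sketch of the idea the message dismisses, not a proposed patch; the main() exists purely so it compiles standalone (CONFIG_MMU is undefined in a plain userspace build, so the stub branch is what gets built):

#include <stdio.h>
#include <stdbool.h>

#ifndef CONFIG_MMU
/* Rejected alternative: a NOMMU stub that always refuses. */
static inline bool can_do_mlock(void)
{
	return false;	/* no MMU: userspace pages cannot be pinned */
}
#endif

int main(void)
{
	printf("can_do_mlock: %d\n", can_do_mlock());
	return 0;
}

As the message notes, such a stub still would not resolve the mmu-notifier dependency of on-demand paging, which is why the Kconfig dependency is the simpler fix.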
Fixes: 931bc0d91639 ("IB: Move PCI dependency from root KConfig to HW's KConfigs") Signed-off-by: Arnd Bergmann Signed-off-by: Doug Ledford --- drivers/infiniband/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index b62b3b1e09cd..98ac46ed7214 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -45,6 +45,7 @@ config INFINIBAND_EXP_USER_ACCESS config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n + depends on MMU default y config INFINIBAND_ON_DEMAND_PAGING From 9d18717790c43c904fabe9da7a4c6b2ebed2c4d8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Oct 2017 13:06:17 -0700 Subject: [PATCH 110/296] IB/core: Simplify sa_path_set_[sd]lid() calls Instead of making every caller convert the second argument of sa_path_set_slid() and sa_path_set_dlid() to big endian format, make these two functions accept LIDs in CPU endian format. This patch does not change any functionality. Signed-off-by: Bart Van Assche Cc: Sean Hefty Cc: Dasaratharaman Chandramouli Cc: Don Hiatt Cc: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 26 +++++++++++------------ drivers/infiniband/core/uverbs_marshall.c | 10 ++++----- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_sa.h | 12 +++++------ 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4c4b46586af2..d80911d4abb7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1472,31 +1472,29 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { sa_path_set_dlid(primary_path, - htonl(ntohs(req_msg->primary_local_lid))); + ntohs(req_msg->primary_local_lid)); sa_path_set_slid(primary_path, - htonl(ntohs(req_msg->primary_remote_lid))); + ntohs(req_msg->primary_remote_lid)); } else { lid = opa_get_lid_from_gid(&req_msg->primary_local_gid); - sa_path_set_dlid(primary_path, cpu_to_be32(lid)); + sa_path_set_dlid(primary_path, lid); lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid); - sa_path_set_slid(primary_path, cpu_to_be32(lid)); + sa_path_set_slid(primary_path, lid); } if (!cm_req_has_alt_path(req_msg)) return; if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(alt_path, - htonl(ntohs(req_msg->alt_local_lid))); - sa_path_set_slid(alt_path, - htonl(ntohs(req_msg->alt_remote_lid))); + sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid)); + sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid)); } else { lid = opa_get_lid_from_gid(&req_msg->alt_local_gid); - sa_path_set_dlid(alt_path, cpu_to_be32(lid)); + sa_path_set_dlid(alt_path, lid); lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid); - sa_path_set_slid(alt_path, cpu_to_be32(lid)); + sa_path_set_slid(alt_path, lid); } } @@ -3037,14 +3035,14 @@ static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg, u32 lid; if (path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); - sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); + sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid)); + sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid)); } else { lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid); - sa_path_set_dlid(path, cpu_to_be32(lid)); + sa_path_set_dlid(path, lid); lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid); - sa_path_set_slid(path, cpu_to_be32(lid)); + sa_path_set_slid(path, lid); } 
} diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index bd0acf376af0..aeb2824efbc7 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -176,18 +176,18 @@ EXPORT_SYMBOL(ib_copy_path_rec_to_user); void ib_copy_path_rec_from_user(struct sa_path_rec *dst, struct ib_user_path_rec *src) { - __be32 slid, dlid; + u32 slid, dlid; memset(dst, 0, sizeof(*dst)); if ((ib_is_opa_gid((union ib_gid *)src->sgid)) || (ib_is_opa_gid((union ib_gid *)src->dgid))) { dst->rec_type = SA_PATH_REC_TYPE_OPA; - slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid)); - dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid)); + slid = opa_get_lid_from_gid((union ib_gid *)src->sgid); + dlid = opa_get_lid_from_gid((union ib_gid *)src->dgid); } else { dst->rec_type = SA_PATH_REC_TYPE_IB; - slid = htonl(ntohs(src->slid)); - dlid = htonl(ntohs(src->dlid)); + slid = ntohs(src->slid); + dlid = ntohs(src->dlid); } memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index fa5ccdb3bb2a..778a08250f16 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2403,7 +2403,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, switch (event->param.rej_rcvd.reason) { case IB_CM_REJ_PORT_CM_REDIRECT: cpi = event->param.rej_rcvd.ari; - sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid))); + sa_path_set_dlid(&ch->path, ntohs(cpi->redirect_lid)); ch->path.pkey = cpi->redirect_pkey; cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 355b81f4242d..1f7f604db5aa 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -590,20 +590,20 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec) (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); } -static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid) +static inline void sa_path_set_slid(struct sa_path_rec *rec, u32 slid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.slid = htons(ntohl(slid)); + rec->ib.slid = cpu_to_be16(slid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.slid = slid; + rec->opa.slid = cpu_to_be32(slid); } -static inline void sa_path_set_dlid(struct sa_path_rec *rec, __be32 dlid) +static inline void sa_path_set_dlid(struct sa_path_rec *rec, u32 dlid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.dlid = htons(ntohl(dlid)); + rec->ib.dlid = cpu_to_be16(dlid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.dlid = dlid; + rec->opa.dlid = cpu_to_be32(dlid); } static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec, From 318a8ab7e87887946084a7c0398ecc978c6aa71d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 9 Oct 2017 17:12:00 +0100 Subject: [PATCH 111/296] IB/core: remove redundant check on prot_sg_cnt prot_sg_cnt cannot be zero as a previous check on ret (from which prot_sg_cnt is assigned) returns -ENOMEM if it is zero. Since it cannot be zero, we can simplify the code by removing the non-zero check on prot_sg_cnt and the redundant else statement. Detected by CoverityScan, CID#1357188 ("Logically dead code")
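The reasoning reduces to a small control-flow fact: once an earlier check has turned the zero case into an early return, a later zero test on the same value is unreachable. A reduced sketch of that flow, with illustrative names rather than the rw.c code:

#include <stdio.h>

#define ENOMEM 12

/* Stand-in for the sg mapping call whose result seeds prot_sg_cnt. */
static int map_prot_sg(int nents)
{
	return nents;
}

static int signature_init(int nents)
{
	int prot_sg_cnt;
	int ret = map_prot_sg(nents);

	if (!ret)
		return -ENOMEM;		/* zero rejected here ... */

	prot_sg_cnt = ret;

	/* ... so a later "if (prot_sg_cnt)" can never be false. */
	printf("registering %d prot entries\n", prot_sg_cnt);
	return 0;
}

int main(void)
{
	return signature_init(3) < 0;
}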
Detected by CoverityScan, CID#1357188 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/core/rw.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 6ca607e8e293..c8963e91f92a 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -384,21 +384,17 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, count += ret; prev_wr = &ctx->sig->data.reg_wr.wr; - if (prot_sg_cnt) { - ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot, - prot_sg, prot_sg_cnt, 0); - if (ret < 0) - goto out_destroy_data_mr; - count += ret; + ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot, + prot_sg, prot_sg_cnt, 0); + if (ret < 0) + goto out_destroy_data_mr; + count += ret; - if (ctx->sig->prot.inv_wr.next) - prev_wr->next = &ctx->sig->prot.inv_wr; - else - prev_wr->next = &ctx->sig->prot.reg_wr.wr; - prev_wr = &ctx->sig->prot.reg_wr.wr; - } else { - ctx->sig->prot.mr = NULL; - } + if (ctx->sig->prot.inv_wr.next) + prev_wr->next = &ctx->sig->prot.inv_wr; + else + prev_wr->next = &ctx->sig->prot.reg_wr.wr; + prev_wr = &ctx->sig->prot.reg_wr.wr; ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs); if (!ctx->sig->sig_mr) { From 1c3aea2bc8f0b2e5b57375ead40457ff75a3a2ec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:43 -0700 Subject: [PATCH 112/296] IB/core: Fix endianness annotation in rdma_is_multicast_addr() Since ipv4_addr is a big endian 32-bit number, annotate it as such. Fixes: commit be1d325a3358 ("IB/core: Set RoCEv2 MGID according to spec") Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ec5008cf5d51..cfa82d16573d 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -305,12 +305,12 @@ static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) static inline int rdma_is_multicast_addr(struct in6_addr *addr) { - u32 ipv4_addr; + __be32 ipv4_addr; if (addr->s6_addr[0] == 0xff) return 1; - memcpy(&ipv4_addr, addr->s6_addr + 12, 4); + ipv4_addr = addr->s6_addr32[3]; return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } From 401c6ae363bdc66e724b4589881a0c0ffc9d7775 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:44 -0700 Subject: [PATCH 113/296] IB/cm: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Acked-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index d80911d4abb7..5927ee4e57ca 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2808,6 +2808,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, msg_response = CM_MSG_RESPONSE_OTHER; break; } + /* fall through */ default: ret = -EINVAL; goto error1; From c0b64f58e8d49570aa9ee55d880f92c20ff0166b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:45 -0700 Subject: [PATCH 114/296] RDMA/cma: Avoid triggering undefined behavior According to the C standard the behavior of computations with integer operands is as follows: * A
computation involving unsigned operands can never overflow, because a result that cannot be represented by the resulting unsigned integer type is reduced modulo the number that is one greater than the largest value that can be represented by the resulting type. * The behavior for signed integer underflow and overflow is undefined. Hence only use unsigned integers when checking for integer overflow. This patch is what I came up with after having analyzed the following smatch warnings: drivers/infiniband/core/cma.c:3448: cma_resolve_ib_udp() warn: signed overflow undefined. 'offset + conn_param->private_data_len < conn_param->private_data_len' drivers/infiniband/core/cma.c:3505: cma_connect_ib() warn: signed overflow undefined. 'offset + conn_param->private_data_len < conn_param->private_data_len' Signed-off-by: Bart Van Assche Acked-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 852c8fec8088..fa79c7076ccd 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1540,7 +1540,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, return id_priv; } -static inline int cma_user_data_offset(struct rdma_id_private *id_priv) +static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv) { return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); } @@ -1942,7 +1942,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; - int offset, ret; + u8 offset; + int ret; listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) @@ -3440,7 +3441,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct ib_cm_sidr_req_param req; struct ib_cm_id *id; void *private_data; - int offset, ret; + u8 offset; + int ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); @@ -3497,7 +3499,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_route *route; void *private_data; struct ib_cm_id *id; - int offset, ret; + u8 offset; + int ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); From d39dcd6a2d69ed32e56ae0224900ea2aa23a953d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:46 -0700 Subject: [PATCH 115/296] RDMA/iwcm: Remove a set-but-not-used variable Signed-off-by: Bart Van Assche Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwcm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index fcf42f6bb82a..e9e189ec7502 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -447,9 +447,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { - struct iwcm_id_private *cm_id_priv; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); destroy_cm_id(cm_id); } EXPORT_SYMBOL(iw_destroy_cm_id); From d2271bd0834b8b2497ce39ca84820dd033b2deae Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:47 -0700 Subject: [PATCH 116/296] RDMA/isert: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] 
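Note that gcc 7 only honors a fall-through comment when it sits immediately before the next case label, with nothing but whitespace in between. A minimal sketch of the annotation pattern applied throughout this series, with hypothetical event and function names rather than code from this driver:

	static int handle_event(struct conn *conn, enum ev event)
	{
		int ret = 0;

		switch (event) {
		case EV_REJECTED:
			log_reject(conn);	/* handle, then deliberately run on */
			/* fall through */
		case EV_UNREACHABLE:
		case EV_CONNECT_ERROR:
			ret = handle_error(conn);
			break;
		default:
			break;
		}
		return ret;
	}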
Signed-off-by: Bart Van Assche Acked-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/isert/ib_isert.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ceabdb85df8b..720dfb3a1ac2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -788,10 +788,11 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) * the rdma cm id */ return 1; - case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_REJECTED: isert_info("Connection rejected: %s\n", rdma_reject_msg(cma_id, event->status)); - case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ + /* fall through */ + case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_CONNECT_ERROR: ret = isert_connect_error(cma_id); break; @@ -1569,9 +1570,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) transport_generic_free_cmd(&cmd->se_cmd, 0); break; } - /* - * Fall-through - */ + /* fall through */ default: iscsit_release_cmd(cmd); break; @@ -1749,8 +1748,9 @@ isert_do_control_comp(struct work_struct *work) switch (cmd->i_state) { case ISTATE_SEND_TASKMGTRSP: iscsit_tmr_post_handler(cmd, cmd->conn); - case ISTATE_SEND_REJECT: /* FALLTHRU */ - case ISTATE_SEND_TEXTRSP: /* FALLTHRU */ + /* fall through */ + case ISTATE_SEND_REJECT: + case ISTATE_SEND_TEXTRSP: cmd->i_state = ISTATE_SENT_STATUS; isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); From 38c8a7105955c18278a7c8af62838b9fa24c3e53 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:48 -0700 Subject: [PATCH 117/296] RDMA/bnxt_re: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Cc: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e8afc47f8949..e82644cf1aeb 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1360,7 +1360,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, break; } - /* else, just fall thru */ + /* fall thru */ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: { From 6dfa8ae6b405ed5ce76a9f76eaa30d2dceca91f0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:49 -0700 Subject: [PATCH 118/296] RDMA/bnxt_re: Remove set-but-not-used variables Signed-off-by: Bart Van Assche Cc: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 12 +++++------- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 5 ----- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ---- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 5 ++--- 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0d89621d9fe8..af65f8114379 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1643,7 +1643,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, u8 ip_version = 0; u16 vlan_id = 0xFFFF; void *buf; - int i, rc = 0, size; + int i, rc = 0; memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); @@ -1760,7 +1760,7 @@ static int 
bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, /* Pack the QP1 to the transmit buffer */ buf = bnxt_qplib_get_qp1_sq_buf(&qp->qplib_qp, &sge); if (buf) { - size = ib_ud_header_pack(&qp->qp1_hdr, buf); + ib_ud_header_pack(&qp->qp1_hdr, buf); for (i = wqe->num_sge; i; i--) { wqe->sg_list[i].addr = wqe->sg_list[i - 1].addr; wqe->sg_list[i].lkey = wqe->sg_list[i - 1].lkey; @@ -2216,7 +2216,7 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, struct ib_recv_wr *wr) { struct bnxt_qplib_swqe wqe; - int rc = 0, payload_sz = 0; + int rc = 0; memset(&wqe, 0, sizeof(wqe)); while (wr) { @@ -2231,8 +2231,7 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, rc = -EINVAL; break; } - payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, - wr->num_sge); + bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, wr->num_sge); wqe.wr_id = wr->wr_id; wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV; @@ -2569,7 +2568,7 @@ static void bnxt_re_process_req_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) static int bnxt_re_check_packet_type(u16 raweth_qp1_flags, u16 raweth_qp1_flags2) { - bool is_udp = false, is_ipv6 = false, is_ipv4 = false; + bool is_ipv6 = false, is_ipv4 = false; /* raweth_qp1_flags Bit 9-6 indicates itype */ if ((raweth_qp1_flags & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE) @@ -2580,7 +2579,6 @@ static int bnxt_re_check_packet_type(u16 raweth_qp1_flags, CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC && raweth_qp1_flags2 & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC) { - is_udp = true; /* raweth_qp1_flags2 Bit 8 indicates ip_type. 0-v4 1 - v6 */ (raweth_qp1_flags2 & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE) ? diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e82644cf1aeb..a82044dacce0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -160,11 +160,6 @@ void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) { - struct bnxt_qplib_cq *scq, *rcq; - - scq = qp->scq; - rcq = qp->rcq; - if (qp->sq.flushed) { qp->sq.flushed = false; list_del(&qp->sq_flush); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 2bdb1562bd21..000e6a1940b9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -88,7 +88,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, unsigned long flags; u32 size, opcode; u16 cookie, cbit; - int pg, idx; u8 *preq; opcode = req->opcode; @@ -149,9 +148,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, preq = (u8 *)req; size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS; do { - pg = 0; - idx = 0; - /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(cmdq->prod, cmdq); cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index e277e54a05eb..9543ce51a28a 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -720,13 +720,12 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids) struct cmdq_map_tc_to_cos req; struct creq_map_tc_to_cos_resp resp; u16 cmd_flags = 0; - int rc = 0; RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags); req.cos0 = cpu_to_le16(cids[0]); req.cos1 = cpu_to_le16(cids[1]); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); + 
bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, + 0); return 0; } From 1f5461b2021ae100d0171e6e0d99c009f84f3ac6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:50 -0700 Subject: [PATCH 119/296] RDMA/cxgb3: Annotate locking assumptions Tell sparse what the locking assumptions are for __flush_qp() such that it does not complain about the locking operations inside that function. Signed-off-by: Bart Van Assche Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 7f633da0185d..3871e1fd8395 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -722,10 +722,13 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) */ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, struct iwch_cq *schp) + __releases(&qhp->lock) + __acquires(&qhp->lock) { int count; int flushed; + lockdep_assert_held(&qhp->lock); pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); /* take a ref on the qhp since we must release the lock */ From 90a6b76b2d6445ef83fe65519349377b68d7ae81 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:51 -0700 Subject: [PATCH 120/296] RDMA/cxgb3: Annotate an RCU pointer Annotate t3cdev.l2opt with __rcu since it is used as an RCU pointer. Signed-off-by: Bart Van Assche Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/net/ethernet/chelsio/cxgb3/t3cdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h b/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h index 705713b56636..3c3e6cf6aca6 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h +++ b/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h @@ -60,7 +60,7 @@ struct t3cdev { int (*ctl)(struct t3cdev *dev, unsigned int req, void *data); void (*neigh_update)(struct t3cdev *dev, struct neighbour *neigh); void *priv; /* driver private data */ - void *l2opt; /* optional layer 2 data */ + void __rcu *l2opt; /* optional layer 2 data */ void *l3opt; /* optional layer 3 data */ void *l4opt; /* optional layer 4 data */ void *ulp; /* ulp stuff */ From 17e4e543aaf02efb957692c80b0a358568b03a2e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:52 -0700 Subject: [PATCH 121/296] RDMA/cxgb3: Remove a set-but-not-used variable Signed-off-by: Bart Van Assche Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 86975370a4c0..edc1c549289e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1760,8 +1760,8 @@ static void ep_timeout(unsigned long arg) int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err; struct iwch_ep *ep = to_ep(cm_id); + pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (state_read(&ep->com) == DEAD) { @@ -1772,8 +1772,8 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); else { - err = send_mpa_reject(ep, pdata, pdata_len); - err = iwch_ep_disconnect(ep, 0, GFP_KERNEL); + send_mpa_reject(ep, pdata, pdata_len); + iwch_ep_disconnect(ep, 0, GFP_KERNEL); 
} put_ep(&ep->com); return 0; From 70d7256819639e2b0c0425a50bbbb9e9fd9e9286 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:53 -0700 Subject: [PATCH 122/296] RDMA/cxgb4: Fix indentation This patch avoids that smatch reports the following: drivers/infiniband/hw/cxgb4/device.c:1105: copy_gl_to_skb_pkt() warn: inconsistent indenting drivers/infiniband/hw/cxgb4/cm.c:835: send_connect() warn: inconsistent indenting drivers/infiniband/hw/cxgb4/cm.c:841: send_connect() warn: inconsistent indenting drivers/infiniband/hw/cxgb4/cm.c:888: send_connect() warn: inconsistent indenting drivers/infiniband/hw/cxgb4/cm.c:894: send_connect() warn: inconsistent indenting Signed-off-by: Bart Van Assche Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 8 ++++---- drivers/infiniband/hw/cxgb4/device.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e395afcd7d33..2331e7d57704 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -832,13 +832,13 @@ static int send_connect(struct c4iw_ep *ep) t5req->params = cpu_to_be64(FILTER_TUPLE_V(params)); t5req->rsvd = cpu_to_be32(isn); - pr_debug("snd_isn %u\n", t5req->rsvd); + pr_debug("snd_isn %u\n", t5req->rsvd); t5req->opt2 = cpu_to_be32(opt2); } else { t6req->params = cpu_to_be64(FILTER_TUPLE_V(params)); t6req->rsvd = cpu_to_be32(isn); - pr_debug("snd_isn %u\n", t6req->rsvd); + pr_debug("snd_isn %u\n", t6req->rsvd); t6req->opt2 = cpu_to_be32(opt2); } } @@ -885,13 +885,13 @@ static int send_connect(struct c4iw_ep *ep) t5req6->params = cpu_to_be64(FILTER_TUPLE_V(params)); t5req6->rsvd = cpu_to_be32(isn); - pr_debug("snd_isn %u\n", t5req6->rsvd); + pr_debug("snd_isn %u\n", t5req6->rsvd); t5req6->opt2 = cpu_to_be32(opt2); } else { t6req6->params = cpu_to_be64(FILTER_TUPLE_V(params)); t6req6->rsvd = cpu_to_be32(isn); - pr_debug("snd_isn %u\n", t6req6->rsvd); + pr_debug("snd_isn %u\n", t6req6->rsvd); t6req6->opt2 = cpu_to_be32(opt2); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index fa16a4a8c9a4..33bfddf3b064 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1102,8 +1102,8 @@ static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, if (unlikely(!skb)) return NULL; - __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + - sizeof(struct rss_header) - pktshift); + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift); /* * This skb will contain: From 76ca0d1b16233b0d7c03aadaafc56931d095b79c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:54 -0700 Subject: [PATCH 123/296] RDMA/cxgb4: Remove the obsolete kernel module option 'c4iw_debug' This patch avoids that building the cxgb4 module with W=1 triggers a complaint about a local variable that has not been declared static. 
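Giving the variable internal linkage would also have silenced the warning, but since the parameter is documented as obsolete, removing it outright (see the diff below) is the cleaner fix. For a module parameter that must stay, a sketch of the static form with a hypothetical name, following the same pattern this file already uses for its other parameters:

	static int my_debug;
	module_param(my_debug, int, 0644);
	MODULE_PARM_DESC(my_debug, "enable extra debug output (default=0)");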
Signed-off-by: Bart Van Assche Cc: Steve Wise Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 2331e7d57704..f3235c6169d4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -99,10 +99,6 @@ module_param(enable_tcp_window_scaling, int, 0644); MODULE_PARM_DESC(enable_tcp_window_scaling, "Enable tcp window scaling (default=1)"); -int c4iw_debug; -module_param(c4iw_debug, int, 0644); -MODULE_PARM_DESC(c4iw_debug, "obsolete"); - static int peer2peer = 1; module_param(peer2peer, int, 0644); MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); From 9ae970e27729e976598584a9427e8f8705efa585 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:55 -0700 Subject: [PATCH 124/296] RDMA/cxgb4: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index f3235c6169d4..6e84179b4f9a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3638,6 +3638,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, send_fw_act_open_req(ep, atid); return; } + /* fall through */ case FW_EADDRINUSE: set_bit(ACT_RETRY_INUSE, &ep->com.history); if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { From 81e74ec286135927df6b360b3cea61764da4fdb9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:56 -0700 Subject: [PATCH 125/296] RDMA/cxgb4: Remove a set-but-not-used variable Signed-off-by: Bart Van Assche Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6e84179b4f9a..78e2916f9c78 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3877,7 +3877,6 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) struct net_device *pdev; u16 rss_qid, eth_hdr_len; int step; - u32 tx_chan; struct neighbour *neigh; /* Drop all non-SYN packets */ @@ -3959,14 +3958,12 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, pdev, 0); pi = (struct port_info *)netdev_priv(pdev); - tx_chan = cxgb4_port_chan(pdev); dev_put(pdev); } else { pdev = get_real_dev(neigh->dev); e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, pdev, 0); pi = (struct port_info *)netdev_priv(pdev); - tx_chan = cxgb4_port_chan(pdev); } neigh_release(neigh); if (!e) { From 6ffeb21f8e38c71146072fa0a099d976b0762a15 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:57 -0700 Subject: [PATCH 126/296] IB/hfi1: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Cc: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 1 + drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 3 ++- 3 files changed, 4 
insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 07b80faf1675..c01335edeabd 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -711,6 +711,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, /* Bad mkey not a violation below level 2 */ if (ibp->rvp.mkeyprot < 2) break; + /* fall through */ case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: if (ibp->rvp.mkey_violations != 0xFFFF) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e1cf0c08ca6f..8ca26e67a096 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2175,7 +2175,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) goto no_immediate_data; if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) goto send_last_inv; - /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ + /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index e7df7794c97e..961507bb0391 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2593,7 +2593,7 @@ static void __sdma_process_event(struct sdma_engine *sde, * 7220, e.g. */ ss->go_s99_running = 1; - /* fall through and start dma engine */ + /* fall through -- and start dma engine */ case sdma_event_e10_go_hw_start: /* This reference means the state machine is started */ sdma_get(&sde->state); @@ -3016,6 +3016,7 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e60_hw_halted: need_progress = 1; sdma_err_progress_check_schedule(sde); + /* fall through */ case sdma_event_e90_sw_halted: /* * SW initiated halt does not perform engines From 6d945a84c69d68a2c494b716642cab2c15d55b4c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:58 -0700 Subject: [PATCH 127/296] IB/hfi1: Remove set-but-not-used variables Signed-off-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Cc: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 4 +--- drivers/infiniband/hw/hfi1/mad.c | 2 -- drivers/infiniband/hw/hfi1/ruc.c | 9 --------- drivers/infiniband/hw/hfi1/sdma.c | 2 -- drivers/infiniband/hw/hfi1/ud.c | 2 -- drivers/infiniband/hw/hfi1/verbs.c | 10 ---------- 6 files changed, 1 insertion(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2b7144e0405e..9a829ae95bc2 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -776,7 +776,7 @@ static int complete_subctxt(struct hfi1_filedata *fd) static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len) { int ret; - unsigned int swmajor, swminor; + unsigned int swmajor; struct hfi1_ctxtdata *uctxt = NULL; struct hfi1_user_info uinfo; @@ -796,8 +796,6 @@ static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len) if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS) return -EINVAL; - swminor = uinfo.userversion & 0xffff; - /* * Acquire the mutex to protect against multiple creations of what * could be a shared base context. 
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index c01335edeabd..bf699d23b5c5 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2889,7 +2889,6 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, struct _vls_dctrs *vlinfo; size_t response_data_size; u32 num_ports; - u8 num_pslm; u8 lq, num_vls; u8 res_lli, res_ler; u64 port_mask; @@ -2899,7 +2898,6 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, int vfi; num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24; - num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); num_vls = hweight32(be32_to_cpu(req->vl_select_mask)); vl_select_mask = be32_to_cpu(req->vl_select_mask); res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index b3291f0fde9a..d450d4d4bda3 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -825,11 +825,9 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = ps->ibp; - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 bth1 = 0; u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); u16 lrh0 = HFI1_LRH_BTH; - u16 slid; u8 extra_bytes = -ps->s_txreq->s_cur_size & 3; u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + extra_bytes) >> 2); @@ -866,13 +864,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); - - if (!ppd->lid) - slid = be16_to_cpu(IB_LID_PERMISSIVE); - else - slid = ppd->lid | - (rdma_ah_get_path_bits(&qp->remote_ah_attr) & - ((1 << ppd->lmc) - 1)); hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, lrh0, qp->s_hdrwords + nwords, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 961507bb0391..3ba7f4ea4bc9 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2144,7 +2144,6 @@ void sdma_dumpstate(struct sdma_engine *sde) static void dump_sdma_state(struct sdma_engine *sde) { - struct hw_sdma_desc *descq; struct hw_sdma_desc *descqp; u64 desc[2]; u64 addr; @@ -2155,7 +2154,6 @@ static void dump_sdma_state(struct sdma_engine *sde) head = sde->descq_head & sde->sdma_mask; tail = sde->descq_tail & sde->sdma_mask; cnt = sdma_descq_freecnt(sde); - descq = sde->descq; dd_dev_err(sde->dd, "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n", diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 2ba74fdd6f15..84c7dbec0c28 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -854,7 +854,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct ib_header *hdr = packet->hdr; void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; @@ -880,7 +879,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid_is_permissive = (dlid == permissive_lid); slid_is_permissive = (slid == permissive_lid); } else { - hdr = packet->hdr; pkey = ib_bth_get_pkey(ohdr); dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index e232f3c608b4..8acdb5dded24 100644 --- 
a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -812,7 +812,6 @@ static int build_verbs_tx_desc( int ret = 0; struct hfi1_sdma_header *phdr = &tx->phdr; u16 hdrbytes = tx->hdr_dwords << 2; - u32 *hdr; u8 extra_bytes = 0; static char trail_buf[12]; /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */ @@ -823,9 +822,6 @@ static int build_verbs_tx_desc( */ extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) + (SIZE_OF_CRC << 2) + SIZE_OF_LT; - hdr = (u32 *)&phdr->hdr.opah; - } else { - hdr = (u32 *)&phdr->hdr.ibh; } if (!ahg_info->ahgcount) { ret = sdma_txinit_ahg( @@ -891,14 +887,12 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u8 sc5 = priv->s_sc; int ret; u32 dwords; - bool bypass = false; if (ps->s_txreq->phdr.hdr.hdr_type) { u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len); dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) + SIZE_OF_LT) >> 2; - bypass = true; } else { dwords = (len + 3) >> 2; } @@ -1033,8 +1027,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int wc_status = IB_WC_SUCCESS; int ret = 0; pio_release_cb cb = NULL; - u32 lrh0_16b; - bool bypass = false; u8 extra_bytes = 0; if (ps->s_txreq->phdr.hdr.hdr_type) { @@ -1043,8 +1035,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT; dwords = (len + extra_bytes) >> 2; hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah; - lrh0_16b = ps->s_txreq->phdr.hdr.opah.lrh[0]; - bypass = true; } else { dwords = (len + 3) >> 2; hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh; From e2fdbc23689258d9dd43450048707c953bab5f89 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:59 -0700 Subject: [PATCH 128/296] IB/hfi1: Define hfi1_handle_cnp_tbl[] once Move the hfi1_handle_cnp_tbl[] from a header file to a .c file such that only one copy ends up in the hfi1 kernel module. This patch does not change any functionality. 
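The rule at work here is generic: an object defined static in a header is instantiated once per translation unit that includes the header, so every includer carries its own private copy of the table. A minimal sketch with hypothetical names:

	/* lut.h -- wasteful: every .c file including this gets its own copy */
	static const int lut[2] = { 10, 20 };

	/* lut.c -- preferred: one definition, in the only file that uses it */
	static const int lut[2] = { 10, 20 };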
Signed-off-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Cc: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 6 ++++++ drivers/infiniband/hw/hfi1/hfi.h | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 85a425ef740e..b4f69c07ca2a 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -433,6 +433,12 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->numpkt = 0; } +/* We support only two types - 9B and 16B for now */ +static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &return_cnp, + [HFI1_PKT_TYPE_16B] = &return_cnp_16B +}; + void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2f3c830669a9..854b64e5976d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1538,11 +1538,6 @@ typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); -/* We support only two types - 9B and 16B for now */ -static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { - [HFI1_PKT_TYPE_9B] = &return_cnp, - [HFI1_PKT_TYPE_16B] = &return_cnp_16B -}; #define PKEY_CHECK_INVALID -1 int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, u8 sc5, int8_t s_pkey_index); From cc4ed08bc56ee42a7ade7602c242007c73a7c94b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:00 -0700 Subject: [PATCH 129/296] IB/hns: Annotate iomem pointers correctly This patch avoids that sparse complains that there is an address space mismatch. Signed-off-by: Bart Van Assche Cc: Lijun Ou Cc: Wei Hu (Xavier) Cc: Shaobo Xu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_eq.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 +++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index b0f43735de1a..d184431e2bf5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -558,7 +558,7 @@ static int hns_roce_create_eq(struct hns_roce_dev *hr_dev, writel(eqshift_val, eqc); /* Configure eq extended address 12~44bit */ - writel((u32)(eq->buf_list[0].map >> 12), (u8 *)eqc + 4); + writel((u32)(eq->buf_list[0].map >> 12), eqc + 4); /* * Configure eq extended address 45~49 bit. 
@@ -572,13 +572,13 @@ static int hns_roce_create_eq(struct hns_roce_dev *hr_dev, roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); - writel(eqcuridx_val, (u8 *)eqc + 8); + writel(eqcuridx_val, eqc + 8); /* Configure eq consumer index */ roce_set_field(eqconsindx_val, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); - writel(eqconsindx_val, (u8 *)eqc + 0xc); + writel(eqconsindx_val, eqc + 0xc); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 852db18ec128..187241484fa1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1639,7 +1639,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event) { - u32 *hcr = (u32 *)(hr_dev->reg_base + ROCEE_MB1_REG); + u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG); unsigned long end; u32 val = 0; @@ -2534,7 +2534,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int rq_pa_start; u32 reg_val; u64 *mtts; - u32 *addr; + u32 __iomem *addr; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -2616,8 +2616,9 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_40_SQ_CUR_IDX_S, 0); /* Copy context to QP1C register */ - addr = (u32 *)(hr_dev->reg_base + ROCEE_QP1C_CFG0_0_REG + - hr_qp->phy_port * sizeof(*context)); + addr = (u32 __iomem *)(hr_dev->reg_base + + ROCEE_QP1C_CFG0_0_REG + + hr_qp->phy_port * sizeof(*context)); writel(context->qp1c_bytes_4, addr); writel(context->sq_rq_bt_l, addr + 1); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b99d70aa1120..ff61f30a82c2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -976,7 +976,8 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u16 op, u16 token, int event) { struct device *dev = hr_dev->dev; - u32 *hcr = (u32 *)(hr_dev->reg_base + ROCEE_VF_MB_CFG0_REG); + u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + + ROCEE_VF_MB_CFG0_REG); unsigned long end; u32 val0 = 0; u32 val1 = 0; From d61d6de0744ffc294cb50a3c1952ef383676d440 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:01 -0700 Subject: [PATCH 130/296] IB/hns: Declare local functions 'static' Signed-off-by: Bart Van Assche Cc: Lijun Ou Cc: Wei Hu (Xavier) Cc: Shaobo Xu Acked-by: Wei Hu (Xavier) Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 64 ++++++++++++---------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 187241484fa1..658e3a7464b3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -57,8 +57,8 @@ static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr, rseg->len = 0; } -int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); @@ -317,8 +317,8 @@ int 
hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return ret; } -int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) { int ret = 0; int nreq = 0; @@ -877,7 +877,7 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev) return 0; } -void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work) +static void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work) { struct hns_roce_recreate_lp_qp_work *lp_qp_work; struct hns_roce_dev *hr_dev; @@ -1033,7 +1033,8 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) kfree(mr_work); } -int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) +static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr) { struct device *dev = &hr_dev->pdev->dev; struct hns_roce_mr_free_work *mr_work; @@ -1391,7 +1392,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) * @enable: true -- drop reset, false -- reset * return 0 - success , negative --fail */ -int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) +static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) { struct device_node *dsaf_node; struct device *dev = &hr_dev->pdev->dev; @@ -1466,7 +1467,7 @@ static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) destroy_workqueue(des_qp->qp_wq); } -int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) +static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; @@ -1538,7 +1539,7 @@ int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) return 0; } -int hns_roce_v1_init(struct hns_roce_dev *hr_dev) +static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) { int ret; u32 val; @@ -1617,7 +1618,7 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev) return ret; } -void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) +static void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) { hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); hns_roce_free_mr_free(hr_dev); @@ -1701,8 +1702,8 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid) +static void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, + int gid_index, union ib_gid *gid) { u32 *p = NULL; u8 gid_idx = 0; @@ -1765,8 +1766,8 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, return 0; } -void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, - enum ib_mtu mtu) +static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, + enum ib_mtu mtu) { u32 val; @@ -1778,8 +1779,8 @@ void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, val); } -int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, - unsigned long mtpt_idx) +static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, + unsigned long mtpt_idx) { struct hns_roce_v1_mpt_entry *mpt_entry; struct scatterlist *sg; @@ -1951,7 +1952,7 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) return get_sw_cqe(hr_cq, hr_cq->cons_index); } -void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) +static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { u32 doorbell[2]; @@ -2024,9 +2025,10 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, 
spin_unlock_irq(&hr_cq->lock); } -void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, - dma_addr_t dma_handle, int nent, u32 vector) +static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, void *mb_buf, + u64 *mtts, dma_addr_t dma_handle, int nent, + u32 vector) { struct hns_roce_cq_context *cq_context = NULL; struct hns_roce_buf_list *tptr_buf; @@ -2111,7 +2113,8 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); } -int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, + enum ib_cq_notify_flags flags) { struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; @@ -2372,8 +2375,9 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return ret; } -int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj, int step_idx) +static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj, + int step_idx) { struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; @@ -3272,9 +3276,10 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return ret; } -int hns_roce_v1_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, - int attr_mask, enum ib_qp_state cur_state, - enum ib_qp_state new_state) +static int hns_roce_v1_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, + enum ib_qp_state new_state) { if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) @@ -3541,8 +3546,9 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return ret; } -int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +static int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) { struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -3893,7 +3899,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) return 0; } -int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) +static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) { struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); From b965b51c55c5a21d65ca36592c6048ea60c3ace9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:02 -0700 Subject: [PATCH 131/296] RDMA/i40iw: Fix a race condition Use the proper primitives to dereference the RCU pointer upper_dev->ip_ptr. Compile-tested only. 
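The pattern being applied: an RCU-protected pointer such as ip_ptr must be read inside an rcu_read_lock()/rcu_read_unlock() critical section through an accessor built on rcu_dereference() (here the __in_dev_get_rcu() helper), never through a plain cast. A sketch of the general shape, mirroring the diff below; the NULL checks are defensive additions, not part of the patch:

	struct in_device *in;

	rcu_read_lock();
	in = __in_dev_get_rcu(upper_dev);	/* safe: RCU read side held */
	if (in && in->ifa_list)
		local_ipaddr = ntohl(in->ifa_list->ifa_address);
	rcu_read_unlock();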
Signed-off-by: Bart Van Assche Reviewed-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index e52dbbb4165e..7304189b62ef 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -168,11 +168,16 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, if (netdev != event_netdev) return NOTIFY_DONE; - if (upper_dev) - local_ipaddr = ntohl( - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); - else + if (upper_dev) { + struct in_device *in; + + rcu_read_lock(); + in = __in_dev_get_rcu(upper_dev); + local_ipaddr = ntohl(in->ifa_list->ifa_address); + rcu_read_unlock(); + } else { local_ipaddr = ntohl(ifa->ifa_address); + } switch (event) { case NETDEV_DOWN: action = I40IW_ARP_DELETE; From ad4d63401008ea9ac6c5b1613d86c756c6ab9a2a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:03 -0700 Subject: [PATCH 132/296] RDMA/i40iw: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Reviewed-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 1 + drivers/infiniband/hw/i40iw/i40iw_hw.c | 1 + drivers/infiniband/hw/i40iw/i40iw_puda.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 50c43766defd..4e1ff81c5ba0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2410,6 +2410,7 @@ static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node, case I40IW_CM_STATE_FIN_WAIT1: case I40IW_CM_STATE_LAST_ACK: cm_node->cm_id->rem_ref(cm_node->cm_id); + /* fall through */ case I40IW_CM_STATE_TIME_WAIT: cm_node->state = I40IW_CM_STATE_CLOSED; i40iw_rem_ref_cm_node(cm_node); diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 476867a3f584..7e9de845a350 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -410,6 +410,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG: case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH: ctx_info->err_rq_idx_valid = false; + /* fall through */ default: if (!info->sq && ctx_info->err_rq_idx_valid) { ctx_info->err_rq_idx = info->wqe_idx; diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index c2cab20c4bc5..2a3bf6488b4b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -816,6 +816,7 @@ void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi, switch (rsrc->completion) { case PUDA_HASH_CRC_COMPLETE: i40iw_free_hash_desc(rsrc->hash_desc); + /* fall through */ case PUDA_QP_CREATED: if (!reset) i40iw_puda_free_qp(rsrc); From 4956f031e13a2546da1f7049188e0ca766a25607 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:04 -0700 Subject: [PATCH 133/296] RDMA/i40iw: Remove a set-but-not-used variable Signed-off-by: Bart Van Assche Reviewed-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c 
b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4e1ff81c5ba0..f5cfb224e153 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1199,7 +1199,6 @@ static void i40iw_cm_timer_tick(unsigned long pass) struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass; u32 settimer = 0; unsigned long timetosend; - struct i40iw_sc_dev *dev; unsigned long flags; struct list_head timer_list; @@ -1270,7 +1269,6 @@ static void i40iw_cm_timer_tick(unsigned long pass) spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); vsi = &cm_node->iwdev->vsi; - dev = cm_node->dev; if (!cm_node->ack_rcvd) { atomic_inc(&send_entry->sqbuf->refcount); From 8a53f41b1624c979920b3f1b5558ee4da350f4f0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:05 -0700 Subject: [PATCH 134/296] IB/mthca: Fix indentation Signed-off-by: Bart Van Assche Cc: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index e36a9bc52268..f3e80dec1334 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -473,11 +473,11 @@ static int mthca_init_icm(struct mthca_dev *mdev, goto err_unmap_eqp; } - mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, - dev_lim->cqc_entry_sz, - mdev->limits.num_cqs, - mdev->limits.reserved_cqs, - 0, 0); + mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, + dev_lim->cqc_entry_sz, + mdev->limits.num_cqs, + mdev->limits.reserved_cqs, + 0, 0); if (!mdev->cq_table.table) { mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); err = -ENOMEM; From 8aff1fb310ec7472adecea52def6d40962ed9e48 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:06 -0700 Subject: [PATCH 135/296] IB/mlx4: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 2 ++ drivers/infiniband/hw/mlx4/mcg.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index cab796341697..6d5f405912dd 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -768,11 +768,13 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { case MLX4_OPCODE_RDMA_WRITE_IMM: wc->wc_flags |= IB_WC_WITH_IMM; + /* fall through */ case MLX4_OPCODE_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; case MLX4_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; + /* fall through */ case MLX4_OPCODE_SEND: case MLX4_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 70eb9f917303..81ffc007e0a1 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -944,6 +944,7 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, switch (sa_mad->mad_hdr.method) { case IB_MGMT_METHOD_SET: may_create = 1; + /* fall through */ case IB_SA_METHOD_DELETE: req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) From f6b1ee349db4ff1fcc12e2a078b24c4f83beed7e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 
2017 10:49:07 -0700 Subject: [PATCH 136/296] IB/mlx5: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 2 ++ drivers/infiniband/hw/mlx5/qp.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2aa53f427685..b8a116d0e063 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -124,11 +124,13 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) { case MLX5_OPCODE_RDMA_WRITE_IMM: wc->wc_flags |= IB_WC_WITH_IMM; + /* fall through */ case MLX5_OPCODE_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; case MLX5_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; + /* fall through */ case MLX5_OPCODE_SEND: case MLX5_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index acb79d3a4f1d..b0b09febafda 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4053,6 +4053,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, *bad_wr = wr; goto out; } + /* fall through */ case MLX5_IB_QPT_HW_GSI: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); From 3d1f236dccd847fb6d083684a7105e0bd9d174fa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:08 -0700 Subject: [PATCH 137/296] IB/mlx5: Remove a set-but-not-used variable References: commit 5fe9dec0d045 ("IB/mlx5: Use blue flame register allocator in mlx5_ib") Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b0b09febafda..37a0976240fd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3858,7 +3858,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned long flags; unsigned idx; int err = 0; - int inl = 0; int num_sge; void *seg; int nreq; @@ -4117,7 +4116,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, *bad_wr = wr; goto out; } - inl = 1; size += sz; } else { dpseg = seg; From af758a228537d668aba1c352197760c15d5e9067 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:09 -0700 Subject: [PATCH 138/296] IB/nes: Fix indentation Signed-off-by: Bart Van Assche Cc: Faisal Latif Acked-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 20 ++++++++++---------- drivers/infiniband/hw/nes/nes_hw.c | 5 +++-- drivers/infiniband/hw/nes/nes_nic.c | 9 ++++----- drivers/infiniband/hw/nes/nes_utils.c | 8 ++++---- drivers/infiniband/hw/nes/nes_verbs.c | 12 ++++++------ 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 942ca84713c9..ff58dfa071b8 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -757,18 +757,18 @@ static void nes_remove(struct pci_dev *pcidev) int netdev_index = 0; unsigned long flags; - if (nesdev->netdev_count) { - netdev = nesdev->netdev[netdev_index]; - if (netdev) { - netif_stop_queue(netdev); - unregister_netdev(netdev); - 
nes_netdev_destroy(netdev); + if (nesdev->netdev_count) { + netdev = nesdev->netdev[netdev_index]; + if (netdev) { + netif_stop_queue(netdev); + unregister_netdev(netdev); + nes_netdev_destroy(netdev); - nesdev->netdev[netdev_index] = NULL; - nesdev->netdev_count--; - nesdev->nesadapter->netdev_count--; - } + nesdev->netdev[netdev_index] = NULL; + nesdev->netdev_count--; + nesdev->nesadapter->netdev_count--; } + } nes_notifiers_registered--; if (nes_notifiers_registered == 0) { diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index b0adf65e4bdb..812b59d8f13c 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1861,8 +1861,9 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) } if ((nesdev->nesadapter->allow_unaligned_fpdus) && (nes_init_mgt_qp(nesdev, netdev, nesvnic))) { - nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name); - nes_destroy_nic_qp(nesvnic); + nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", + netdev->name); + nes_destroy_nic_qp(nesvnic); return -ENOMEM; } diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 5921ea3d50ae..6977fcfcbb76 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -926,11 +926,10 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) nesadapter->pft_mcast_map[mc_index] != nesvnic->nic_index && mc_index < max_pft_entries_avaiable) { - nes_debug(NES_DBG_NIC_RX, - "mc_index=%d skipping nic_index=%d, " - "used for=%d \n", mc_index, - nesvnic->nic_index, - nesadapter->pft_mcast_map[mc_index]); + nes_debug(NES_DBG_NIC_RX, + "mc_index=%d skipping nic_index=%d, used for=%d\n", + mc_index, nesvnic->nic_index, + nesadapter->pft_mcast_map[mc_index]); mc_index++; } if (mc_index >= max_pft_entries_avaiable) diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 37331e2fdc5f..7b67e9cb57ed 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -888,10 +888,10 @@ void nes_clc(unsigned long parm) struct nes_adapter *nesadapter = nesdev->nesadapter; spin_lock_irqsave(&nesadapter->phy_lock, flags); - nesadapter->link_interrupt_count[0] = 0; - nesadapter->link_interrupt_count[1] = 0; - nesadapter->link_interrupt_count[2] = 0; - nesadapter->link_interrupt_count[3] = 0; + nesadapter->link_interrupt_count[0] = 0; + nesadapter->link_interrupt_count[1] = 0; + nesadapter->link_interrupt_count[2] = 0; + nesadapter->link_interrupt_count[3] = 0; spin_unlock_irqrestore(&nesadapter->phy_lock, flags); nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index f0dc5f4aa177..6abe90b58c91 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2865,11 +2865,11 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ - if (nesqp->hte_added) { - nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n"); - next_iwarp_state |= NES_CQP_QP_DEL_HTE; - nesqp->hte_added = 0; - } + if (nesqp->hte_added) { + nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n"); + next_iwarp_state |= NES_CQP_QP_DEL_HTE; + nesqp->hte_added = 0; + } if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) && (nesdev->iw_status) && 
(nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) { @@ -3560,7 +3560,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]); wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) | ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32); - entry->opcode = IB_WC_RECV; + entry->opcode = IB_WC_RECV; nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1); if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) { From ea45c45be00e1ab6de11914a43a86d7aa5c91051 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:10 -0700 Subject: [PATCH 139/296] IB/nes: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Cc: Faisal Latif Acked-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 1 + drivers/infiniband/hw/nes/nes_hw.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index de4025deaa4a..a5f6dffef07a 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1768,6 +1768,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_FIN_WAIT1: case NES_CM_STATE_LAST_ACK: cm_node->cm_id->rem_ref(cm_node->cm_id); + /* fall through */ case NES_CM_STATE_TIME_WAIT: cm_node->state = NES_CM_STATE_CLOSED; rem_ref_cm_node(cm_node->cm_core, cm_node); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 812b59d8f13c..c392f83c0629 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -3632,7 +3632,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE; aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info); } - + /* fall through */ case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE: case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: From fd9e04d6d2949607174166844b790f18be519c75 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:11 -0700 Subject: [PATCH 140/296] IB/nes: Remove set-but-not-used variables This patch does not change any functionality. 
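gcc reports these variables with -Wunused-but-set-variable when building with W=1. Note that where the removed variable captured a hardware register read, the read itself is kept as a bare call, since reading a device register can have side effects. From the nes_mh_fix() hunk below:

	-	serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0);
	+	nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0);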
Signed-off-by: Bart Van Assche Cc: Faisal Latif Acked-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 8 -------- drivers/infiniband/hw/nes/nes_utils.c | 5 ++--- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index a5f6dffef07a..bfe2156813b1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1389,7 +1389,6 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi struct rtable *rt; struct neighbour *neigh; int rc = arpindex; - struct net_device *netdev; struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; __be32 dst_ipaddr = htonl(dst_ip); @@ -1400,11 +1399,6 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi return rc; } - if (netif_is_bond_slave(nesvnic->netdev)) - netdev = netdev_master_upper_dev_get(nesvnic->netdev); - else - netdev = nesvnic->netdev; - neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); rcu_read_lock(); @@ -3075,7 +3069,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) u32 crc_value; int ret; int passive_state; - struct nes_ib_device *nesibdev; struct ib_mr *ibmr = NULL; struct nes_pd *nespd; u64 tagged_offset; @@ -3158,7 +3151,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) { u64temp = (unsigned long)nesqp; - nesibdev = nesvnic->nesibdev; nespd = nesqp->nespd; tagged_offset = (u64)(unsigned long)*start_buff; ibmr = nes_reg_phys_mr(&nespd->ibpd, diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 7b67e9cb57ed..0613eb5006cc 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -753,7 +753,6 @@ void nes_mh_fix(unsigned long parm) u32 mac_tx_frames_low; u32 mac_tx_frames_high; u32 mac_tx_pauses; - u32 serdes_status; u32 reset_value; u32 tx_control; u32 tx_config; @@ -846,7 +845,7 @@ void nes_mh_fix(unsigned long parm) } nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); - serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0); + nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0); nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7); nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000); @@ -859,7 +858,7 @@ void nes_mh_fix(unsigned long parm) } else { nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222); } - serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0); + nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0); nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff); nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control); From 52adbb691484c175ca067917c858e16727b03cc9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:12 -0700 Subject: [PATCH 141/296] IB/nes: Fix a race condition in nes_inetaddr_event() This patch has been compile-tested only. 
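The race: the old code dereferenced upper_dev->ip_ptr (and its ifa_list) directly, with nothing preventing the in_device from changing or going away underneath it. The fix switches to the RCU accessor. A minimal sketch of the pattern (the NULL checks here are illustrative only and not part of this patch, which assigns in->ifa_list->ifa_address to nesvnic->local_ipaddr directly):

	struct in_device *in;

	rcu_read_lock();
	in = __in_dev_get_rcu(upper_dev);
	if (in && in->ifa_list)
		addr = in->ifa_list->ifa_address; /* addr stands in for nesvnic->local_ipaddr */
	rcu_read_unlock();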
Signed-off-by: Bart Van Assche Cc: Faisal Latif Acked-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index ff58dfa071b8..42b68aa999fc 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -178,11 +178,16 @@ static int nes_inetaddr_event(struct notifier_block *notifier, /* fall through */ case NETDEV_CHANGEADDR: /* Add the address to the IP table */ - if (upper_dev) - nesvnic->local_ipaddr = - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; - else + if (upper_dev) { + struct in_device *in; + + rcu_read_lock(); + in = __in_dev_get_rcu(upper_dev); + nesvnic->local_ipaddr = in->ifa_list->ifa_address; + rcu_read_unlock(); + } else { nesvnic->local_ipaddr = ifa->ifa_address; + } nes_write_indexed(nesdev, NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), From 69a840357df94f4bc2b658b49273fe452231e3c4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:13 -0700 Subject: [PATCH 142/296] RDMA/ocrdma: Use NULL instead of 0 to represent a pointer Signed-off-by: Bart Van Assche Cc: Selvin Xavier Cc: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 8e2e9bcf1916..eaacb71b0bff 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -3186,7 +3186,7 @@ void ocrdma_eqd_set_task(struct work_struct *work) { struct ocrdma_dev *dev = container_of(work, struct ocrdma_dev, eqd_work.work); - struct ocrdma_eq *eq = 0; + struct ocrdma_eq *eq = NULL; int i, num = 0, status = -EINVAL; u64 eq_intr; From 705dec3c9f33d1d0e46b75446a80a3d91faf5a96 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:14 -0700 Subject: [PATCH 143/296] RDMA/ocrdma: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Cc: Selvin Xavier Cc: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 27d5e8d9f08d..3757f1b8cbfc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2247,6 +2247,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_SEND_WITH_IMM: hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT); hdr->immdt = ntohl(wr->ex.imm_data); + /* fall through */ case IB_WR_SEND: hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT); ocrdma_build_send(qp, hdr, wr); @@ -2260,6 +2261,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE_WITH_IMM: hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT); hdr->immdt = ntohl(wr->ex.imm_data); + /* fall through */ case IB_WR_RDMA_WRITE: hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT); status = ocrdma_build_write(qp, hdr, wr); From 05a8a1cbf481be88c0183795317ffb509fe8a8cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:15 -0700 Subject: [PATCH 144/296] RDMA/ocrdma: Remove set-but-not-used variables 
Signed-off-by: Bart Van Assche Cc: Selvin Xavier Cc: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 4 ++-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index eaacb71b0bff..f8c14c7baedb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -3187,7 +3187,7 @@ void ocrdma_eqd_set_task(struct work_struct *work) struct ocrdma_dev *dev = container_of(work, struct ocrdma_dev, eqd_work.work); struct ocrdma_eq *eq = NULL; - int i, num = 0, status = -EINVAL; + int i, num = 0; u64 eq_intr; for (i = 0; i < dev->eq_cnt; i++) { @@ -3209,7 +3209,7 @@ void ocrdma_eqd_set_task(struct work_struct *work) } if (num) - status = ocrdma_modify_eqd(dev, &dev->eq_tbl[0], num); + ocrdma_modify_eqd(dev, &dev->eq_tbl[0], num); schedule_delayed_work(&dev->eqd_work, msecs_to_jiffies(1000)); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 3757f1b8cbfc..7866fd8051f6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -66,9 +66,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *sgid) { int ret; - struct ocrdma_dev *dev; - dev = get_ocrdma_dev(ibdev); memset(sgid, 0, sizeof(*sgid)); if (index >= OCRDMA_MAX_SGID) return -EINVAL; From f3895c2dade19b142d9af121b8cb2529a5416b25 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:16 -0700 Subject: [PATCH 145/296] RDMA/qedr: Use NULL instead of 0 to represent a pointer Signed-off-by: Bart Van Assche Cc: Ram Amrani Cc: Michal Kalderon Cc: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr_roce_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index c3c249ba62b8..2a0346678ddd 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -660,7 +660,7 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, dev->gsi_ll2_handle, wr->sg_list[0].addr, wr->sg_list[0].length, - 0 /* cookie */, + NULL /* cookie */, 1 /* notify_fw */); if (rc) { DP_ERR(dev, From 0089985e2505773f090e12fd59644b6adff4ed4f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:17 -0700 Subject: [PATCH 146/296] RDMA/qedr: Declare local functions static Signed-off-by: Bart Van Assche Cc: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 22 +++++++++--------- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 14 ++++++------ drivers/infiniband/hw/qedr/qedr_roce_cm.c | 27 +++++++++++------------ drivers/infiniband/hw/qedr/verbs.c | 2 +- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 1afc26785049..be282e4915d8 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -54,8 +54,8 @@ MODULE_LICENSE("Dual BSD/GPL"); #define QEDR_WQ_MULTIPLIER_DFT (3) -void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num, - enum ib_event_type type) +static void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num, + enum ib_event_type type) { struct ib_event ibev; @@ -96,8 +96,8 @@ static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 
port_num) return qdev->ndev; } -int qedr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) +static int qedr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; @@ -115,8 +115,8 @@ int qedr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) +static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; @@ -133,7 +133,7 @@ int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -int qedr_iw_register_device(struct qedr_dev *dev) +static int qedr_iw_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_RNIC; dev->ibdev.query_gid = qedr_iw_query_gid; @@ -159,7 +159,7 @@ int qedr_iw_register_device(struct qedr_dev *dev) return 0; } -void qedr_roce_register_device(struct qedr_dev *dev) +static void qedr_roce_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.query_gid = qedr_query_gid; @@ -689,12 +689,12 @@ static int qedr_set_device_attr(struct qedr_dev *dev) return 0; } -void qedr_unaffiliated_event(void *context, u8 event_code) +static void qedr_unaffiliated_event(void *context, u8 event_code) { pr_err("unaffiliated event not implemented yet\n"); } -void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) +static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) { #define EVENT_TYPE_NOT_DEFINED 0 #define EVENT_TYPE_CQ 1 @@ -833,7 +833,7 @@ static int qedr_init_hw(struct qedr_dev *dev) return rc; } -void qedr_stop_hw(struct qedr_dev *dev) +static void qedr_stop_hw(struct qedr_dev *dev) { dev->ops->rdma_remove_user(dev->rdma_ctx, dev->dpi); dev->ops->rdma_stop(dev->rdma_ctx); diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 2950d3f6ecb8..478b7317b80a 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -79,7 +79,7 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, } } -void +static void qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_listener *listener = (struct qedr_iw_listener *)context; @@ -113,7 +113,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) listener->cm_id->event_handler(listener->cm_id, &event); } -void +static void qedr_iw_issue_event(void *context, struct qed_iwarp_cm_event_params *params, enum iw_cm_event_type event_type) @@ -136,7 +136,7 @@ qedr_iw_issue_event(void *context, ep->cm_id->event_handler(ep->cm_id, &event); } -void +static void qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; @@ -149,7 +149,7 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) } } -void +static void qedr_iw_qp_event(void *context, struct qed_iwarp_cm_event_params *params, enum ib_event_type ib_event, char *str) @@ -217,7 +217,7 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) qedr_iw_qp_rem_ref(&qp->ibqp); } -void +static void qedr_iw_disconnect_event(void *context, struct qed_iwarp_cm_event_params *params) { @@ -262,7 +262,7 @@ qedr_iw_passive_complete(void *context, qedr_iw_close_event(context, params); } -int +static int 
qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; @@ -274,7 +274,7 @@ qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) return dev->ops->iwarp_send_rtr(dev->rdma_ctx, &rtr_in); } -int +static int qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 2a0346678ddd..9dbf0cc985f3 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -64,11 +64,11 @@ void qedr_store_gsi_qp_cq(struct qedr_dev *dev, struct qedr_qp *qp, dev->gsi_qp = qp; } -void qedr_ll2_complete_tx_packet(void *cxt, - u8 connection_handle, - void *cookie, - dma_addr_t first_frag_addr, - bool b_last_fragment, bool b_last_packet) +static void qedr_ll2_complete_tx_packet(void *cxt, u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) { struct qedr_dev *dev = (struct qedr_dev *)cxt; struct qed_roce_ll2_packet *pkt = cookie; @@ -93,8 +93,8 @@ void qedr_ll2_complete_tx_packet(void *cxt, (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); } -void qedr_ll2_complete_rx_packet(void *cxt, - struct qed_ll2_comp_rx_data *data) +static void qedr_ll2_complete_rx_packet(void *cxt, + struct qed_ll2_comp_rx_data *data) { struct qedr_dev *dev = (struct qedr_dev *)cxt; struct qedr_cq *cq = dev->gsi_rqcq; @@ -122,10 +122,9 @@ void qedr_ll2_complete_rx_packet(void *cxt, (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); } -void qedr_ll2_release_rx_packet(void *cxt, - u8 connection_handle, - void *cookie, - dma_addr_t rx_buf_addr, bool b_last_packet) +static void qedr_ll2_release_rx_packet(void *cxt, u8 connection_handle, + void *cookie, dma_addr_t rx_buf_addr, + bool b_last_packet) { /* Do nothing... 
*/ } @@ -237,7 +236,7 @@ static int qedr_ll2_post_tx(struct qedr_dev *dev, return 0; } -int qedr_ll2_stop(struct qedr_dev *dev) +static int qedr_ll2_stop(struct qedr_dev *dev) { int rc; @@ -260,8 +259,8 @@ int qedr_ll2_stop(struct qedr_dev *dev) return rc; } -int qedr_ll2_start(struct qedr_dev *dev, - struct ib_qp_init_attr *attrs, struct qedr_qp *qp) +static int qedr_ll2_start(struct qedr_dev *dev, + struct ib_qp_init_attr *attrs, struct qedr_qp *qp) { struct qed_ll2_acquire_data data; struct qed_ll2_cbs cbs; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a8a6e66767d5..5acb9eab64ed 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2268,7 +2268,7 @@ int qedr_query_qp(struct ib_qp *ibqp, return rc; } -int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) { int rc = 0; From 99847b5c1b2de673052d8a0a089248b0dc5a37b2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:18 -0700 Subject: [PATCH 147/296] RDMA/qedr: Annotate iomem pointers correctly Signed-off-by: Bart Van Assche Cc: Ram Amrani Cc: Michal Kalderon Cc: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index be282e4915d8..50812b33291b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -819,7 +819,7 @@ static int qedr_init_hw(struct qedr_dev *dev) if (rc) goto out; - dev->db_addr = (void *)(uintptr_t)out_params.dpi_addr; + dev->db_addr = (void __iomem *)(uintptr_t)out_params.dpi_addr; dev->db_phys_addr = out_params.dpi_phys_addr; dev->db_size = out_params.dpi_size; dev->dpi = out_params.dpi; From 1b8a708bbaf3046037363c599f55ead669b5f056 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:19 -0700 Subject: [PATCH 148/296] RDMA/qedr: Remove set-but-not-used variables Signed-off-by: Bart Van Assche Cc: Ram Amrani Cc: Michal Kalderon Cc: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 5acb9eab64ed..b26aa88dab48 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1447,13 +1447,11 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); struct ib_ucontext *ib_ctx = NULL; - struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; ib_ctx = ibpd->uobject->context; - ctx = get_qedr_ucontext(ib_ctx); memset(&ureq, 0, sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); @@ -2602,7 +2600,6 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg) { - struct qedr_dev *dev; struct qedr_mr *mr; if (mr_type != IB_MR_TYPE_MEM_REG) @@ -2613,8 +2610,6 @@ struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, if (IS_ERR(mr)) return ERR_PTR(-EINVAL); - dev = mr->dev; - return &mr->ibmr; } @@ -3109,7 +3104,7 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_RDMA_READ_WITH_INV: SET_FIELD2(wqe->flags, 
RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1); - /* fallthrough... same is identical to RDMA READ */ + /* fallthrough -- same is identical to RDMA READ */ case IB_WR_RDMA_READ: wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; From 66392615dcf92558d5f5b17d01aa5ffa212d8959 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:20 -0700 Subject: [PATCH 149/296] IB/qib: Remove remaining code related to writing the EEPROM Due to removal of the EEPROM writing code, the qib_inc_eeprom_err() macro became a no-op. Remove the code that calls it. Since that change removes all code that reads the eep_st_masks array, also remove the code that updates that array and the array itself. References: commit 18c0b82a3e45 ("IB/qib: Do not write EEPROM") Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 18 ------------------ drivers/infiniband/hw/qib/qib_iba6120.c | 25 ------------------------- drivers/infiniband/hw/qib/qib_iba7220.c | 20 -------------------- drivers/infiniband/hw/qib/qib_iba7322.c | 6 +----- 4 files changed, 1 insertion(+), 68 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 1167a9c1776b..689f5d001892 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -101,18 +101,6 @@ extern const struct pci_error_handlers qib_pci_err_handler; */ #define QIB_TRAFFIC_ACTIVE_THRESHOLD (2000) -/* - * Struct used to indicate which errors are logged in each of the - * error-counters that are logged to EEPROM. A counter is incremented - * _once_ (saturating at 255) for each event with any bits set in - * the error or hwerror register masks below. - */ -#define QIB_EEP_LOG_CNT (4) -struct qib_eep_log_mask { - u64 errs_to_log; - u64 hwerrs_to_log; -}; - /* * Below contains all data related to a single context (formerly called port). */ @@ -1079,11 +1067,6 @@ struct qib_devdata { /* control high-level access to EEPROM */ struct mutex eep_lock; uint64_t traffic_wds; - /* - * masks for which bits of errs, hwerrs that cause - * each of the counters to increment. - */ - struct qib_eep_log_mask eep_st_masks[QIB_EEP_LOG_CNT]; struct qib_diag_client *diag_client; spinlock_t qib_diag_trans_lock; /* protect diag observer ops */ struct diag_observer_list_elt *diag_observer_list; @@ -1297,7 +1280,6 @@ int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr, void *buffer, int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, const void *buffer, int len); void qib_get_eeprom_info(struct qib_devdata *); -#define qib_inc_eeprom_err(dd, eidx, incr) void qib_dump_lookup_output_queue(struct qib_devdata *); void qib_force_pio_avail_update(struct qib_devdata *); void qib_clear_symerror_on_linkup(unsigned long opaque); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index c4a4c57e0f0a..3c5adfa104a0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -748,7 +748,6 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, u32 bits, ctrl; int isfatal = 0; char *bitsmsg; - int log_idx; hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); if (!hwerrs) @@ -770,11 +769,6 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, hwerrs &= dd->cspec->hwerrmask; - /* We log some errors to EEPROM, check if we have any of those. 
*/ - for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) - if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log) - qib_inc_eeprom_err(dd, log_idx, 1); - /* * Make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds. @@ -1004,7 +998,6 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) char *msg; u64 ignore_this_time = 0; u64 iserr = 0; - int log_idx; struct qib_pportdata *ppd = dd->pport; u64 mask; @@ -1015,10 +1008,6 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) /* do these first, they are most important */ if (errs & ERR_MASK(HardwareErr)) qib_handle_6120_hwerrors(dd, msg, sizeof(dd->cspec->emsgbuf)); - else - for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) - if (errs & dd->eep_st_masks[log_idx].errs_to_log) - qib_inc_eeprom_err(dd, log_idx, 1); if (errs & ~IB_E_BITSEXTANT) qib_dev_err(dd, @@ -3227,20 +3216,6 @@ static int init_6120_variables(struct qib_devdata *dd) if (qib_unordered_wc()) dd->flags |= QIB_PIO_FLUSH_WC; - /* - * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. - * 2 is Some Misc, 3 is reserved for future. - */ - dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr); - - /* Ignore errors in PIO/PBC on systems with unordered write-combining */ - if (qib_unordered_wc()) - dd->eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY; - - dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr); - - dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - ret = qib_init_pportdata(ppd, dd, 0, 1); if (ret) goto bail; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 78ce79be4120..ceb2259a5efb 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1094,7 +1094,6 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) char *msg; u64 ignore_this_time = 0; u64 iserr = 0; - int log_idx; struct qib_pportdata *ppd = dd->pport; u64 mask; @@ -1105,10 +1104,6 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) /* do these first, they are most important */ if (errs & ERR_MASK(HardwareErr)) qib_7220_handle_hwerrors(dd, msg, sizeof(dd->cspec->emsgbuf)); - else - for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) - if (errs & dd->eep_st_masks[log_idx].errs_to_log) - qib_inc_eeprom_err(dd, log_idx, 1); if (errs & QLOGIC_IB_E_SDMAERRS) sdma_7220_errors(ppd, errs); @@ -1302,7 +1297,6 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, u32 bits, ctrl; int isfatal = 0; char *bitsmsg; - int log_idx; hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); if (!hwerrs) @@ -1326,10 +1320,6 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, hwerrs &= dd->cspec->hwerrmask; - /* We log some errors to EEPROM, check if we have any of those. */ - for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) - if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log) - qib_inc_eeprom_err(dd, log_idx, 1); if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | RXE_PARITY)) qib_devinfo(dd->pcidev, @@ -4016,16 +4006,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->flags |= qib_special_trigger ? QIB_USE_SPCL_TRIG : QIB_HAS_SEND_DMA; - /* - * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. - * 2 is Some Misc, 3 is reserved for future. 
- */ - dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr); - - dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr); - - dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - init_waitqueue_head(&cpspec->autoneg_wait); INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 4d02cffe4e03..77217fae9c06 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1644,7 +1644,6 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) u64 iserr = 0; u64 errs; u64 mask; - int log_idx; qib_stats.sps_errints++; errs = qib_read_kreg64(dd, kr_errstatus); @@ -1662,10 +1661,7 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) if (errs & QIB_E_HARDWARE) { *msg = '\0'; qib_7322_handle_hwerrors(dd, msg, sizeof(dd->cspec->emsgbuf)); - } else - for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) - if (errs & dd->eep_st_masks[log_idx].errs_to_log) - qib_inc_eeprom_err(dd, log_idx, 1); + } if (errs & QIB_E_SPKTERRS) { qib_disarm_7322_senderrbufs(dd->pport); From 658115b0909c48ab0228f4a0894ce3c0d98ca397 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:21 -0700 Subject: [PATCH 150/296] IB/qib: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Acked-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_mad.c | 1 + drivers/infiniband/hw/qib/qib_rc.c | 2 +- drivers/infiniband/hw/qib/qib_sdma.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 1237952c144e..04367e0f6f9b 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -434,6 +434,7 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) /* Bad mkey not a violation below level 2 */ if (ibp->rvp.mkeyprot < 2) break; + /* fall through */ case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: if (ibp->rvp.mkey_violations != 0xFFFF) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index e9a91736b12d..8f5754fb8579 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1869,7 +1869,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto no_immediate_data; - /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ + /* fall through -- for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 891873b38a1e..c3690bd51582 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -808,7 +808,7 @@ void __qib_sdma_process_event(struct qib_pportdata *ppd, * bringing the link up with traffic active on * 7220, e.g. 
*/ ss->go_s99_running = 1; - /* fall through and start dma engine */ + /* fall through -- and start dma engine */ case qib_sdma_event_e10_go_hw_start: /* This reference means the state machine is started */ sdma_get(&ppd->sdma_state); From 2055d1f067509e2dac75faf275a8ee59dcd2b97a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:22 -0700 Subject: [PATCH 151/296] IB/qib: Remove set-but-not-used variables Signed-off-by: Bart Van Assche Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_diag.c | 6 ------ drivers/infiniband/hw/qib/qib_file_ops.c | 9 --------- drivers/infiniband/hw/qib/qib_iba6120.c | 2 -- drivers/infiniband/hw/qib/qib_iba7220.c | 3 --- drivers/infiniband/hw/qib/qib_iba7322.c | 15 ++++----------- drivers/infiniband/hw/qib/qib_mad.c | 3 +-- drivers/infiniband/hw/qib/qib_pcie.c | 3 +-- drivers/infiniband/hw/qib/qib_sd7220.c | 2 -- drivers/infiniband/hw/qib/qib_tx.c | 4 ---- drivers/infiniband/hw/qib/qib_verbs.c | 3 +-- 10 files changed, 7 insertions(+), 43 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 775018b32b0d..a9377eee8734 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -761,7 +761,6 @@ static ssize_t qib_diag_read(struct file *fp, char __user *data, { struct qib_diag_client *dc = fp->private_data; struct qib_devdata *dd = dc->dd; - void __iomem *kreg_base; ssize_t ret; if (dc->pid != current->pid) { @@ -769,8 +768,6 @@ static ssize_t qib_diag_read(struct file *fp, char __user *data, goto bail; } - kreg_base = dd->kregbase; - if (count == 0) ret = 0; else if ((count % 4) || (*off % 4)) @@ -838,7 +835,6 @@ static ssize_t qib_diag_write(struct file *fp, const char __user *data, { struct qib_diag_client *dc = fp->private_data; struct qib_devdata *dd = dc->dd; - void __iomem *kreg_base; ssize_t ret; if (dc->pid != current->pid) { @@ -846,8 +842,6 @@ static ssize_t qib_diag_write(struct file *fp, const char __user *data, goto bail; } - kreg_base = dd->kregbase; - if (count == 0) ret = 0; else if ((count % 4) || (*off % 4)) diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 9396c1807cc3..2d6a191afec0 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -696,15 +696,8 @@ static void qib_clean_part_key(struct qib_ctxtdata *rcd, struct qib_devdata *dd) { int i, j, pchanged = 0; - u64 oldpkey; struct qib_pportdata *ppd = rcd->ppd; - /* for debugging only */ - oldpkey = (u64) ppd->pkeys[0] | - ((u64) ppd->pkeys[1] << 16) | - ((u64) ppd->pkeys[2] << 32) | - ((u64) ppd->pkeys[3] << 48); - for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) { if (!rcd->pkeys[i]) continue; @@ -1817,7 +1810,6 @@ static int qib_close(struct inode *in, struct file *fp) struct qib_devdata *dd; unsigned long flags; unsigned ctxt; - pid_t pid; mutex_lock(&qib_mutex); @@ -1859,7 +1851,6 @@ static int qib_close(struct inode *in, struct file *fp) spin_lock_irqsave(&dd->uctxt_lock, flags); ctxt = rcd->ctxt; dd->rcd[ctxt] = NULL; - pid = rcd->pid; rcd->pid = 0; spin_unlock_irqrestore(&dd->uctxt_lock, flags); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 3c5adfa104a0..3a0a28e10b86 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1903,7 +1903,6 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem 
*tidptr, u32 type, unsigned long pa) { u32 __iomem *tidp32 = (u32 __iomem *)tidptr; - u32 tidx; if (!dd->kregbase) return; @@ -1927,7 +1926,6 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, else /* for now, always full 4KB page */ pa |= 2 << 29; } - tidx = tidptr - dd->egrtidbase; writel(pa, tidp32); mmiowb(); } diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index ceb2259a5efb..d66d68e2cf24 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -3514,7 +3514,6 @@ static void autoneg_7220_work(struct work_struct *work) { struct qib_pportdata *ppd; struct qib_devdata *dd; - u64 startms; u32 i; unsigned long flags; @@ -3522,8 +3521,6 @@ static void autoneg_7220_work(struct work_struct *work) autoneg_work.work)->pportdata; dd = ppd->dd; - startms = jiffies_to_msecs(jiffies); - /* * Busy wait for this first part, it should be at most a * few hundred usec, since we scheduled ourselves for 2msec. diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 77217fae9c06..b0413f9ecbe3 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3431,7 +3431,6 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { irq_handler_t handler; void *arg; - u64 val; int lsb, reg, sh; #ifdef CONFIG_INFINIBAND_QIB_DCA int dca = 0; @@ -3502,8 +3501,8 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) mask &= ~(1ULL << lsb); redirect[reg] |= ((u64) msixnum) << sh; } - val = qib_read_kreg64(dd, 2 * msixnum + 1 + - (QIB_7322_MsixTable_OFFS / sizeof(u64))); + qib_read_kreg64(dd, 2 * msixnum + 1 + + (QIB_7322_MsixTable_OFFS / sizeof(u64))); if (firstcpu < nr_cpu_ids && zalloc_cpumask_var( &dd->cspec->msix_entries[msixnum].mask, @@ -5358,16 +5357,11 @@ static void try_7322_autoneg(struct qib_pportdata *ppd) static void autoneg_7322_work(struct work_struct *work) { struct qib_pportdata *ppd; - struct qib_devdata *dd; - u64 startms; u32 i; unsigned long flags; ppd = container_of(work, struct qib_chippport_specific, autoneg_work.work)->ppd; - dd = ppd->dd; - - startms = jiffies_to_msecs(jiffies); /* * Busy wait for this first part, it should be at most a @@ -7807,13 +7801,12 @@ static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data, { struct qib_devdata *dd = ppd->dd; int chan; - u32 rbc; for (chan = 0; chan < SERDES_CHANS; ++chan) { ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), addr, data, mask); - rbc = ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), - addr, 0, 0); + ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), addr, + 0, 0); } } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 04367e0f6f9b..841312f60dfd 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -280,7 +280,7 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, { struct ib_node_info *nip = (struct ib_node_info *)&smp->data; struct qib_devdata *dd = dd_from_ibdev(ibdev); - u32 vendor, majrev, minrev; + u32 majrev, minrev; unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ /* GUID 0 is illegal */ @@ -303,7 +303,6 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, minrev = dd->minrev; nip->revision = cpu_to_be32((majrev << 16) | minrev); 
nip->local_port_num = port; - vendor = dd->vendorid; nip->vendor_id[0] = QIB_SRC_OUI_1; nip->vendor_id[1] = QIB_SRC_OUI_2; nip->vendor_id[2] = QIB_SRC_OUI_3; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 6f4cc268926c..5ac7b31c346b 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -397,7 +397,6 @@ MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); */ static void qib_tune_pcie_coalesce(struct qib_devdata *dd) { - int r; struct pci_dev *parent; u16 devid; u32 mask, bits, val; @@ -452,7 +451,7 @@ static void qib_tune_pcie_coalesce(struct qib_devdata *dd) pci_read_config_dword(parent, 0x48, &val); val &= ~mask; val |= bits; - r = pci_write_config_dword(parent, 0x48, val); + pci_write_config_dword(parent, 0x48, val); } /* diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index c72775f27212..ad52f7ac620c 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -755,7 +755,6 @@ static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc, int addr; int ret; unsigned long flags; - const char *op; /* Pick appropriate transaction reg and "Chip select" for this serdes */ switch (sdnum) { @@ -775,7 +774,6 @@ static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc, return -1; } - op = rd_notwr ? "Rd" : "Wr"; spin_lock_irqsave(&dd->cspec->sdepb_lock, flags); owned = epb_access(dd, sdnum, 1); diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index eface3b3dacf..c9537cfc12ce 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -179,8 +179,6 @@ void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask, pppd[i] = NULL; for (i = 0; i < cnt; i++) { - int which; - if (!test_bit(i, mask)) continue; /* @@ -201,9 +199,7 @@ void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask, (!test_bit(i << 1, dd->pioavailkernel) && find_ctxt(dd, i))) { __set_bit(i, dd->pio_need_disarm); - which = 0; } else { - which = 1; dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i)); } spin_unlock_irqrestore(&dd->pioavail_lock, flags); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 9d92aeb8d9a1..46e2da09d05b 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -701,7 +701,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) */ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) { - struct rvt_qp *qp, *nqp; + struct rvt_qp *qp; struct qib_qp_priv *qpp, *nqpp; struct rvt_qp *qps[20]; struct qib_ibdev *dev; @@ -714,7 +714,6 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) /* Search wait list for first QP wanting DMA descriptors. 
*/ list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) { qp = qpp->owner; - nqp = nqpp->owner; if (qp->port_num != ppd->port) continue; if (n == ARRAY_SIZE(qps)) From 2caaa2335b3d3b3a750308baa8677454e5cf3ada Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:23 -0700 Subject: [PATCH 152/296] RDMA/rdmavt: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22df09ae809e..efcff7056496 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -807,6 +807,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (init_attr->port_num == 0 || init_attr->port_num > ibpd->device->phys_port_cnt) return ERR_PTR(-EINVAL); + /* fall through */ case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: From ea6ee93b4033089b6b22b693da3354359555dc69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:24 -0700 Subject: [PATCH 153/296] RDMA/rxe: Suppress gcc 7 fall-through complaints Avoid that gcc 7 reports the following warning when building with W=1: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Cc: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_task.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 9eb12c2e3c74..ff64875e3d15 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -270,8 +270,8 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, if ((syn & AETH_TYPE_MASK) != AETH_ACK) return COMPST_ERROR; - /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE - * doesn't have an AETH) + /* fall through */ + /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH) */ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (wqe->wr.opcode != IB_WR_RDMA_READ && diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index ea3810b29273..08f05ac5f5d5 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -71,7 +71,7 @@ void rxe_do_task(unsigned long data) case TASK_STATE_BUSY: task->state = TASK_STATE_ARMED; - /* fall through to */ + /* fall through */ case TASK_STATE_ARMED: spin_unlock_irqrestore(&task->state_lock, flags); return; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 0b362f49a10a..ff77f4f66970 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -644,6 +644,7 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE_WITH_IMM: wr->ex.imm_data = ibwr->ex.imm_data; + /* fall through */ case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; From 19188436ea62336954625309d6630f7ad9ce592b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:25 -0700 Subject: [PATCH 154/296] RDMA/usnic: Make the compiler check declaration 
consistency during compilation This patch avoids that sparse complains about missing declarations. Signed-off-by: Bart Van Assche Cc: Christian Benvenuti Cc: Dave Goodell Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 1 + drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 32956f9f5715..685ef2293cb8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -43,6 +43,7 @@ #include "usnic_ib_qp_grp.h" #include "usnic_vnic.h" #include "usnic_ib_verbs.h" +#include "usnic_ib_sysfs.h" #include "usnic_log.h" #include "usnic_ib_sysfs.h" diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index e4113ef09315..8372c32485ad 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -42,6 +42,7 @@ #include "usnic_ib.h" #include "usnic_common_util.h" #include "usnic_ib_qp_grp.h" +#include "usnic_ib_verbs.h" #include "usnic_fwd.h" #include "usnic_log.h" #include "usnic_uiom.h" From 4c23e5085fd53478a14bb743dc7386abb2f68ec6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:26 -0700 Subject: [PATCH 155/296] RDMA/usnic: Remove a set-but-not-used variable Signed-off-by: Bart Van Assche Cc: Christian Benvenuti Cc: Dave Goodell Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 092d4e11a633..912d8ef04352 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -392,14 +392,12 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, void *data) { int status = 0; - int vnic_idx; struct ib_event ib_event; enum ib_qp_state old_state; struct usnic_transport_spec *trans_spec; struct usnic_ib_qp_grp_flow *qp_flow; old_state = qp_grp->state; - vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); trans_spec = (struct usnic_transport_spec *) data; spin_lock(&qp_grp->lock); From bd8c2021ab1e2891e393e7cf2fb6f3842449c742 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:49:27 -0700 Subject: [PATCH 156/296] RDMA/usnic: Instantiate data structures once Data structures should not be defined in a header file. Hence move the min_transport_spec[] definition from a header file to a .c file. 
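Because the definition was static const, every translation unit that included the header got its own private copy of the array; had it not been static, a second inclusion would instead have produced duplicate-symbol link errors. The standard pattern, as applied here, is a single definition in one .c file with an extern declaration in the header:

	/* usnic_ib_qp_grp.h */
	extern const struct usnic_vnic_res_spec
		min_transport_spec[USNIC_TRANSPORT_MAX];

	/* usnic_ib_verbs.c: the one definition */
	const struct usnic_vnic_res_spec
		min_transport_spec[USNIC_TRANSPORT_MAX] = { /* ... */ };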
Signed-off-by: Bart Van Assche Cc: Christian Benvenuti Cc: Dave Goodell Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 25 +------------------ drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 24 ++++++++++++++++++ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h index b1458be1d402..a8a2314c9531 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h @@ -84,30 +84,7 @@ struct usnic_ib_qp_grp_flow { char dentry_name[32]; }; -static const struct -usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = { - { /*USNIC_TRANSPORT_UNKNOWN*/ - .resources = { - {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, - }, - }, - { /*USNIC_TRANSPORT_ROCE_CUSTOM*/ - .resources = { - {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, - {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, - {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, - {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, - }, - }, - { /*USNIC_TRANSPORT_IPV4_UDP*/ - .resources = { - {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, - {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, - {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, - {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, - }, - }, -}; +extern const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX]; const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state); int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 8372c32485ad..aa2456a4f9bd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -51,6 +51,30 @@ #define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM +const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = { + { /*USNIC_TRANSPORT_UNKNOWN*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, + { /*USNIC_TRANSPORT_ROCE_CUSTOM*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, + { /*USNIC_TRANSPORT_IPV4_UDP*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, +}; + static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver) { *fw_ver = *((u64 *)fw_ver_str); From 242b494bf22be4056df1493664b00b5d0b238e53 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 9 Oct 2017 13:08:28 -0700 Subject: [PATCH 157/296] IB/hfi1: Fix serdes loopback set-up Change serdes mode setting to use MISC_CONFIG_BITS in VERIFY_CAP_LOCAL_LINK_WIDTH register. This method of setting up serdes loopback is universally compatible across all firmware versions. 
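Condensed from the set_local_link_attributes() hunk below, the loopback request now rides along on the link-width write instead of being issued as a separate physical-link-state command:

	u32 misc_bits = 0;

	if (loopback == LOOPBACK_SERDES)
		misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT; /* bit 0 */

	ret = write_vc_local_link_width(dd, misc_bits, 0,
					opa_to_vc_link_widths(ppd->link_width_enabled));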
Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 42 +++++++++++-------------------- drivers/infiniband/hw/hfi1/chip.h | 3 +++ 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b2ed4b9cda6e..160ada91e849 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9156,25 +9156,6 @@ static int do_quick_linkup(struct hfi1_devdata *dd) return 0; /* success */ } -/* - * Set the SerDes to internal loopback mode. - * Returns 0 on success, -errno on error. - */ -static int set_serdes_loopback_mode(struct hfi1_devdata *dd) -{ - int ret; - - ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK); - if (ret == HCMD_SUCCESS) - return 0; - dd_dev_err(dd, - "Set physical link state to SerDes Loopback failed with return %d\n", - ret); - if (ret >= 0) - ret = -EINVAL; - return ret; -} - /* * Do all special steps to set up loopback. */ @@ -9200,13 +9181,11 @@ static int init_loopback(struct hfi1_devdata *dd) return 0; } - /* handle serdes loopback */ - if (loopback == LOOPBACK_SERDES) { - /* internal serdes loopack needs quick linkup on RTL */ - if (dd->icode == ICODE_RTL_SILICON) - quick_linkup = 1; - return set_serdes_loopback_mode(dd); - } + /* + * SerDes loopback init sequence is handled in set_local_link_attributes + */ + if (loopback == LOOPBACK_SERDES) + return 0; /* LCB loopback - handled at poll time */ if (loopback == LOOPBACK_LCB) { @@ -9265,7 +9244,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) u8 tx_polarity_inversion; u8 rx_polarity_inversion; int ret; - + u32 misc_bits = 0; /* reset our fabric serdes to clear any lingering problems */ fabric_serdes_reset(dd); @@ -9311,7 +9290,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; - ret = write_vc_local_link_width(dd, 0, 0, + /* + * SerDes loopback init sequence requires + * setting bit 0 of MISC_CONFIG_BITS + */ + if (loopback == LOOPBACK_SERDES) + misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT; + + ret = write_vc_local_link_width(dd, misc_bits, 0, opa_to_vc_link_widths( ppd->link_width_enabled)); if (ret != HCMD_SUCCESS) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b8345a60a0fb..45b645e892c9 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -582,6 +582,9 @@ enum { #define LOOPBACK_LCB 2 #define LOOPBACK_CABLE 3 /* external cable */ +/* set up serdes bit in MISC_CONFIG_BITS */ +#define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0 + /* read and write hardware registers */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset); void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value); From 621515da67f150eea0e7b6b05614bd0b65d9d16c Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Mon, 9 Oct 2017 12:37:56 -0700 Subject: [PATCH 158/296] IB/hfi1: Allow meta version 4 for platform configuration Parsing of platform configuration format 4 will fail on meta version check. Allow meta version 4 during parsing. 
Reviewed-by: Jan Sokolowski Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 5aea8f47e670..001698b744ee 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1733,7 +1733,7 @@ static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table) ver_start /= 8; meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1); - if (meta_ver < 5) { + if (meta_ver < 4) { dd_dev_info( dd, "%s:Please update platform config\n", __func__); return -EINVAL; From b65c2045cc5089bd3cfacc9fad63798fded64226 Mon Sep 17 00:00:00 2001 From: Grzegorz Morys Date: Mon, 9 Oct 2017 12:38:04 -0700 Subject: [PATCH 159/296] IB/hfi1: Correct unnecessary acquisition of HW mutex Avoid acquiring already acquired hardware mutex and releasing the unacquired one as these are redundant operations. Add printouts for such situations to help detect potential errors within the driver. Reviewed-by: Mike Marciniszyn Signed-off-by: Grzegorz Morys Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 001698b744ee..cea0d4f3248f 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1387,7 +1387,14 @@ int acquire_hw_mutex(struct hfi1_devdata *dd) unsigned long timeout; int try = 0; u8 mask = 1 << dd->hfi1_id; - u8 user; + u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX); + + if (user == mask) { + dd_dev_info(dd, + "Hardware mutex already acquired, mutex mask %u\n", + (u32)mask); + return 0; + } retry: timeout = msecs_to_jiffies(HM_TIMEOUT) + jiffies; @@ -1418,7 +1425,15 @@ int acquire_hw_mutex(struct hfi1_devdata *dd) void release_hw_mutex(struct hfi1_devdata *dd) { - write_csr(dd, ASIC_CFG_MUTEX, 0); + u8 mask = 1 << dd->hfi1_id; + u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX); + + if (user != mask) + dd_dev_warn(dd, + "Unable to release hardware mutex, mutex mask %u, my mask %u\n", + (u32)user, (u32)mask); + else + write_csr(dd, ASIC_CFG_MUTEX, 0); } /* return the given resource bit(s) as a mask for the given HFI */ From d0a2f454713a42447ee4007582c0e43c47bcf230 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 9 Oct 2017 12:38:12 -0700 Subject: [PATCH 160/296] IB/hfi1: Mask out A bit from psn trace The trace logic prior to the fixes below used to mask the A bit from the psn. It now mistakenly displays the A bit, which is already displayed separately. Fix by adding the appropriate mask to the psn tracing. 
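For context: the 32-bit BTH word returned by ib_bth_get_psn() carries more than the 24-bit PSN, with the AckReq (A) bit in its most significant bit, so tracing the raw value showed the A bit a second time. Conceptually the fix amounts to the following (the explicit bit is shown for illustration; the patch uses the driver's own mask_psn() helper):

	*psn = ib_bth_get_psn(ohdr) & ~BIT(31);	/* keep the PSN, drop the A bit */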
Fixes: 228d2af1b723 ("IB/hfi1: Separate input/output header tracing") Fixes: 863cf89d472f ("IB/hfi1: Add 16B trace support") Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 9938bb983ce6..9749ec9dd9f2 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -154,7 +154,7 @@ void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, *opcode = ib_bth_get_opcode(ohdr); *tver = ib_bth_get_tver(ohdr); *pkey = ib_bth_get_pkey(ohdr); - *psn = ib_bth_get_psn(ohdr); + *psn = mask_psn(ib_bth_get_psn(ohdr)); *qpn = ib_bth_get_qpn(ohdr); } @@ -169,7 +169,7 @@ void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, *pad = ib_bth_get_pad(ohdr); *se = ib_bth_get_se(ohdr); *tver = ib_bth_get_tver(ohdr); - *psn = ib_bth_get_psn(ohdr); + *psn = mask_psn(ib_bth_get_psn(ohdr)); *qpn = ib_bth_get_qpn(ohdr); } From f8195f3b14a046ae33d9c369ffb59b4192f29e08 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 9 Oct 2017 12:38:19 -0700 Subject: [PATCH 161/296] IB/hfi1: Eliminate allocation while atomic The PIO trailing buffer was being dynamically allocated but the kcalloc return value was not being checked. Further, the GFP_KERNEL was being used even though the send engine might be called with interrupts disabled. Since the maximum size of the trailing buffer is only 12 bytes (CRC = 4, LT = 1, Pad = 0 to 7 bytes) just statically allocate the buffer, remove the alloc entirely and share it with the SDMA engine by making it global. Reported-by: Leon Romanovsky Fixes: 566d53a82644 ("IB/hfi1: Enhance PIO/SDMA send for 16B") Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/verbs.c | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 3e27794ec750..7108d4d92259 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -328,6 +328,7 @@ struct diag_pkt { #define SC15_PACKET 0xF #define SIZE_OF_CRC 1 #define SIZE_OF_LT 1 +#define MAX_16B_PADDING 12 /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */ #define LIM_MGMT_P_KEY 0x7FFF #define FULL_MGMT_P_KEY 0xFFFF diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index e232f3c608b4..726c064b22d8 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -146,6 +146,9 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 +/* 16B trailing buffer */ +static const u8 trail_buf[MAX_16B_PADDING]; + static uint wss_threshold; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -814,7 +817,6 @@ static int build_verbs_tx_desc( u16 hdrbytes = tx->hdr_dwords << 2; u32 *hdr; u8 extra_bytes = 0; - static char trail_buf[12]; /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */ if (tx->phdr.hdr.hdr_type) { /* @@ -869,9 +871,9 @@ static int build_verbs_tx_desc( } /* add icrc, lt byte, and padding to flit */ - if (extra_bytes != 0) + if (extra_bytes) ret = 
sdma_txadd_kvaddr(sde->dd, &tx->txreq, - trail_buf, extra_bytes); + (void *)trail_buf, extra_bytes); bail_txadd: return ret; @@ -1128,18 +1130,10 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, len -= slen; } } - /* - * Bypass packet will need to copy additional - * bytes to accommodate for CRC and LT bytes - */ - if (extra_bytes) { - u8 *empty_buf; + /* add icrc, lt byte, and padding to flit */ + if (extra_bytes) + seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); - empty_buf = kcalloc(extra_bytes, sizeof(u8), - GFP_KERNEL); - seg_pio_copy_mid(pbuf, empty_buf, extra_bytes); - kfree(empty_buf); - } seg_pio_copy_end(pbuf); } From a8979cc55c0034fbe129904936cfc4b5bf41e59b Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 9 Oct 2017 12:38:26 -0700 Subject: [PATCH 162/296] IB/hfi1: Set hdr_type when tx req is allocated Setting the protocol type should be part of initializing the tx request. For UC and RC, the current protocol type is part of the qp priv structure. For ud requests, it needs to be adjusted dynamically, based on the AV posted with the WQE. This patch will simplify the initialization of the tx request. Fixes: 5b6cabb0db77 ("IB/hfi1: Add 16B RC/UC support") Reviewed-by: Don Hiatt Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 1 - drivers/infiniband/hw/hfi1/uc.c | 1 - drivers/infiniband/hw/hfi1/verbs_txreq.h | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e1cf0c08ca6f..bfe3e38d4b6c 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -276,7 +276,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (IS_ERR(ps->s_txreq)) goto bail_no_tx; - ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; if (priv->hdr_type == HFI1_PKT_TYPE_9B) { /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 0b646173ca22..fcd81866e332 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -93,7 +93,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto done_free_tx; } - ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; if (priv->hdr_type == HFI1_PKT_TYPE_9B) { /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 76216f2ef35a..cec7a4b34d16 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -92,6 +92,8 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, tx->psc = priv->s_sendcontext; /* so that we can test if the sdma decriptors are there */ tx->txreq.num_desc = 0; + /* Set the header type */ + tx->phdr.hdr.hdr_type = priv->hdr_type; return tx; } From f9586abfa333143d67e3362dfb2a19ae45d82441 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Mon, 9 Oct 2017 12:38:33 -0700 Subject: [PATCH 163/296] IB/rdmavt: Don't wait for resources in QP reset Per the IBTA spec, QP destroy shall fail if the QP is attached to multicast groups, although the spec is silent on modify_qp to reset state. It implies that ULP must deregister QP from all mcast groups for destroy to succeed. The faulty patch "IB/ipoib: Update broadcast object if PKey value was changed in index 0" exposed two issues in rdmavt: 1. 
Rvt QP reset waits for qp references to go to zero. This will hang if the QP is attached to multicast groups. 2. The mcast group detach will fail for a QP in the reset state, therefore preventing the ULP from correcting the issue. This patch moves the reference count wait to the destroy QP path and allows a QP mcast detach to work in the reset state. Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mcast.c | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 1f12b69a0d07..b3a38c5e4cad 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -351,7 +351,7 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int last = 0; int ret = 0; - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + if (ibqp->qp_num <= 1) return -EINVAL; spin_lock_irq(&ibp->lock); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22df09ae809e..367c78618019 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -717,7 +717,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* take qp out the hash and wait for it to be unused */ rvt_remove_qp(rdi, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); /* grab the lock b/c it was locked at call time */ spin_lock_irq(&qp->r_lock); @@ -1443,6 +1442,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp) spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); + wait_event(qp->wait, !atomic_read(&qp->refcount)); /* qpn is now available for use again */ rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); From 978cb69672d050a1266efb44d8861751cd361512 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 10 Oct 2017 16:14:51 +0100 Subject: [PATCH 164/296] RDMA/hns: fix spelling mistake: "Reseved" -> "Reserved" Trivial fix to a spelling mistake in a dev_err error message. Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 658e3a7464b3..a5693d484421 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1005,7 +1005,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) } if (!ne) { - dev_err(dev, "Reseved loop qp is absent!\n"); + dev_err(dev, "Reserved loop qp is absent!\n"); goto free_work; } From 6a28d5a92cdda1e290f333c95fc020d1b93a729c Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 11 Oct 2017 14:41:34 +0200 Subject: [PATCH 165/296] IB/hfi1: Add MODULE_FIRMWARE statements Provide information about the firmware files the driver uses via modinfo.
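A minimal sketch of the pattern follows; the module and firmware name are hypothetical, and the MODULE_FIRMWARE() string must match what is later passed to request_firmware() for modinfo and initramfs tooling to be useful:

#include <linux/module.h>

/* Hypothetical firmware file; must match the request_firmware() string. */
MODULE_FIRMWARE("example/demo_fw.bin");

MODULE_DESCRIPTION("MODULE_FIRMWARE usage sketch");
MODULE_LICENSE("GPL");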
Signed-off-by: Thomas Bogendoerfer Reviewed-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 148a2c7bddac..98868df78a7e 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -70,6 +70,11 @@ #define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw" #define HOST_INTERFACE_VERSION 1 +MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC); +MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME); +MODULE_FIRMWARE(DEFAULT_FW_SBUS_NAME); +MODULE_FIRMWARE(DEFAULT_FW_PCIE_NAME); + static uint fw_8051_load = 1; static uint fw_fabric_serdes_load = 1; static uint fw_pcie_serdes_load = 1; From a0ddc2ec8f2912a738165b0ce47b9d945a9e3709 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Fri, 13 Oct 2017 11:38:00 +0530 Subject: [PATCH 166/296] bnxt_re: Fix incorrect usage of test_bit() test_bit() takes a bit number, while the 'flags' field in struct bnxt_qplib_rcfw was using actual BIT()-converted position values. Fix this by assigning plain bit numbers and using the consistent bit APIs for all the flag values. Also log a message in case of failure. Thanks to Dan Carpenter for pointing this out. Suggested-by: Dan Carpenter Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 5 +++-- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 8 ++++---- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e7450ea92aa9..87f8a5d5d78f 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1071,9 +1071,10 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) */ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, BNXT_RE_MAX_QPC_COUNT); - if (rc) + if (rc) { + pr_err("Failed to allocate RCFW Channel: %#x\n", rc); goto fail; - + } rc = bnxt_re_net_ring_alloc (rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr, rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 000e6a1940b9..6d116146fa3c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -168,14 +168,14 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, rcfw->seq_num++; cmdq_prod = cmdq->prod; - if (rcfw->flags & FIRMWARE_FIRST_FLAG) { + if (test_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags)) { /* The very first doorbell write * is required to set this flag * which prompts the FW to reset * its internal pointers */ - cmdq_prod |= FIRMWARE_FIRST_FLAG; - rcfw->flags &= ~FIRMWARE_FIRST_FLAG; + cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG); + clear_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); } /* ring CMDQ DB */ @@ -618,7 +618,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, /* General */ rcfw->seq_num = 0; - rcfw->flags = FIRMWARE_FIRST_FLAG; + set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD * sizeof(unsigned long)); rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 85b16da287f9..2946a7cfae82 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -162,9
+162,9 @@ struct bnxt_qplib_rcfw { unsigned long *cmdq_bitmap; u32 bmap_size; unsigned long flags; -#define FIRMWARE_INITIALIZED_FLAG BIT(0) -#define FIRMWARE_FIRST_FLAG BIT(31) -#define FIRMWARE_TIMED_OUT BIT(3) +#define FIRMWARE_INITIALIZED_FLAG 0 +#define FIRMWARE_FIRST_FLAG 31 +#define FIRMWARE_TIMED_OUT 3 wait_queue_head_t waitq; int (*aeq_handler)(struct bnxt_qplib_rcfw *, struct creq_func_event *); From e575a9bba0c0b820dc9f7c094340813db7f5c80f Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 17 Oct 2017 14:01:35 +0530 Subject: [PATCH 167/296] bnxt_re: Make room for mapping beyond 32 entries Latest chip requires indexes 32 to 47 be used for the internal HW block that manages queue mapping. Signed-off-by: Devesh Sharma Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_res.h | 2 +- drivers/infiniband/hw/bnxt_re/roce_hsi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index e87207526d2c..2e5c052da5a9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -169,7 +169,7 @@ struct bnxt_qplib_ctx { u32 cq_count; struct bnxt_qplib_hwq cq_tbl; struct bnxt_qplib_hwq tim_tbl; -#define MAX_TQM_ALLOC_REQ 32 +#define MAX_TQM_ALLOC_REQ 48 #define MAX_TQM_ALLOC_BLK_SIZE 8 u8 tqm_count[MAX_TQM_ALLOC_REQ]; struct bnxt_qplib_hwq tqm_pde; diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index eeb55b2db57e..c3cba6063a03 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -2644,7 +2644,7 @@ struct creq_query_func_resp_sb { u8 l2_db_space_size; __le16 max_srq; __le32 max_gid; - __le32 tqm_alloc_reqs[8]; + __le32 tqm_alloc_reqs[12]; }; /* Set resources command response (16 bytes) */ From 69abc735f45538fd3b3740a5293cd16cc09799cf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 16 Oct 2017 08:47:45 -0700 Subject: [PATCH 168/296] RDMA/uverbs: Make the code in ib_uverbs_cmd_verbs() less confusing This patch reduces the number of #ifdefs and also avoids that smatch reports the following: drivers/infiniband/core/uverbs_ioctl.c:276: ib_uverbs_cmd_verbs() warn: if statement not indented drivers/infiniband/core/uverbs_ioctl.c:280: ib_uverbs_cmd_verbs() warn: possible memory leak of 'ctx' drivers/infiniband/core/uverbs_ioctl.c:315: ib_uverbs_cmd_verbs() warn: if statement not indented References: commit fac9658cabb9 ("IB/core: Add new ioctl interface") Signed-off-by: Bart Van Assche Acked-by: Matan Barak Cc: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_ioctl.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 5286ad57d903..71ff2644e053 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -241,9 +241,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, struct uverbs_attr *curr_attr; unsigned long *curr_bitmap; size_t ctx_size; -#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; -#endif if (hdr->reserved) return -EINVAL; @@ -269,13 +267,10 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, (method_spec->num_child_attrs / BITS_PER_LONG + method_spec->num_buckets); -#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ if (ctx_size <= 
UVERBS_OPTIMIZE_USING_STACK_SZ) ctx = (void *)data; - if (!ctx) -#endif - ctx = kmalloc(ctx_size, GFP_KERNEL); + ctx = kmalloc(ctx_size, GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -311,10 +306,8 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, file, method_spec, ctx->uverbs_attr_bundle); out: -#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ - if (ctx_size > UVERBS_OPTIMIZE_USING_STACK_SZ) -#endif - kfree(ctx); + if (ctx != (void *)data) + kfree(ctx); return err; } From c70ca38960399a63d5c048b7b700612ea321d17e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:22 -0700 Subject: [PATCH 169/296] IB/srpt: Do not accept invalid initiator port names Make srpt_parse_i_port_id() return a negative value if hex2bin() fails. Fixes: commit a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9e8e9220f816..95178b4e3565 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2777,7 +2777,7 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) { const char *p; unsigned len, count, leading_zero_bytes; - int ret, rc; + int ret; p = name; if (strncasecmp(p, "0x", 2) == 0) @@ -2789,10 +2789,9 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) count = min(len / 2, 16U); leading_zero_bytes = 16 - count; memset(i_port_id, 0, leading_zero_bytes); - rc = hex2bin(i_port_id + leading_zero_bytes, p, count); - if (rc < 0) - pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc); - ret = 0; + ret = hex2bin(i_port_id + leading_zero_bytes, p, count); + if (ret < 0) + pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", ret); out: return ret; } From 7a01d05cd5bed9594fbc4dff5e944652ff23876c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:23 -0700 Subject: [PATCH 170/296] IB/srpt: Limit the send and receive queue sizes to what the HCA supports Additionally, correct the comment about ch->rq_size. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 95178b4e3565..76370e39857d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1650,7 +1650,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) * both both, as RDMA contexts will also post completions for the * RDMA READ case. */ - qp_init->cap.max_send_wr = srp_sq_size / 2; + qp_init->cap.max_send_wr = min(srp_sq_size / 2, attrs->max_qp_wr + 0U); qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; @@ -1953,10 +1953,11 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->cm_id = cm_id; cm_id->context = ch; /* - * Avoid QUEUE_FULL conditions by limiting the number of buffers used - * for the SRP protocol to the command queue size. + * ch->rq_size should be at least as large as the initiator queue + * depth to avoid that the initiator driver has to report QUEUE_FULL + * to the SCSI mid-layer. 
 */ - ch->rq_size = SRPT_RQ_SIZE; + ch->rq_size = min(SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr); spin_lock_init(&ch->spinlock); ch->state = CH_CONNECTING; INIT_LIST_HEAD(&ch->cmd_wait_list); From 74333f122388437cf1e2296ee9a3ae19d8858bc4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:24 -0700 Subject: [PATCH 171/296] IB/srpt: Cache global L_Key This patch is a micro-optimization for the hot path. Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 6 ++++-- drivers/infiniband/ulp/srpt/ib_srpt.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 76370e39857d..6cf95ad870cc 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -766,7 +766,7 @@ static int srpt_post_recv(struct srpt_device *sdev, BUG_ON(!sdev); list.addr = ioctx->ioctx.dma; list.length = srp_max_req_size; - list.lkey = sdev->pd->local_dma_lkey; + list.lkey = sdev->lkey; ioctx->ioctx.cqe.done = srpt_recv_done; wr.wr_cqe = &ioctx->ioctx.cqe; @@ -2343,7 +2343,7 @@ static void srpt_queue_response(struct se_cmd *cmd) sge.addr = ioctx->ioctx.dma; sge.length = resp_len; - sge.lkey = sdev->pd->local_dma_lkey; + sge.lkey = sdev->lkey; ioctx->ioctx.cqe.done = srpt_send_done; send_wr.next = NULL; @@ -2491,6 +2491,8 @@ static void srpt_add_one(struct ib_device *device) if (IS_ERR(sdev->pd)) goto free_dev; + sdev->lkey = sdev->pd->local_dma_lkey; + sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr); srq_attr.event_handler = srpt_srq_event; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 1b817e51b84b..976e924d7400 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -343,7 +343,7 @@ struct srpt_port { * struct srpt_device - Information associated by SRPT with a single HCA. * @device: Backpointer to the struct ib_device managed by the IB core. * @pd: IB protection domain. - * @mr: L_Key (local key) with write access to all local memory. + * @lkey: L_Key (local key) with write access to all local memory. * @srq: Per-HCA SRQ (shared receive queue). * @cm_id: Connection identifier. * @srq_size: SRQ size. @@ -358,6 +358,7 @@ struct srpt_port { struct srpt_device { struct ib_device *device; struct ib_pd *pd; + u32 lkey; struct ib_srq *srq; struct ib_cm_id *cm_id; int srq_size; From dea262094cdf629a3380061722b205de04057799 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:25 -0700 Subject: [PATCH 172/296] IB/srpt: Change default behavior from using SRQ to using RC Although RC mode needs more resources, it has three advantages over SRQ mode: - It works with all RDMA adapters, even those that do not support SRQ. - Posting WRs and polling WCs does not trigger lock contention, because only one thread at a time accesses a WR or WC queue in non-SRQ mode. - The end-to-end flow control mechanism is used. From the IB spec: C9-150.2.1: For QPs that are not associated with an SRQ, each HCA receive queue shall generate end-to-end flow control credits. If a QP is associated with an SRQ, the HCA receive queue shall not generate end-to-end flow control credits. Add new configfs attributes that allow configuring which mode to use (/sys/kernel/config/target/srpt/$GUID/$GUID/attrib/use_srq). Note: only the attribute for port 1 is relevant on multi-port adapters.
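As a rough sketch of the receive-side split this patch introduces (simplified names and types; the srpt diff below is authoritative), a receive WR is posted either to the shared SRQ or to the channel's own QP depending on the mode:

#include <rdma/ib_verbs.h>

/* Simplified from the srpt changes below; not the actual driver code. */
static int post_one_recv(struct ib_qp *qp, struct ib_srq *srq,
			 bool use_srq, struct ib_recv_wr *wr)
{
	struct ib_recv_wr *bad_wr;

	if (use_srq)
		return ib_post_srq_recv(srq, wr, &bad_wr);	/* shared RQ */
	return ib_post_recv(qp, wr, &bad_wr);			/* per-QP RQ */
}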
Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 152 ++++++++++++++++++++------ drivers/infiniband/ulp/srpt/ib_srpt.h | 6 + 2 files changed, 123 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6cf95ad870cc..304855b9b537 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -295,6 +295,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, { struct srpt_device *sdev = sport->sdev; struct ib_dm_ioc_profile *iocp; + int send_queue_depth; iocp = (struct ib_dm_ioc_profile *)mad->data; @@ -310,6 +311,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, return; } + if (sdev->use_srq) + send_queue_depth = sdev->srq_size; + else + send_queue_depth = min(SRPT_RQ_SIZE, + sdev->device->attrs.max_qp_wr); + memset(iocp, 0, sizeof(*iocp)); strcpy(iocp->id_string, SRPT_ID_STRING); iocp->guid = cpu_to_be64(srpt_service_guid); @@ -322,7 +329,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); iocp->protocol = cpu_to_be16(SRP_PROTOCOL); iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); - iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); + iocp->send_queue_depth = cpu_to_be16(send_queue_depth); iocp->rdma_read_depth = 4; iocp->send_size = cpu_to_be32(srp_max_req_size); iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size, @@ -686,6 +693,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, { int i; + if (!ioctx_ring) + return; + for (i = 0; i < ring_size; ++i) srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); kfree(ioctx_ring); @@ -757,7 +767,7 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, /** * srpt_post_recv() - Post an IB receive request. 
*/ -static int srpt_post_recv(struct srpt_device *sdev, +static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *ioctx) { struct ib_sge list; @@ -774,7 +784,10 @@ static int srpt_post_recv(struct srpt_device *sdev, wr.sg_list = &list; wr.num_sge = 1; - return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + if (sdev->use_srq) + return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + else + return ib_post_recv(ch->qp, &wr, &bad_wr); } /** @@ -1517,7 +1530,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, break; } - srpt_post_recv(ch->sport->sdev, recv_ioctx); + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); return; out_wait: @@ -1616,7 +1629,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_device *sdev = sport->sdev; const struct ib_device_attr *attrs = &sdev->device->attrs; u32 srp_sq_size = sport->port_attrib.srp_sq_size; - int ret; + int i, ret; WARN_ON(ch->rq_size < 1); @@ -1640,7 +1653,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) = (void(*)(struct ib_event *, void*))srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; - qp_init->srq = sdev->srq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; qp_init->qp_type = IB_QPT_RC; /* @@ -1654,6 +1666,12 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; + if (sdev->use_srq) { + qp_init->srq = sdev->srq; + } else { + qp_init->cap.max_recv_wr = ch->rq_size; + qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; + } ch->qp = ib_create_qp(sdev->pd, qp_init); if (IS_ERR(ch->qp)) { @@ -1669,6 +1687,10 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto err_destroy_cq; } + if (!sdev->use_srq) + for (i = 0; i < ch->rq_size; i++) + srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]); + atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", @@ -1818,6 +1840,10 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_size, DMA_TO_DEVICE); + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + sdev, ch->rq_size, + srp_max_req_size, DMA_FROM_DEVICE); + mutex_lock(&sdev->mutex); list_del_init(&ch->list); if (ch->release_done) @@ -1975,6 +2001,19 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->ioctx_ring[i]->ch = ch; list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); } + if (!sdev->use_srq) { + ch->ioctx_recv_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_recv_ring[0]), + srp_max_req_size, + DMA_FROM_DEVICE); + if (!ch->ioctx_recv_ring) { + pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n"); + rej->reason = + cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + goto free_ring; + } + } ret = srpt_create_ch_ib(ch); if (ret) { @@ -1982,7 +2021,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed.\n"); - goto free_ring; + goto free_recv_ring; } ret = srpt_ch_qp_rtr(ch, ch->qp); @@ -2073,6 +2112,11 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, destroy_ib: srpt_destroy_ch_ib(ch); +free_recv_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, + ch->sport->sdev, ch->rq_size, + srp_max_req_size, DMA_FROM_DEVICE); + free_ring: 
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, @@ -2502,20 +2546,38 @@ static void srpt_add_one(struct ib_device *device) srq_attr.attr.srq_limit = 0; srq_attr.srq_type = IB_SRQT_BASIC; - sdev->srq = ib_create_srq(sdev->pd, &srq_attr); - if (IS_ERR(sdev->srq)) - goto err_pd; + sdev->srq = sdev->port[0].port_attrib.use_srq ? + ib_create_srq(sdev->pd, &srq_attr) : ERR_PTR(-ENOTSUPP); + if (IS_ERR(sdev->srq)) { + pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(sdev->srq)); - pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", - __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr, - device->name); + /* SRQ not supported. */ + sdev->use_srq = false; + } else { + pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", + sdev->srq_size, sdev->device->attrs.max_srq_wr, + device->name); + + sdev->use_srq = true; + + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + srp_max_req_size, + DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) + goto err_pd; + + for (i = 0; i < sdev->srq_size; ++i) + srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]); + } if (!srpt_service_guid) srpt_service_guid = be64_to_cpu(device->node_guid); sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); if (IS_ERR(sdev->cm_id)) - goto err_srq; + goto err_ring; /* print out target login information */ pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," @@ -2535,16 +2597,6 @@ static void srpt_add_one(struct ib_device *device) srpt_event_handler); ib_register_event_handler(&sdev->event_handler); - sdev->ioctx_ring = (struct srpt_recv_ioctx **) - srpt_alloc_ioctx_ring(sdev, sdev->srq_size, - sizeof(*sdev->ioctx_ring[0]), - srp_max_req_size, DMA_FROM_DEVICE); - if (!sdev->ioctx_ring) - goto err_event; - - for (i = 0; i < sdev->srq_size; ++i) - srpt_post_recv(sdev, sdev->ioctx_ring[i]); - WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { @@ -2554,12 +2606,13 @@ static void srpt_add_one(struct ib_device *device) sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; + sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", sdev->device->name, i); - goto err_ring; + goto err_event; } } @@ -2572,16 +2625,16 @@ static void srpt_add_one(struct ib_device *device) pr_debug("added %s.\n", device->name); return; -err_ring: - srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, - DMA_FROM_DEVICE); err_event: ib_unregister_event_handler(&sdev->event_handler); err_cm: ib_destroy_cm_id(sdev->cm_id); -err_srq: - ib_destroy_srq(sdev->srq); +err_ring: + if (sdev->use_srq) + ib_destroy_srq(sdev->srq); + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, + DMA_FROM_DEVICE); err_pd: ib_dealloc_pd(sdev->pd); free_dev: @@ -2625,12 +2678,12 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) spin_unlock(&srpt_dev_lock); srpt_release_sdev(sdev); - ib_destroy_srq(sdev->srq); - ib_dealloc_pd(sdev->pd); - + if (sdev->use_srq) + ib_destroy_srq(sdev->srq); srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); - sdev->ioctx_ring = NULL; + 
ib_dealloc_pd(sdev->pd); + kfree(sdev); } @@ -2928,14 +2981,43 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, return count; } +static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + + return sprintf(page, "%d\n", sport->port_attrib.use_srq); +} + +static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = attrib_to_tpg(item); + struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + unsigned long val; + int ret; + + ret = kstrtoul(page, 0, &val); + if (ret < 0) + return ret; + if (val != !!val) + return -EINVAL; + sport->port_attrib.use_srq = val; + + return count; +} + CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rsp_size); CONFIGFS_ATTR(srpt_tpg_attrib_, srp_sq_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, use_srq); static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { &srpt_tpg_attrib_attr_srp_max_rdma_size, &srpt_tpg_attrib_attr_srp_max_rsp_size, &srpt_tpg_attrib_attr_srp_sq_size, + &srpt_tpg_attrib_attr_use_srq, NULL, }; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 976e924d7400..673387d365a3 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -252,6 +252,7 @@ enum rdma_ch_state { * @free_list: Head of list with free send I/O contexts. * @state: channel state. See also enum rdma_ch_state. * @ioctx_ring: Send ring. + * @ioctx_recv_ring: Receive I/O context ring. * @list: Node for insertion in the srpt_device.rch_list list. * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This * list contains struct srpt_ioctx elements and is protected @@ -281,6 +282,7 @@ struct srpt_rdma_ch { struct list_head free_list; enum rdma_ch_state state; struct srpt_send_ioctx **ioctx_ring; + struct srpt_recv_ioctx **ioctx_recv_ring; struct list_head list; struct list_head cmd_wait_list; struct se_session *sess; @@ -295,11 +297,13 @@ struct srpt_rdma_ch { * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_sq_size: Shared receive queue (SRQ) size. + * @use_srq: Whether or not to use SRQ. */ struct srpt_port_attrib { u32 srp_max_rdma_size; u32 srp_max_rsp_size; u32 srp_sq_size; + bool use_srq; }; /** @@ -347,6 +351,7 @@ struct srpt_port { * @srq: Per-HCA SRQ (shared receive queue). * @cm_id: Connection identifier. * @srq_size: SRQ size. + * @use_srq: Whether or not to use SRQ. * @ioctx_ring: Per-HCA SRQ. * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. * @ch_releaseQ: Enables waiting for removal from rch_list. 
@@ -362,6 +367,7 @@ struct srpt_device { struct ib_srq *srq; struct ib_cm_id *cm_id; int srq_size; + bool use_srq; struct srpt_recv_ioctx **ioctx_ring; struct list_head rch_list; wait_queue_head_t ch_releaseQ; From 8a0d18c62121d3c554a83eb96e2752861d84d937 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:26 -0700 Subject: [PATCH 173/296] IB/srp: Avoid that a cable pull can trigger a kernel crash This patch fixes the following kernel crash: general protection fault: 0000 [#1] PREEMPT SMP Workqueue: ib_mad2 timeout_sends [ib_core] Call Trace: ib_sa_path_rec_callback+0x1c4/0x1d0 [ib_core] send_handler+0xb2/0xd0 [ib_core] timeout_sends+0x14d/0x220 [ib_core] process_one_work+0x200/0x630 worker_thread+0x4e/0x3b0 kthread+0x113/0x150 Fixes: commit aef9ec39c47f ("IB: Add SCSI RDMA Protocol (SRP) initiator") Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Cc: Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 778a08250f16..8ceb4bb011f1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -665,12 +665,19 @@ static void srp_path_rec_completion(int status, static int srp_lookup_path(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; - int ret; + int ret = -ENODEV; ch->path.numb_path = 1; init_completion(&ch->done); + /* + * Avoid that the SCSI host can be removed by srp_remove_target() + * before srp_path_rec_completion() is called. + */ + if (!scsi_host_get(target->scsi_host)) + goto out; + ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client, target->srp_host->srp_dev->dev, target->srp_host->port, @@ -684,18 +691,24 @@ static int srp_lookup_path(struct srp_rdma_ch *ch) GFP_KERNEL, srp_path_rec_completion, ch, &ch->path_query); - if (ch->path_query_id < 0) - return ch->path_query_id; + ret = ch->path_query_id; + if (ret < 0) + goto put; ret = wait_for_completion_interruptible(&ch->done); if (ret < 0) - return ret; + goto put; - if (ch->status < 0) + ret = ch->status; + if (ret < 0) shost_printk(KERN_WARNING, target->scsi_host, PFX "Path record query failed\n"); - return ch->status; +put: + scsi_host_put(target->scsi_host); + +out: + return ret; } static int srp_send_req(struct srp_rdma_ch *ch, bool multich) From 9566b054926b718994c65eab55985408046668c3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:27 -0700 Subject: [PATCH 174/296] IB/srp: Remove second argument of srp_destroy_qp() This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 8ceb4bb011f1..96750ad253c4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -464,20 +464,20 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) /** * srp_destroy_qp() - destroy an RDMA queue pair - * @qp: RDMA queue pair. + * @ch: SRP RDMA channel. * * Drain the qp before destroying it. This avoids that the receive * completion handler can access the queue pair while it is * being destroyed. 
*/ -static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp) +static void srp_destroy_qp(struct srp_rdma_ch *ch) { spin_lock_irq(&ch->lock); ib_process_cq_direct(ch->send_cq, -1); spin_unlock_irq(&ch->lock); - ib_drain_qp(qp); - ib_destroy_qp(qp); + ib_drain_qp(ch->qp); + ib_destroy_qp(ch->qp); } static int srp_create_ch_ib(struct srp_rdma_ch *ch) @@ -550,7 +550,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) } if (ch->qp) - srp_destroy_qp(ch, ch->qp); + srp_destroy_qp(ch); if (ch->recv_cq) ib_free_cq(ch->recv_cq); if (ch->send_cq) @@ -617,7 +617,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, ib_destroy_fmr_pool(ch->fmr_pool); } - srp_destroy_qp(ch, ch->qp); + srp_destroy_qp(ch); ib_free_cq(ch->send_cq); ib_free_cq(ch->recv_cq); From cee687b68dbc718b5e10f2181102dc0bc3e4f006 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:28 -0700 Subject: [PATCH 175/296] IB/srp: Cache global rkey This is a micro-optimization for the hot path. Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 27 ++++++++++++++------------- drivers/infiniband/ulp/srp/ib_srp.h | 3 ++- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 96750ad253c4..463c3ed440eb 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1292,7 +1292,6 @@ static int srp_map_finish_fmr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_pd *pd = target->pd; struct ib_pool_fmr *fmr; u64 io_addr = 0; @@ -1308,9 +1307,9 @@ static int srp_map_finish_fmr(struct srp_map_state *state, if (state->npages == 0) return 0; - if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (state->npages == 1 && target->global_rkey) { srp_map_desc(state, state->base_dma_addr, state->dma_len, - pd->unsafe_global_rkey); + target->global_rkey); goto reset_state; } @@ -1350,7 +1349,6 @@ static int srp_map_finish_fr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_pd *pd = target->pd; struct ib_send_wr *bad_wr; struct ib_reg_wr wr; struct srp_fr_desc *desc; @@ -1366,12 +1364,12 @@ static int srp_map_finish_fr(struct srp_map_state *state, WARN_ON_ONCE(!dev->use_fast_reg); - if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (sg_nents == 1 && target->global_rkey) { unsigned int sg_offset = sg_offset_p ? 
*sg_offset_p : 0; srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, sg_dma_len(state->sg) - sg_offset, - pd->unsafe_global_rkey); + target->global_rkey); if (sg_offset_p) *sg_offset_p = 0; return 1; @@ -1533,7 +1531,7 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, for_each_sg(scat, sg, count, i) { srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), ib_sg_dma_len(dev->dev, sg), - target->pd->unsafe_global_rkey); + target->global_rkey); } return 0; @@ -1631,7 +1629,6 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_request *req) { struct srp_target_port *target = ch->target; - struct ib_pd *pd = target->pd; struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; int len, nents, count, ret; @@ -1667,7 +1664,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (count == 1 && target->global_rkey) { /* * The midlayer only generated a single gather/scatter * entry, or DMA mapping coalesced everything to a @@ -1677,7 +1674,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_direct_buf *buf = (void *) cmd->add_data; buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); - buf->key = cpu_to_be32(pd->unsafe_global_rkey); + buf->key = cpu_to_be32(target->global_rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); req->nmdesc = 0; @@ -1748,14 +1745,14 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, memcpy(indirect_hdr->desc_list, req->indirect_desc, count * sizeof (struct srp_direct_buf)); - if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { + if (!target->global_rkey) { ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, idb_len, &idb_rkey); if (ret < 0) goto unmap; req->nmdesc++; } else { - idb_rkey = cpu_to_be32(pd->unsafe_global_rkey); + idb_rkey = cpu_to_be32(target->global_rkey); } indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); @@ -3331,8 +3328,8 @@ static ssize_t srp_create_target(struct device *dev, target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; target->srp_host = host; - target->pd = host->srp_dev->pd; target->lkey = host->srp_dev->pd->local_dma_lkey; + target->global_rkey = host->srp_dev->global_rkey; target->cmd_sg_cnt = cmd_sg_entries; target->sg_tablesize = indirect_sg_entries ? 
: cmd_sg_entries; target->allow_ext_sg = allow_ext_sg; @@ -3651,6 +3648,10 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->pd)) goto free_dev; + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { + srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey; + WARN_ON_ONCE(srp_dev->global_rkey == 0); + } for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index ab9077b81d5a..a814f5ef16f9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -90,6 +90,7 @@ struct srp_device { struct list_head dev_list; struct ib_device *dev; struct ib_pd *pd; + u32 global_rkey; u64 mr_page_mask; int mr_page_size; int mr_max_size; @@ -179,7 +180,7 @@ struct srp_target_port { spinlock_t lock; /* read only in the hot path */ - struct ib_pd *pd; + u32 global_rkey; struct srp_rdma_ch *ch; u32 ch_count; u32 lkey; From 4c532d6ce14bb3fb4e1cb2d29fafdd7d6bded51c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:27:29 -0700 Subject: [PATCH 176/296] IB/srp: Make CM timeout dependent on subnet timeout For small networks it is safe to reduce the subnet timeout from its default value (18 for opensm) to 16. Make the SRP CM timeout dependent on the subnet timeout such that decreasing the subnet timeout also causes SRP failover and failback to occur faster. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 463c3ed440eb..972d4b3c5223 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -711,6 +711,23 @@ static int srp_lookup_path(struct srp_rdma_ch *ch) return ret; } +static u8 srp_get_subnet_timeout(struct srp_host *host) +{ + struct ib_port_attr attr; + int ret; + u8 subnet_timeout = 18; + + ret = ib_query_port(host->srp_dev->dev, host->port, &attr); + if (ret == 0) + subnet_timeout = attr.subnet_timeout; + + if (unlikely(subnet_timeout < 15)) + pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n", + dev_name(&host->srp_dev->dev->dev), subnet_timeout); + + return subnet_timeout; +} + static int srp_send_req(struct srp_rdma_ch *ch, bool multich) { struct srp_target_port *target = ch->target; @@ -719,6 +736,9 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) struct srp_login_req priv; } *req = NULL; int status; + u8 subnet_timeout; + + subnet_timeout = srp_get_subnet_timeout(target->srp_host); req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) @@ -741,8 +761,8 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) * module parameters if anyone cared about setting them. 
*/ req->param.responder_resources = 4; - req->param.remote_cm_response_timeout = 20; - req->param.local_cm_response_timeout = 20; + req->param.remote_cm_response_timeout = subnet_timeout + 2; + req->param.local_cm_response_timeout = subnet_timeout + 2; req->param.retry_count = target->tl_retry_count; req->param.rnr_retry_count = 7; req->param.max_cm_retries = 15; From a2930e5c44951902b63f61aa3d65f4312744de39 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 15:51:13 -0700 Subject: [PATCH 177/296] IB/rdmavt: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. setup_timer() was already being called before the open-coded init_timer() and .data assignment. These are removed as well. Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22df09ae809e..74f85194db31 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -57,7 +57,7 @@ #include "vt.h" #include "trace.h" -static void rvt_rc_timeout(unsigned long arg); +static void rvt_rc_timeout(struct timer_list *t); /* * Convert the AETH RNR timeout code into the number of microseconds. @@ -845,7 +845,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_qp; } /* initialize timers needed for rc qp */ - setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp); + timer_setup(&qp->s_timer, rvt_rc_timeout, 0); hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); qp->s_rnr_timer.function = rvt_rc_rnr_retry; @@ -894,8 +894,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, atomic_set(&qp->refcount, 0); atomic_set(&qp->local_ops_pending, 0); init_waitqueue_head(&qp->wait); - init_timer(&qp->s_timer); - qp->s_timer.data = (unsigned long)qp; INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; @@ -2132,9 +2130,9 @@ EXPORT_SYMBOL(rvt_del_timers_sync); /** * This is called from s_timer for missing responses. */ -static void rvt_rc_timeout(unsigned long arg) +static void rvt_rc_timeout(struct timer_list *t) { - struct rvt_qp *qp = (struct rvt_qp *)arg; + struct rvt_qp *qp = from_timer(qp, t, s_timer); struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); unsigned long flags; From 8064135e8a758f0db9a24dac0157c47f9bdb1c13 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 15:51:54 -0700 Subject: [PATCH 178/296] IB/hfi1: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Switches test of .data field to .function, since .data will be going away. 
Cc: Mike Marciniszyn Cc: Dennis Dalessandro Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/aspm.h | 7 +++---- drivers/infiniband/hw/hfi1/chip.c | 19 ++++++++----------- drivers/infiniband/hw/hfi1/driver.c | 7 +++---- drivers/infiniband/hw/hfi1/init.c | 2 +- drivers/infiniband/hw/hfi1/mad.c | 4 ++-- drivers/infiniband/hw/hfi1/mad.h | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 8 ++++---- drivers/infiniband/hw/hfi1/verbs.c | 9 ++++----- 8 files changed, 26 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 522b40ed9937..e8133870ee87 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -218,9 +218,9 @@ static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd) } /* Timer function for re-enabling ASPM in the absence of interrupt activity */ -static inline void aspm_ctx_timer_function(unsigned long data) +static inline void aspm_ctx_timer_function(struct timer_list *t) { - struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data; + struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer); unsigned long flags; spin_lock_irqsave(&rcd->aspm_lock, flags); @@ -281,8 +281,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) { spin_lock_init(&rcd->aspm_lock); - setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function, - (unsigned long)rcd); + timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0); rcd->aspm_intr_supported = rcd->dd->aspm_supported && aspm_mode == ASPM_MODE_DYNAMIC && rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 0be42787759f..1e6e181538d3 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5538,9 +5538,9 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) * associated with them. 
*/ #define RCVERR_CHECK_TIME 10 -static void update_rcverr_timer(unsigned long opaque) +static void update_rcverr_timer(struct timer_list *t) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + struct hfi1_devdata *dd = from_timer(dd, t, rcverr_timer); struct hfi1_pportdata *ppd = dd->pport; u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL); @@ -5559,7 +5559,7 @@ static void update_rcverr_timer(unsigned long opaque) static int init_rcverr(struct hfi1_devdata *dd) { - setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd); + timer_setup(&dd->rcverr_timer, update_rcverr_timer, 0); /* Assume the hardware counter has been reset */ dd->rcv_ovfl_cnt = 0; return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME); @@ -5567,9 +5567,8 @@ static int init_rcverr(struct hfi1_devdata *dd) static void free_rcverr(struct hfi1_devdata *dd) { - if (dd->rcverr_timer.data) + if (dd->rcverr_timer.function) del_timer_sync(&dd->rcverr_timer); - dd->rcverr_timer.data = 0; } static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) @@ -12089,9 +12088,8 @@ static void free_cntrs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; int i; - if (dd->synth_stats_timer.data) + if (dd->synth_stats_timer.function) del_timer_sync(&dd->synth_stats_timer); - dd->synth_stats_timer.data = 0; ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { kfree(ppd->cntrs); @@ -12367,9 +12365,9 @@ static void do_update_synth_timer(struct work_struct *work) } } -static void update_synth_timer(unsigned long opaque) +static void update_synth_timer(struct timer_list *t) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + struct hfi1_devdata *dd = from_timer(dd, t, synth_stats_timer); queue_work(dd->update_cntr_wq, &dd->update_cntr_work); mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); @@ -12387,8 +12385,7 @@ static int init_cntrs(struct hfi1_devdata *dd) const int bit_type_32_sz = strlen(bit_type_32); /* set up the stats timer; the add_timer is done at the end */ - setup_timer(&dd->synth_stats_timer, update_synth_timer, - (unsigned long)dd); + timer_setup(&dd->synth_stats_timer, update_synth_timer, 0); /***********************/ /* per device counters */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 7372cc00cb2d..fdaf5caebd35 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1252,9 +1252,9 @@ void shutdown_led_override(struct hfi1_pportdata *ppd) write_csr(dd, DCC_CFG_LED_CNTRL, 0); } -static void run_led_override(unsigned long opaque) +static void run_led_override(struct timer_list *t) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque; + struct hfi1_pportdata *ppd = from_timer(ppd, t, led_override_timer); struct hfi1_devdata *dd = ppd->dd; unsigned long timeout; int phase_idx; @@ -1298,8 +1298,7 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, * timeout so the handler will be called soon to look at our request. 
*/ if (!timer_pending(&ppd->led_override_timer)) { - setup_timer(&ppd->led_override_timer, run_led_override, - (unsigned long)ppd); + timer_setup(&ppd->led_override_timer, run_led_override, 0); ppd->led_override_timer.expires = jiffies + 1; add_timer(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 1); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index fba77001c3a7..e7042e8b5e5a 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1006,7 +1006,7 @@ static void stop_timers(struct hfi1_devdata *dd) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - if (ppd->led_override_timer.data) { + if (ppd->led_override_timer.function) { del_timer_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index f4c0ffc040cc..dc37433c49cf 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -399,9 +399,9 @@ static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap) ib_free_send_mad(send_buf); } -void hfi1_handle_trap_timer(unsigned long data) +void hfi1_handle_trap_timer(struct timer_list *t) { - struct hfi1_ibport *ibp = (struct hfi1_ibport *)data; + struct hfi1_ibport *ibp = from_timer(ibp, t, rvp.trap_timer); struct trap_node *trap = NULL; unsigned long flags; int i; diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 4c1245072093..a4f8ac16332c 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -428,6 +428,6 @@ struct sc2vlnt { COUNTER_MASK(1, 4)) void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); -void hfi1_handle_trap_timer(unsigned long data); +void hfi1_handle_trap_timer(struct timer_list *t); #endif /* _HFI1_MAD_H */ diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6781bcdb10b3..512c263d99d9 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -491,10 +491,10 @@ static void sdma_err_progress_check_schedule(struct sdma_engine *sde) } } -static void sdma_err_progress_check(unsigned long data) +static void sdma_err_progress_check(struct timer_list *t) { unsigned index; - struct sdma_engine *sde = (struct sdma_engine *)data; + struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer); dd_dev_err(sde->dd, "SDE progress check event\n"); for (index = 0; index < sde->dd->num_sdma; index++) { @@ -1453,8 +1453,8 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->progress_check_head = 0; - setup_timer(&sde->err_progress_check_timer, - sdma_err_progress_check, (unsigned long)sde); + timer_setup(&sde->err_progress_check_timer, + sdma_err_progress_check, 0); sde->descq = dma_zalloc_coherent( &dd->pcidev->dev, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index e232f3c608b4..038fe221c27f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -667,9 +667,9 @@ void hfi1_16B_rcv(struct hfi1_packet *packet) * This is called from a timer to check for QPs * which need kernel memory in order to send a packet. 
*/ -static void mem_timer(unsigned long data) +static void mem_timer(struct timer_list *t) { - struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data; + struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer); struct list_head *list = &dev->memwait; struct rvt_qp *qp = NULL; struct iowait *wait; @@ -1636,8 +1636,7 @@ static void init_ibport(struct hfi1_pportdata *ppd) for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++) INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list); - setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, - (unsigned long)ibp); + timer_setup(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, 0); spin_lock_init(&ibp->rvp.lock); /* Set the prefix to the default value (see ch. 4.1.1) */ @@ -1844,7 +1843,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. */ - setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); + timer_setup(&dev->mem_timer, mem_timer, 0); seqlock_init(&dev->iowait_lock); seqlock_init(&dev->txwait_lock); From 051947b3b2a2c8487affafaf86587770e1048b78 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 15:54:11 -0700 Subject: [PATCH 179/296] RDMA/cxgb3: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Also removes an unused timer. Cc: Steve Wise Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 12 +++++------- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 - drivers/infiniband/hw/cxgb3/iwch_provider.h | 1 - 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 86975370a4c0..638ac9a06053 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -107,7 +107,7 @@ static struct workqueue_struct *workq; static struct sk_buff_head rxq; static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); -static void ep_timeout(unsigned long arg); +static void ep_timeout(struct timer_list *t); static void connect_reply_upcall(struct iwch_ep *ep, int status); static void start_ep_timer(struct iwch_ep *ep) @@ -119,8 +119,6 @@ static void start_ep_timer(struct iwch_ep *ep) } else get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; - ep->timer.data = (unsigned long)ep; - ep->timer.function = ep_timeout; add_timer(&ep->timer); } @@ -1399,7 +1397,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) child_ep->l2t = l2t; child_ep->dst = dst; child_ep->hwtid = hwtid; - init_timer(&child_ep->timer); + timer_setup(&child_ep->timer, ep_timeout, 0); cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid); accept_cr(child_ep, req->peer_ip, skb); goto out; @@ -1719,9 +1717,9 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } -static void ep_timeout(unsigned long arg) +static void ep_timeout(struct timer_list *t) { - struct iwch_ep *ep = (struct iwch_ep *)arg; + struct iwch_ep *ep = from_timer(ep, t, timer); struct iwch_qp_attributes attrs; unsigned long flags; int abort = 1; @@ -1899,7 +1897,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto out; } - init_timer(&ep->timer); + timer_setup(&ep->timer, ep_timeout, 0); ep->plen = 
conn_param->private_data_len; if (ep->plen) memcpy(ep->mpa_pkt + sizeof(struct mpa_message), diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 099e76f3758a..a578ca559e11 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -969,7 +969,6 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, insert_mmap(ucontext, mm2); } qhp->ibqp.qp_num = qhp->wq.qpid; - init_timer(&(qhp->timer)); pr_debug("%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.qpid, qhp, (unsigned long long)qhp->wq.dma_addr, diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 9e216edec4c0..2e38ddefea8a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -168,7 +168,6 @@ struct iwch_qp { atomic_t refcnt; wait_queue_head_t wait; enum IWCH_QP_FLAGS flags; - struct timer_list timer; }; static inline int qp_quiesced(struct iwch_qp *qhp) From 2ec46d68464c26fc524ebf75afe42964266ea73e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Oct 2017 11:37:54 -0700 Subject: [PATCH 180/296] RDMA/i40iw: Convert timers to use timer_setup() (part 2) In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. This includes the remaining timers missed in the earlier i40iw patch. Cc: Faisal Latif Cc: Shiraz Saleem Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 7 +++---- drivers/infiniband/hw/i40iw/i40iw_utils.c | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5230dd3c938c..af7526a275b8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1188,7 +1188,7 @@ static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node * i40iw_cm_timer_tick - system's timer expired callback * @pass: Pointing to cm_core */ -static void i40iw_cm_timer_tick(unsigned long pass) +static void i40iw_cm_timer_tick(struct timer_list *t) { unsigned long nexttimeout = jiffies + I40IW_LONG_TIME; struct i40iw_cm_node *cm_node; @@ -1196,7 +1196,7 @@ static void i40iw_cm_timer_tick(unsigned long pass) struct list_head *list_core_temp; struct i40iw_sc_vsi *vsi; struct list_head *list_node; - struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass; + struct i40iw_cm_core *cm_core = from_timer(cm_core, t, tcp_timer); u32 settimer = 0; unsigned long timetosend; struct i40iw_sc_dev *dev; @@ -3195,8 +3195,7 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) INIT_LIST_HEAD(&cm_core->connected_nodes); INIT_LIST_HEAD(&cm_core->listen_nodes); - setup_timer(&cm_core->tcp_timer, i40iw_cm_timer_tick, - (unsigned long)cm_core); + timer_setup(&cm_core->tcp_timer, i40iw_cm_timer_tick, 0); spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index f6c76595e834..2ed31004b19c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -870,9 
+870,9 @@ void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred) * i40iw_terminate_imeout - timeout happened * @context: points to iwarp qp */ -static void i40iw_terminate_timeout(unsigned long context) +static void i40iw_terminate_timeout(struct timer_list *t) { - struct i40iw_qp *iwqp = (struct i40iw_qp *)context; + struct i40iw_qp *iwqp = from_timer(iwqp, t, terminate_timer); struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp; i40iw_terminate_done(qp, 1); @@ -889,8 +889,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp) iwqp = (struct i40iw_qp *)qp->back_qp; i40iw_add_ref(&iwqp->ibqp); - setup_timer(&iwqp->terminate_timer, i40iw_terminate_timeout, - (unsigned long)iwqp); + timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0); iwqp->terminate_timer.expires = jiffies + HZ; add_timer(&iwqp->terminate_timer); } From a9346abed52f08e3e0ceb66d51f527ea11698d3c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 15:52:31 -0700 Subject: [PATCH 181/296] RDMA/cxgb4: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Also removes an unused timer and drops a redundant initialization. Cc: Steve Wise Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 13 +++++-------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 - drivers/infiniband/hw/cxgb4/qp.c | 1 - 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index daf7a56e5d7e..fc981e68f0b2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -144,7 +144,7 @@ static struct workqueue_struct *workq; static struct sk_buff_head rxq; static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); -static void ep_timeout(unsigned long arg); +static void ep_timeout(struct timer_list *t); static void connect_reply_upcall(struct c4iw_ep *ep, int status); static int sched(struct c4iw_dev *dev, struct sk_buff *skb); @@ -189,8 +189,6 @@ static void start_ep_timer(struct c4iw_ep *ep) clear_bit(TIMEOUT, &ep->com.flags); c4iw_get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; - ep->timer.data = (unsigned long)ep; - ep->timer.function = ep_timeout; add_timer(&ep->timer); } @@ -2101,7 +2099,6 @@ static int c4iw_reconnect(struct c4iw_ep *ep) __u8 *ra; pr_debug("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); - init_timer(&ep->timer); c4iw_init_wr_wait(&ep->com.wr_wait); /* When MPA revision is different on nodes, the node with MPA_rev=2 @@ -2579,7 +2576,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) pr_debug("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__, child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); - init_timer(&child_ep->timer); + timer_setup(&child_ep->timer, ep_timeout, 0); cxgb4_insert_tid(t, child_ep, hwtid, child_ep->com.local_addr.ss_family); insert_ep_tid(child_ep); @@ -3204,7 +3201,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail1; } - init_timer(&ep->timer); + timer_setup(&ep->timer, ep_timeout, 0); ep->plen = conn_param->private_data_len; if (ep->plen) memcpy(ep->mpa_pkt + sizeof(struct mpa_message), @@ -4119,9 +4116,9 @@ static void process_work(struct 
work_struct *work) static DECLARE_WORK(skb_work, process_work); -static void ep_timeout(unsigned long arg) +static void ep_timeout(struct timer_list *t) { - struct c4iw_ep *ep = (struct c4iw_ep *)arg; + struct c4iw_ep *ep = from_timer(ep, t, timer); int kickit = 0; spin_lock(&timeout_lock); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 819a30635d53..c22eeee20df9 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -480,7 +480,6 @@ struct c4iw_qp { struct mutex mutex; struct kref kref; wait_queue_head_t wait; - struct timer_list timer; int sq_sig_all; struct work_struct free_work; struct c4iw_ucontext *ucontext; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cb7fc0d35d1d..c306def0501c 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1935,7 +1935,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->ucontext = ucontext; } qhp->ibqp.qp_num = qhp->wq.sq.qid; - init_timer(&(qhp->timer)); INIT_LIST_HEAD(&qhp->db_fc_entry); pr_debug("%s sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", __func__, From 5cda6587feec790a089703dde2e6e1f82de50bbd Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:12 +0300 Subject: [PATCH 182/296] IB/core: Introduce and use rdma_create_user_ah Introduce the rdma_create_user_ah API, which allows passing udata to the provider driver and additionally resolves the DMAC for RoCE. ib_resolve_eth_dmac() resolves the destination MAC address for unicast, multicast, link-local, IPv4-mapped-IPv6 and IPv6 destination GID entries. This allows all RoCE provider drivers to avoid duplicating that code. This change brings consistency: the IB core always resolves the DMAC and passes it to the RoCE provider drivers for both user and kernel consumers, so ah_attr->roce.dmac is always an input field for provider drivers. This uniformity avoids exporting the ib_resolve_eth_dmac symbol to providers or other modules; it is therefore removed as an exported symbol later in the patch series. Now uverbs and umad both make use of the rdma_create_user_ah API, which fixes the issue where umad had an invalid DMAC for the address.
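As a rough caller-side sketch (the wrapper function below is hypothetical and not part of this patch; udata may be NULL, as in the user_mad.c hunk), user-facing consumers now go through rdma_create_user_ah(), and for RDMA_AH_ATTR_TYPE_ROCE attributes the core fills in ah_attr->roce.dmac before the provider's create_ah is called:

/* Hypothetical wrapper, for illustration only. */
static struct ib_ah *create_udata_ah(struct ib_pd *pd,
                                     struct rdma_ah_attr *ah_attr,
                                     struct ib_udata *udata)
{
        /* For RoCE AH types, ib_core resolves ah_attr->roce.dmac here. */
        struct ib_ah *ah = rdma_create_user_ah(pd, ah_attr, udata);

        if (IS_ERR(ah))
                return ah;      /* e.g. DMAC resolution failed */

        /* ah->device, ah->pd and pd->usecnt were set up by ib_core. */
        return ah;
}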
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 10 +------ drivers/infiniband/core/verbs.c | 40 ++++++++++++++++++++++++++-- include/rdma/ib_verbs.h | 15 +++++++++++ 4 files changed, 55 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c1696e6084b2..38809962c105 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -506,7 +506,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid); } - ah = rdma_create_ah(agent->qp->pd, &ah_attr); + ah = rdma_create_user_ah(agent->qp->pd, &ah_attr, NULL); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 52a2cf2d83aa..0cef1863e2dc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2537,7 +2537,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct rdma_ah_attr attr; int ret; struct ib_udata udata; - u8 *dmac; if (out_len < sizeof resp) return -ENOSPC; @@ -2580,20 +2579,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } else { rdma_ah_set_ah_flags(&attr, 0); } - dmac = rdma_ah_retrieve_dmac(&attr); - if (dmac) - memset(dmac, 0, ETH_ALEN); - - ah = pd->device->create_ah(pd, &attr, &udata); + ah = rdma_create_user_ah(pd, &attr, &udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } - ah->device = pd->device; - ah->pd = pd; - atomic_inc(&pd->usecnt); ah->uobject = uobj; uobj->user_handle = cmd.user_handle; uobj->object = ah; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index de57d6c11a25..4dcfe47c479d 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -302,11 +302,13 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ -struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) { struct ib_ah *ah; - ah = pd->device->create_ah(pd, ah_attr, NULL); + ah = pd->device->create_ah(pd, ah_attr, udata); if (!IS_ERR(ah)) { ah->device = pd->device; @@ -318,8 +320,42 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) return ah; } + +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +{ + return _rdma_create_ah(pd, ah_attr, NULL); +} EXPORT_SYMBOL(rdma_create_ah); +/** + * rdma_create_user_ah - Creates an address handle for the + * given address vector. + * It resolves destination mac address for ah attribute of RoCE type. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * @udata: pointer to user's input output buffer information need by + * provider driver. + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. 
+ */ +struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) +{ + int err; + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + err = ib_resolve_eth_dmac(pd->device, ah_attr); + if (err) + return ERR_PTR(err); + } + + return _rdma_create_ah(pd, ah_attr, udata); +} +EXPORT_SYMBOL(rdma_create_user_ah); + int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e8608b2dc844..09c4a695155e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2857,6 +2857,21 @@ void ib_dealloc_pd(struct ib_pd *pd); */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr); +/** + * rdma_create_user_ah - Creates an address handle for the given address vector. + * It resolves destination mac address for ah attribute of RoCE type. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * @udata: pointer to user's input output buffer information need by + * provider driver. + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata); /** * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header * work completion. From c0348eb069687a2f27c0cd23dafb35918edf9e75 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:13 +0300 Subject: [PATCH 183/296] IB: Let ib_core resolve destination mac address Since IB/core resolves the destination mac address for user and kernel consumers, avoid resolving in multiple provider drivers. Only ib_core resolves DMAC now, therefore resolve_eth_dmac is removed as exported symbol. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 8 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 -------- drivers/infiniband/hw/hns/hns_roce_ah.c | 14 +------------- drivers/infiniband/hw/mlx4/ah.c | 8 +++----- drivers/infiniband/hw/mlx5/ah.c | 4 ---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 15 --------------- include/rdma/ib_verbs.h | 2 -- 7 files changed, 9 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4dcfe47c479d..d8f1a5d34f4f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -53,6 +53,9 @@ #include "core_priv.h" +static int ib_resolve_eth_dmac(struct ib_device *device, + struct rdma_ah_attr *ah_attr); + static const char * const ib_events[] = { [IB_EVENT_CQ_ERR] = "CQ error", [IB_EVENT_QP_FATAL] = "QP fatal error", @@ -1257,8 +1260,8 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_dmac(struct ib_device *device, - struct rdma_ah_attr *ah_attr) +static int ib_resolve_eth_dmac(struct ib_device *device, + struct rdma_ah_attr *ah_attr) { int ret = 0; struct ib_global_route *grh; @@ -1317,7 +1320,6 @@ int ib_resolve_eth_dmac(struct ib_device *device, out: return ret; } -EXPORT_SYMBOL(ib_resolve_eth_dmac); /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. 
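The per-driver hunks that follow all converge on the same pattern; condensed from the hns_roce_ah.c hunk below (mlx4 is analogous with ah->av.eth.mac), a RoCE provider's create_ah can now simply consume the pre-resolved DMAC:

        /* ah_attr->roce.dmac was already resolved by ib_core. */
        memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);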
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0d89621d9fe8..20e5eb9290ad 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -729,14 +729,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1; break; } - rc = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, - ah_attr->roce.dmac, &vlan_tag, - &sgid_attr.ndev->ifindex, - NULL); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to get dmac\n"); - goto fail; - } } memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d545302b8ef8..05bab4a39d91 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -48,7 +48,6 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; - struct in6_addr in6; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); union ib_gid sgid; int ret; @@ -58,18 +57,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); /* Get mac address */ - memcpy(&in6, grh->dgid.raw, sizeof(grh->dgid.raw)); - if (rdma_is_multicast_addr(&in6)) { - rdma_get_mcast_mac(&in6, ah->av.mac); - } else { - u8 *dmac = rdma_ah_retrieve_dmac(ah_attr); - - if (!dmac) { - kfree(ah); - return ERR_PTR(-EINVAL); - } - memcpy(ah->av.mac, dmac, ETH_ALEN); - } + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); /* Get source gid */ ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 538c46a73248..6dee4fdc5d67 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -92,12 +92,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, int ret; memcpy(&in6, grh->dgid.raw, sizeof(in6)); - if (rdma_is_multicast_addr(&in6)) { + if (rdma_is_multicast_addr(&in6)) is_mcast = 1; - rdma_get_mcast_mac(&in6, ah->av.eth.mac); - } else { - memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); - } + + memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr), grh->sgid_index, &sgid, &gid_attr); if (ret) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 3363e29157f6..fe269f680103 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -89,10 +89,6 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, resp.response_length = min_resp_len; - err = ib_resolve_eth_dmac(pd->device, ah_attr); - if (err) - return ERR_PTR(err); - memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d0249e463338..dec650930ca6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -201,21 +201,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, /* Get network header type for this GID */ ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); - if ((pd->uctx) && - (!rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw)) && - (!rdma_link_local_addr((struct in6_addr *)grh->dgid.raw))) { - status = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, - 
attr->roce.dmac, - &vlan_tag, - &sgid_attr.ndev->ifindex, - NULL); - if (status) { - pr_err("%s(): Failed to resolve dmac from gid." - "status = %d\n", __func__, status); - goto av_conf_err; - } - } - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); if (status) goto av_conf_err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 09c4a695155e..9810e4568635 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3622,8 +3622,6 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); -int ib_resolve_eth_dmac(struct ib_device *device, - struct rdma_ah_attr *ah_attr); int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) From 79c4d80b43b8e43684894574a508a871f0c196bf Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:14 +0300 Subject: [PATCH 184/296] IB/core: Fix unable to change lifespan entry for hw_counters This patch fixes the case where the 'lifespan' entry of the hw_counters is not writable. Currently no write callback is exposed for the hw_counters sysfs operation. Due to this, modifying the lifespan value results in a permission-denied error, as in the example below. echo 10 > /sys/class/infiniband/mlx5_0/ports/1/hw_counters/lifespan -bash: /sys/class/infiniband/mlx5_0/ports/1/hw_counters/lifespan: Permission denied This patch adds the hook to modify any attribute which implements the store() operation. Fixes: b40f4757daa1 ("IB/core: Make device counter infrastructure dynamic") Signed-off-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index abc5ab581f82..e30d86fa1855 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -108,8 +108,22 @@ static ssize_t port_attr_show(struct kobject *kobj, return port_attr->show(p, port_attr, buf); } +static ssize_t port_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct port_attribute *port_attr = + container_of(attr, struct port_attribute, attr); + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + + if (!port_attr->store) + return -EIO; + return port_attr->store(p, port_attr, buf, count); +} + static const struct sysfs_ops port_sysfs_ops = { - .show = port_attr_show + .show = port_attr_show, + .store = port_attr_store }; static ssize_t gid_attr_show(struct kobject *kobj, From 39baf10310e6669564a485b55267fae70a4e44ae Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:15 +0300 Subject: [PATCH 185/296] IB/core: Fix use workqueue without WQ_MEM_RECLAIM The IB/core provides an address resolution service and invokes the requester's callback handler, in worker-thread context, when an address resolve request completes. Such a caller might allocate or free memory in the callback handler, depending on the completion status, to make further progress or to terminate a connection. Most ULPs resolve a route, which involves allocating route entry and path record elements in the callback event handler. It has been noticed in this [1] thread discussion that the WQ_MEM_RECLAIM flag should not be used for workers that tend to allocate memory.
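A minimal sketch of that failure mode (identifiers below are illustrative, not from the patch): a work item on a WQ_MEM_RECLAIM workqueue that itself allocates with GFP_KERNEL cannot guarantee forward progress during reclaim, which is exactly the guarantee the flag advertises.

#include <linux/workqueue.h>
#include <linux/slab.h>

struct route_entry { struct list_head list; };  /* illustrative stand-in */

/* The worker allocates memory, as ULP route-resolution callbacks do. */
static void resolve_work_fn(struct work_struct *work)
{
        struct route_entry *re = kzalloc(sizeof(*re), GFP_KERNEL);

        /* ... hand the entry to the consumer's callback ... */
        kfree(re);
}

static struct workqueue_struct *addr_wq;

static int example_init(void)
{
        /* Allocating workers must not claim WQ_MEM_RECLAIM; hence flags 0. */
        addr_wq = alloc_ordered_workqueue("ib_addr", 0);
        return addr_wq ? 0 : -ENOMEM;
}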
In order to mitigate this situation, the WQ_MEM_RECLAIM flag was dropped for other such WQs in this [2] patch. A similar problem might arise with the address resolution path, though it has not yet been observed. The ib_addr workqueue is not a memory-reclaim path: the callbacks it invokes might allocate memory, or might not free any memory, under memory pressure. [1] https://www.spinics.net/lists/linux-rdma/msg53239.html [2] https://www.spinics.net/lists/linux-rdma/msg53416.html Fixes: f54816261c2b ("IB/addr: Remove deprecated create_singlethread_workqueue") Fixes: 5fff41e1f89d ("IB/core: Fix race condition in resolving IP to MAC") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 12523f630b61..d2f74721b3ba 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -852,7 +852,7 @@ static struct notifier_block nb = { int addr_init(void) { - addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM); + addr_wq = alloc_ordered_workqueue("ib_addr", 0); if (!addr_wq) return -ENOMEM; From 99260132fde7bddc6e0132ce53da94d1c9ccabcb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:16 +0300 Subject: [PATCH 186/296] IB/core: Fix calculation of maximum RoCE MTU The original code only took into consideration the largest possible header after the BTH (IB_BTH_BYTES). This was incorrect, as the largest possible header size is the largest possible combination of headers we might run into. The new code accounts for all possible headers in the largest possible combination and subtracts that from the MTU to make sure that all packets will fit on the wire. Link: https://www.spinics.net/lists/linux-rdma/msg54558.html Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reported-by: Roland Dreier Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 7 ++++--- include/rdma/ib_pack.h | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ec5008cf5d51..8815989301ab 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -245,10 +245,11 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g static inline enum ib_mtu iboe_get_mtu(int mtu) { /* - * reduce IB headers from effective IBoE MTU. 28 stands for - * atomic header which is the biggest possible header after BTH + * Reduce IB headers from effective IBoE MTU.
*/ - mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES + + IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES + + IB_ICRC_BYTES); if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 36655899ee02..7ea1382ad0e5 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -37,14 +37,17 @@ #include enum { - IB_LRH_BYTES = 8, - IB_ETH_BYTES = 14, - IB_VLAN_BYTES = 4, - IB_GRH_BYTES = 40, - IB_IP4_BYTES = 20, - IB_UDP_BYTES = 8, - IB_BTH_BYTES = 12, - IB_DETH_BYTES = 8 + IB_LRH_BYTES = 8, + IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, + IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8, + IB_EXT_ATOMICETH_BYTES = 28, + IB_EXT_XRC_BYTES = 4, + IB_ICRC_BYTES = 4 }; struct ib_field { From e980b44134c89afb65176e70aaf293d608002e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Mon, 16 Oct 2017 08:45:17 +0300 Subject: [PATCH 187/296] IB/mlx5: Use ARRAY_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the ARRAY_SIZE macro improves the readability of the code. Found with Coccinelle with the following semantic patch: @r depends on (org || report)@ type T; T[] E; position p; @@ ( (sizeof(E)@p /sizeof(*E)) | (sizeof(E)@p /sizeof(E[...])) | (sizeof(E)@p /sizeof(T)) ) Signed-off-by: Jérémy Lefaure Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3d701c7a4c91..e2197bdda89c 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_ib.h" #include "cmd.h" @@ -929,9 +930,8 @@ static int mlx5_ib_mr_initiator_pfault_handler( return -EFAULT; } - if (unlikely(opcode >= sizeof(mlx5_ib_odp_opcode_cap) / - sizeof(mlx5_ib_odp_opcode_cap[0]) || - !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) { + if (unlikely(opcode >= ARRAY_SIZE(mlx5_ib_odp_opcode_cap) || + !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) { mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n", opcode); return -EFAULT; From 2d7099fc3daa2e898424c2cb1bad4484d6c7e116 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 16 Oct 2017 15:45:54 -0500 Subject: [PATCH 188/296] i40iw: Cleanup AE processing Remove unimplemented Asynchronous Events (AE) and correct names. 
Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 5 ----- drivers/infiniband/hw/i40iw/i40iw_d.h | 10 ++-------- drivers/infiniband/hw/i40iw/i40iw_hw.c | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 1e9f18c86e2b..c46b5d10e306 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -4376,10 +4376,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp, i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_TO_WRAP); break; - case I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH: - i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR, - (LAYER_MPA << 4) | DDP_LLP, MPA_MARKER); - break; case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR: i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_MPA << 4) | DDP_LLP, MPA_CRC); @@ -4395,7 +4391,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp, (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL); break; case I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN: - case I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID: i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_RANGE); break; diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 2ebaadbed379..dc319e987cde 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1636,7 +1636,8 @@ enum i40iw_alignment { #define I40IW_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119 #define I40IW_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a #define I40IW_AE_AMP_MWBIND_BIND_DISABLED 0x011b -#define I40IW_AE_AMP_WQE_INVALID_PARAMETER 0x0130 +#define I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132 +#define I40IW_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134 #define I40IW_AE_BAD_CLOSE 0x0201 #define I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202 #define I40IW_AE_CQ_OPERATION_ERROR 0x0203 @@ -1644,12 +1645,10 @@ enum i40iw_alignment { #define I40IW_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205 #define I40IW_AE_STAG_ZERO_INVALID 0x0206 #define I40IW_AE_IB_RREQ_AND_Q1_FULL 0x0207 -#define I40IW_AE_SRQ_LIMIT 0x0209 #define I40IW_AE_WQE_UNEXPECTED_OPCODE 0x020a #define I40IW_AE_WQE_INVALID_PARAMETER 0x020b #define I40IW_AE_WQE_LSMM_TOO_LONG 0x0220 #define I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 -#define I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID 0x0302 #define I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 #define I40IW_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 #define I40IW_AE_DDP_UBE_INVALID_MO 0x0305 @@ -1663,12 +1662,10 @@ enum i40iw_alignment { #define I40IW_AE_INVALID_ARP_ENTRY 0x0401 #define I40IW_AE_INVALID_TCP_OPTION_RCVD 0x0402 #define I40IW_AE_STALE_ARP_ENTRY 0x0403 -#define I40IW_AE_INVALID_WQE_LENGTH 0x0404 #define I40IW_AE_INVALID_MAC_ENTRY 0x0405 #define I40IW_AE_LLP_CLOSE_COMPLETE 0x0501 #define I40IW_AE_LLP_CONNECTION_RESET 0x0502 #define I40IW_AE_LLP_FIN_RECEIVED 0x0503 -#define I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504 #define I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505 #define I40IW_AE_LLP_SEGMENT_TOO_LARGE 0x0506 #define I40IW_AE_LLP_SEGMENT_TOO_SMALL 0x0507 @@ -1685,9 +1682,6 @@ enum i40iw_alignment { #define I40IW_AE_LCE_QP_CATASTROPHIC 0x0700 #define I40IW_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 #define I40IW_AE_LCE_CQ_CATASTROPHIC 0x0702 -#define I40IW_AE_UDA_XMIT_FRAG_SEQ 0x0800 
-#define I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0801 -#define I40IW_AE_UDA_XMIT_IPADDR_MISMATCH 0x0802 #define I40IW_AE_QP_SUSPEND_COMPLETE 0x0900 #define OP_DELETE_LOCAL_MAC_IPADDR_ENTRY 1 diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 7e9de845a350..e96bdafbcbb3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -408,7 +408,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) case I40IW_AE_LCE_FUNCTION_CATASTROPHIC: case I40IW_AE_LCE_CQ_CATASTROPHIC: case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG: - case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH: + case I40IW_AE_UDA_XMIT_DGRAM_TOO_SHORT: ctx_info->err_rq_idx_valid = false; /* fall through */ default: From 4236f4b99ee0dcefea909ab2a179dc6a0abcb8d6 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 16 Oct 2017 15:45:55 -0500 Subject: [PATCH 189/296] i40iw: Ignore AE source field in AEQE for some AEs The AE source field in the Asynchronous Event Queue Entry (AEQE) is not set by the hardware for some AEs, but the code assumes it is. This results in incorrect processing of some AEs. Fix this by setting ae_src to I40IW_AE_SOURCE_RSVD for those AEs. Fixes: 86dbcd0f12e9 ("i40iw: add file to handle cqp calls") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 49 ++++++++++++++++++++++++ drivers/infiniband/hw/i40iw/i40iw_d.h | 1 + 2 files changed, 50 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index c46b5d10e306..04284e819011 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -1774,6 +1774,53 @@ static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq, info->iwarp_state = (u8)RS_64(temp, I40IW_AEQE_IWSTATE); info->q2_data_written = (u8)RS_64(temp, I40IW_AEQE_Q2DATA); info->aeqe_overflow = (bool)RS_64(temp, I40IW_AEQE_OVERFLOW); + + switch (info->ae_id) { + case I40IW_AE_PRIV_OPERATION_DENIED: + case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG: + case I40IW_AE_UDA_XMIT_DGRAM_TOO_SHORT: + case I40IW_AE_BAD_CLOSE: + case I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE: + case I40IW_AE_RDMA_READ_WHILE_ORD_ZERO: + case I40IW_AE_STAG_ZERO_INVALID: + case I40IW_AE_IB_RREQ_AND_Q1_FULL: + case I40IW_AE_WQE_UNEXPECTED_OPCODE: + case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION: + case I40IW_AE_DDP_UBE_INVALID_MO: + case I40IW_AE_DDP_UBE_INVALID_QN: + case I40IW_AE_DDP_NO_L_BIT: + case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION: + case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE: + case I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST: + case I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: + case I40IW_AE_INVALID_ARP_ENTRY: + case I40IW_AE_INVALID_TCP_OPTION_RCVD: + case I40IW_AE_STALE_ARP_ENTRY: + case I40IW_AE_LLP_CLOSE_COMPLETE: + case I40IW_AE_LLP_CONNECTION_RESET: + case I40IW_AE_LLP_FIN_RECEIVED: + case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR: + case I40IW_AE_LLP_SEGMENT_TOO_SMALL: + case I40IW_AE_LLP_SYN_RECEIVED: + case I40IW_AE_LLP_TERMINATE_RECEIVED: + case I40IW_AE_LLP_TOO_MANY_RETRIES: + case I40IW_AE_LLP_DOUBT_REACHABILITY: + case I40IW_AE_RESET_SENT: + case I40IW_AE_TERMINATE_SENT: + case I40IW_AE_RESET_NOT_SENT: + case I40IW_AE_LCE_QP_CATASTROPHIC: + case I40IW_AE_QP_SUSPEND_COMPLETE: + info->qp = true; + info->compl_ctx = compl_ctx; + ae_src = I40IW_AE_SOURCE_RSVD; + break; + case I40IW_AE_LCE_CQ_CATASTROPHIC: + info->cq = true; + info->compl_ctx = LS_64_1(compl_ctx, 1); + ae_src =
I40IW_AE_SOURCE_RSVD; + break; + } + switch (ae_src) { case I40IW_AE_SOURCE_RQ: case I40IW_AE_SOURCE_RQ_0011: @@ -1807,6 +1854,8 @@ static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq, info->compl_ctx = compl_ctx; info->out_rdrsp = true; break; + case I40IW_AE_SOURCE_RSVD: + /* fallthrough */ default: break; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index dc319e987cde..60c785a68b5e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -128,6 +128,7 @@ &_ceq->ceqe_base[I40IW_RING_GETCURRENT_TAIL(_ceq->ceq_ring)] \ ) +#define I40IW_AE_SOURCE_RSVD 0x0 #define I40IW_AE_SOURCE_RQ 0x1 #define I40IW_AE_SOURCE_RQ_0011 0x3 From de9f063468d00fe961723f72ac7d60f51f528d98 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 16 Oct 2017 15:45:56 -0500 Subject: [PATCH 190/296] i40iw: Remove unused static_rsrc from i40iw_create_qp_info The field static_rsrc in i40iw_create_qp_info is unused and not needed. Remove it. Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 -- drivers/infiniband/hw/i40iw/i40iw_d.h | 3 --- drivers/infiniband/hw/i40iw/i40iw_type.h | 2 -- 3 files changed, 7 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 04284e819011..a4bd968f8bc1 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -2448,7 +2448,6 @@ static enum i40iw_status_code i40iw_sc_qp_create( LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) | LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) | LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) | - LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) | LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) | LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) | LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); @@ -2511,7 +2510,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify( LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) | LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) | LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) | - LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) | LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) | LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) | LS_64(info->reset_tcp_conn, I40IW_CQPSQ_QP_RESETCON) | diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 60c785a68b5e..40d474384a4e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -540,9 +540,6 @@ #define I40IW_CQPSQ_QP_MSSCHANGE_SHIFT 52 #define I40IW_CQPSQ_QP_MSSCHANGE_MASK (1ULL << I40IW_CQPSQ_QP_MSSCHANGE_SHIFT) -#define I40IW_CQPSQ_QP_STATRSRC_SHIFT 53 -#define I40IW_CQPSQ_QP_STATRSRC_MASK (1ULL << I40IW_CQPSQ_QP_STATRSRC_SHIFT) - #define I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT 54 #define I40IW_CQPSQ_QP_IGNOREMWBOUND_MASK \ (1ULL << I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT) diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 7798e8e2f9bd..608164250c65 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -535,7 +535,6 @@ struct i40iw_create_qp_info { bool ord_valid; bool tcp_ctx_valid; bool cq_num_valid; - bool static_rsrc; bool arp_cache_idx_valid; }; @@ -547,7 +546,6 @@ struct i40iw_modify_qp_info { bool ord_valid; bool tcp_ctx_valid; bool cq_num_valid; - bool static_rsrc; 
bool arp_cache_idx_valid; bool reset_tcp_conn; bool remove_hash_idx; From 66f49f88aef8388da03753ce9c60758bacd730bf Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 16 Oct 2017 15:45:57 -0500 Subject: [PATCH 191/296] i40iw: Move exception_lan_queue to VSI structure Consolidate exception_lan_queue under VSI structure where it belongs. Remove it from device and QP structures. Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 6 +++--- drivers/infiniband/hw/i40iw/i40iw_main.c | 4 ++-- drivers/infiniband/hw/i40iw/i40iw_type.h | 5 ++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index a4bd968f8bc1..a3ea42888576 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -2406,7 +2406,6 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp, qp->rcv_tph_en = info->rcv_tph_en; qp->xmit_tph_en = info->xmit_tph_en; qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle; - qp->exception_lan_queue = qp->pd->dev->exception_lan_queue; return 0; } @@ -2741,7 +2740,7 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) | LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) | LS_64(qp->qs_handle, I40IWQPC_QSHANDLE) | - LS_64(qp->exception_lan_queue, I40IWQPC_EXCEPTION_LAN_QUEUE)); + LS_64(vsi->exception_lan_queue, I40IWQPC_EXCEPTION_LAN_QUEUE)); if (info->iwarp_info_valid) { qw0 |= LS_64(iw->ddp_ver, I40IWQPC_DDP_VER) | @@ -4584,6 +4583,8 @@ void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *inf vsi->dev = info->dev; vsi->back_vsi = info->back_vsi; vsi->mss = info->params->mss; + vsi->exception_lan_queue = info->exception_lan_queue; + i40iw_fill_qos_list(info->params->qs_handle_list); for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { @@ -5068,7 +5069,6 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, dev->debug_mask = info->debug_mask; dev->hmc_fn_id = info->hmc_fn_id; - dev->exception_lan_queue = info->exception_lan_queue; dev->is_pf = info->is_pf; dev->fpm_query_buf_pa = info->fpm_query_buf_pa; diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 27590ae21881..ee0323fe765d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -958,7 +958,7 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev) memset(&info, 0, sizeof(info)); info.type = I40IW_PUDA_RSRC_TYPE_IEQ; info.cq_id = 2; - info.qp_id = iwdev->sc_dev.exception_lan_queue; + info.qp_id = iwdev->vsi.exception_lan_queue; info.count = 1; info.pd_id = 2; info.sq_size = 8192; @@ -1338,7 +1338,6 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, iwdev->dcb = true; } i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb); - info.exception_lan_queue = 1; info.vchnl_send = i40iw_virtchnl_send; status = i40iw_device_init(&iwdev->sc_dev, &info); @@ -1348,6 +1347,7 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, vsi_info.dev = &iwdev->sc_dev; vsi_info.back_vsi = (void *)iwdev; vsi_info.params = &l2params; + vsi_info.exception_lan_queue = 1; i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info); if (dev->is_pf) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 608164250c65..e8d153b963ec 100644 --- 
a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -381,7 +381,6 @@ struct i40iw_sc_qp { u8 *q2_buf; u64 qp_compl_ctx; u16 qs_handle; - u16 exception_lan_queue; u16 push_idx; u8 sq_tph_val; u8 rq_tph_val; @@ -460,6 +459,7 @@ struct i40iw_sc_vsi { u32 ieq_count; struct i40iw_virt_mem ieq_mem; struct i40iw_puda_rsrc *ieq; + u16 exception_lan_queue; u16 mss; u8 fcn_id; bool stats_fcn_id_alloc; @@ -502,7 +502,6 @@ struct i40iw_sc_dev { struct i40iw_hmc_fpm_misc hmc_fpm_misc; u32 debug_mask; - u16 exception_lan_queue; u8 hmc_fn_id; bool is_pf; bool vchnl_up; @@ -574,6 +573,7 @@ struct i40iw_vsi_init_info { struct i40iw_sc_dev *dev; void *back_vsi; struct i40iw_l2params *params; + u16 exception_lan_queue; }; struct i40iw_vsi_stats_info { @@ -591,7 +591,6 @@ struct i40iw_device_init_info { struct i40iw_hw *hw; void __iomem *bar0; enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16); - u16 exception_lan_queue; u8 hmc_fn_id; bool is_pf; u32 debug_mask; From b0d4f703693aabf727924d1568cc36e1b7484c66 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 16 Oct 2017 15:45:58 -0500 Subject: [PATCH 192/296] i40iw: Remove unused structures Some structures for post SQ operation are not used. Remove the following: i40iw_post_send_w_inv i40iw_post_inline_send_w_inv send_w_sol send_w_inv send_w_sol_inv inline_send_w_sol inline_send_w_inv inline_send_w_sol_inv Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_user.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index 15e8f55983fd..a898f9923a6f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -204,18 +204,6 @@ struct i40iw_post_inline_send { u32 len; }; -struct i40iw_post_send_w_inv { - i40iw_sgl sg_list; - u32 num_sges; - i40iw_stag remote_stag_to_inv; -}; - -struct i40iw_post_inline_send_w_inv { - void *data; - u32 len; - i40iw_stag remote_stag_to_inv; -}; - struct i40iw_rdma_write { i40iw_sgl lo_sg_list; u32 num_lo_sges; @@ -257,9 +245,6 @@ struct i40iw_post_sq_info { bool defer_flag; union { struct i40iw_post_send send; - struct i40iw_post_send send_w_sol; - struct i40iw_post_send_w_inv send_w_inv; - struct i40iw_post_send_w_inv send_w_sol_inv; struct i40iw_rdma_write rdma_write; struct i40iw_rdma_read rdma_read; struct i40iw_rdma_read rdma_read_inv; @@ -267,9 +252,6 @@ struct i40iw_post_sq_info { struct i40iw_inv_local_stag inv_local_stag; struct i40iw_inline_rdma_write inline_rdma_write; struct i40iw_post_inline_send inline_send; - struct i40iw_post_inline_send inline_send_w_sol; - struct i40iw_post_inline_send_w_inv inline_send_w_inv; - struct i40iw_post_inline_send_w_inv inline_send_w_sol_inv; } op; }; From 343d86bd45d2d5770acc8cab2e4c4b4f6595d1f6 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 16 Oct 2017 15:45:59 -0500 Subject: [PATCH 193/296] i40iw: Account for IPv6 header when setting MSS The IPv6 header size is not subtracted from the MTU when the MSS is set for QPs. Save the MTU, as opposed to the MSS, in the vsi struct during initialization and calculate the MSS based on whether the connection is IPv4 or IPv6.
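A short sketch of the resulting arithmetic (the helper below is hypothetical; the constants are the I40IW_MTU_TO_MSS_IPV4/IPV6 defines added in i40iw_d.h, i.e. a 20-byte IPv4 or 40-byte IPv6 header plus the 20-byte TCP header):

#define I40IW_MTU_TO_MSS_IPV4   40      /* 20 IPv4 + 20 TCP */
#define I40IW_MTU_TO_MSS_IPV6   60      /* 40 IPv6 + 20 TCP */

/* Hypothetical helper mirroring the i40iw_make_cm_node() change below. */
static inline u16 i40iw_mtu_to_mss(u16 mtu, bool ipv4)
{
        /* For the default 1500-byte MTU: 1460 (IPv4) vs. 1440 (IPv6). */
        return ipv4 ? mtu - I40IW_MTU_TO_MSS_IPV4 :
                      mtu - I40IW_MTU_TO_MSS_IPV6;
}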
Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 3 --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 3 ++- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 5 ++--- drivers/infiniband/hw/i40iw/i40iw_d.h | 4 ++++ drivers/infiniband/hw/i40iw/i40iw_main.c | 6 +++--- drivers/infiniband/hw/i40iw/i40iw_puda.c | 3 ++- drivers/infiniband/hw/i40iw/i40iw_type.h | 4 ++-- 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index a65e4cbdce2f..4ae9131b6350 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -119,9 +119,6 @@ #define I40IW_CQP_COMPL_SQ_WQE_FLUSHED 3 #define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4 -#define I40IW_MTU_TO_MSS 40 -#define I40IW_DEFAULT_MSS 1460 - struct i40iw_cqp_compl_info { u32 op_ret_val; u16 maj_err_code; diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4bd54b367532..493d6ef3d2d5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2193,7 +2193,8 @@ static struct i40iw_cm_node *i40iw_make_cm_node( I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; ts = current_kernel_time(); cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; - cm_node->tcp_cntxt.mss = iwdev->vsi.mss; + cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) : + (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6); cm_node->iwdev = iwdev; cm_node->dev = &iwdev->sc_dev; diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index a3ea42888576..fbc0f95b0ebd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -348,7 +348,7 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa u16 qs_handle; int i; - vsi->mss = l2params->mss; + vsi->mtu = l2params->mtu; i40iw_fill_qos_list(l2params->qs_handle_list); for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { @@ -4582,9 +4582,8 @@ void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *inf vsi->dev = info->dev; vsi->back_vsi = info->back_vsi; - vsi->mss = info->params->mss; + vsi->mtu = info->params->mtu; vsi->exception_lan_queue = info->exception_lan_queue; - i40iw_fill_qos_list(info->params->qs_handle_list); for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 40d474384a4e..17ed45b42df4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -73,6 +73,10 @@ #define I40IW_FIRST_NON_PF_STAT 4 +#define I40IW_MTU_TO_MSS_IPV4 40 +#define I40IW_MTU_TO_MSS_IPV6 60 +#define I40IW_DEFAULT_MTU 1500 + #define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits) #define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits) #define LS_32_1(val, bits) (u32)(val << bits) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index ee0323fe765d..438f5269548b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1327,8 +1327,8 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, info.bar0 = ldev->hw_addr; info.hw = &iwdev->hw; info.debug_mask = debug; - l2params.mss = - (ldev->params.mtu) ? 
ldev->params.mtu - I40IW_MTU_TO_MSS : I40IW_DEFAULT_MSS; + l2params.mtu = + (ldev->params.mtu) ? ldev->params.mtu : I40IW_DEFAULT_MTU; for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) { qset = ldev->params.qos.prio_qos[i].qs_handle; l2params.qs_handle_list[i] = qset; @@ -1748,7 +1748,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle; - l2params->mss = (params->mtu) ? params->mtu - I40IW_MTU_TO_MSS : iwdev->vsi.mss; + l2params->mtu = (params->mtu) ? params->mtu : iwdev->vsi.mtu; INIT_WORK(&work->work, i40iw_l2params_worker); queue_work(iwdev->param_wq, &work->work); diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index dfbade0ad003..af6d0dc82b89 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -1401,7 +1401,8 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, pfpdu->rcv_nxt = fps; pfpdu->fps = fps; pfpdu->mode = true; - pfpdu->max_fpdu_data = ieq->vsi->mss; + pfpdu->max_fpdu_data = (buf->ipv4) ? (ieq->vsi->mtu - I40IW_MTU_TO_MSS_IPV4) : + (ieq->vsi->mtu - I40IW_MTU_TO_MSS_IPV6); pfpdu->pmode_count++; INIT_LIST_HEAD(rxlist); i40iw_ieq_check_first_buf(buf, fps); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index e8d153b963ec..f443f2075d6f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -460,7 +460,7 @@ struct i40iw_sc_vsi { struct i40iw_virt_mem ieq_mem; struct i40iw_puda_rsrc *ieq; u16 exception_lan_queue; - u16 mss; + u16 mtu; u8 fcn_id; bool stats_fcn_id_alloc; struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY]; @@ -566,7 +566,7 @@ struct i40iw_ccq_cqe_info { struct i40iw_l2params { u16 qs_handle_list[I40IW_MAX_USER_PRIORITY]; - u16 mss; + u16 mtu; }; struct i40iw_vsi_init_info { From 9381699eedd1b5dd5b490e993b870dff7571ddcf Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 16 Oct 2017 15:46:00 -0500 Subject: [PATCH 194/296] i40iw: Move ceq_valid to i40iw_sc_dev structure Completion Event Queues are created and destroyed on a per device basis as opposed to per User-space Direct Access resource. Move ceq_valid to the correct place in i40iw_sc_dev from i40iw_puda_rsrc. 
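Both PUDA resource types (ILQ and IEQ) on a device share this CEQ state, so a per-resource copy could go stale; condensed from the i40iw_puda_qp_create() hunk below, the device-wide flag gates whether creation goes through the CQP or falls back to a directly built WQE:

        if (rsrc->dev->ceq_valid)
                ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp);
        else
                ret = i40iw_puda_qp_wqe(rsrc->dev, qp);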
Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 19 ++++++++++--------- drivers/infiniband/hw/i40iw/i40iw_puda.c | 9 ++++----- drivers/infiniband/hw/i40iw/i40iw_puda.h | 2 -- drivers/infiniband/hw/i40iw/i40iw_type.h | 1 + 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 438f5269548b..dc6b338772e9 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -353,6 +353,8 @@ static void i40iw_dele_ceqs(struct i40iw_device *iwdev) i40iw_disable_irq(dev, msix_vec, (void *)iwceq); i40iw_destroy_ceq(iwdev, iwceq); } + + iwdev->sc_dev.ceq_valid = false; } /** @@ -810,17 +812,16 @@ static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev, i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx); iwdev->ceqs_count++; } - exit: - if (status) { - if (!iwdev->ceqs_count) { - kfree(iwdev->ceqlist); - iwdev->ceqlist = NULL; - } else { - status = 0; - } + if (status && !iwdev->ceqs_count) { + kfree(iwdev->ceqlist); + iwdev->ceqlist = NULL; + return status; + } else { + iwdev->sc_dev.ceq_valid = true; + return 0; } - return status; + } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index af6d0dc82b89..fee6643d6626 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -611,7 +611,7 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) qp->user_pri = 0; i40iw_qp_add_qos(qp); i40iw_puda_qp_setctx(rsrc); - if (rsrc->ceq_valid) + if (rsrc->dev->ceq_valid) ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp); else ret = i40iw_puda_qp_wqe(rsrc->dev, qp); @@ -704,7 +704,7 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc) ret = dev->iw_priv_cq_ops->cq_init(cq, &info); if (ret) goto error; - if (rsrc->ceq_valid) + if (rsrc->dev->ceq_valid) ret = i40iw_cqp_cq_create_cmd(dev, cq); else ret = i40iw_puda_cq_wqe(dev, cq); @@ -724,7 +724,7 @@ static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc) struct i40iw_ccq_cqe_info compl_info; struct i40iw_sc_dev *dev = rsrc->dev; - if (rsrc->ceq_valid) { + if (rsrc->dev->ceq_valid) { i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp); return; } @@ -757,7 +757,7 @@ static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc) struct i40iw_ccq_cqe_info compl_info; struct i40iw_sc_dev *dev = rsrc->dev; - if (rsrc->ceq_valid) { + if (rsrc->dev->ceq_valid) { i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq); return; } @@ -922,7 +922,6 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, rsrc->xmit_complete = i40iw_ieq_tx_compl; } - rsrc->ceq_valid = info->ceq_valid; rsrc->type = info->type; rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize); rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize); diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h index dba05ce7d392..660aa3edae56 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.h +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h @@ -100,7 +100,6 @@ struct i40iw_puda_rsrc_info { enum puda_resource_type type; /* ILQ or IEQ */ u32 count; u16 pd_id; - bool ceq_valid; u32 cq_id; u32 qp_id; u32 sq_size; @@ -125,7 +124,6 @@ struct i40iw_puda_rsrc { enum puda_resource_type type; u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */ u16 mss; - bool ceq_valid; u32 
cq_id; u32 qp_id; u32 sq_size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index f443f2075d6f..a27d392c92a2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -505,6 +505,7 @@ struct i40iw_sc_dev { u8 hmc_fn_id; bool is_pf; bool vchnl_up; + bool ceq_valid; u8 vf_id; wait_queue_head_t vf_reqs; u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY]; From 5b4a1a8b87fda31c1a1d44aba7e6926167610a20 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 16 Oct 2017 15:46:01 -0500 Subject: [PATCH 195/296] i40iw: Reinitialize IEQ on MTU change On a netdev MTU change event, the iWARP Exception Queue (IEQ) buffers may not be sized properly to handle the new MTU. Reinitialize the IEQ with new MTU size on MTU change event. Also, add define for the max ethernet frame size field in IEQ QP context instead of the snd_mss define which is for iWARP QPs' MSS field. Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 5 ++++- drivers/infiniband/hw/i40iw/i40iw_d.h | 3 +++ drivers/infiniband/hw/i40iw/i40iw_main.c | 19 +++++++++++++++++-- drivers/infiniband/hw/i40iw/i40iw_p.h | 1 + drivers/infiniband/hw/i40iw/i40iw_puda.c | 2 +- 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index fbc0f95b0ebd..3b6d12176413 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -348,7 +348,10 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa u16 qs_handle; int i; - vsi->mtu = l2params->mtu; + if (vsi->mtu != l2params->mtu) { + vsi->mtu = l2params->mtu; + i40iw_reinitialize_ieq(dev); + } i40iw_fill_qos_list(l2params->qs_handle_list); for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 17ed45b42df4..f1a6ce6c927c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1107,6 +1107,9 @@ #define I40IWQPC_SNDMSS_SHIFT 16 #define I40IWQPC_SNDMSS_MASK (0x3fffUL << I40IWQPC_SNDMSS_SHIFT) +#define I40IW_UDA_QPC_MAXFRAMESIZE_SHIFT 16 +#define I40IW_UDA_QPC_MAXFRAMESIZE_MASK (0x3fffUL << I40IW_UDA_QPC_MAXFRAMESIZE_SHIFT) + #define I40IWQPC_VLANTAG_SHIFT 32 #define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index dc6b338772e9..e824296713e2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -964,14 +964,29 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev) info.pd_id = 2; info.sq_size = 8192; info.rq_size = 8192; - info.buf_size = 2048; - info.tx_buf_cnt = 16384; + info.buf_size = iwdev->vsi.mtu + VLAN_ETH_HLEN; + info.tx_buf_cnt = 4096; status = i40iw_puda_create_rsrc(&iwdev->vsi, &info); if (status) i40iw_pr_err("ieq create fail\n"); return status; } +/** + * i40iw_reinitialize_ieq - destroy and re-create ieq + * @dev: iwarp device + */ +void i40iw_reinitialize_ieq(struct i40iw_sc_dev *dev) +{ + struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; + + i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, false); + if (i40iw_initialize_ieq(iwdev)) { + iwdev->reset = true; + i40iw_request_reset(iwdev); + } +} + /** * i40iw_hmc_setup - create hmc objects for the device * 
@iwdev: iwarp device diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index 5498ad01c280..2ad5a8ebee7e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -123,5 +123,6 @@ enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw, enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, struct i40iw_virt_mem *mem); u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq); +void i40iw_reinitialize_ieq(struct i40iw_sc_dev *dev); #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index fee6643d6626..19ced8bd64c0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -488,7 +488,7 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc) LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) | LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE)); - set_64bit_val(qp_ctx, 48, LS_64(1514, I40IWQPC_SNDMSS)); + set_64bit_val(qp_ctx, 48, LS_64(rsrc->buf_size, I40IW_UDA_QPC_MAXFRAMESIZE)); set_64bit_val(qp_ctx, 56, 0); set_64bit_val(qp_ctx, 64, 1); From 1196923838cf1d2b7abcb9a1007eede09ad93acd Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 16 Oct 2017 15:46:02 -0500 Subject: [PATCH 196/296] i40iw: Refactor queue depth calculation Queue depth calculations use a mix of work requests and actual number of bytes. Consolidate all calculations using minimum WQE size to avoid confusion. Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_d.h | 9 +++- drivers/infiniband/hw/i40iw/i40iw_uk.c | 63 ++++++++++++++++++----- drivers/infiniband/hw/i40iw/i40iw_user.h | 4 +- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 34 +++--------- 4 files changed, 67 insertions(+), 43 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index f1a6ce6c927c..65ec39e3746b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1301,8 +1301,13 @@ (0xffffffffULL << I40IWQPC_LOCAL_IPADDR0_SHIFT) /* wqe size considering 32 bytes per wqe*/ -#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */ -#define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */ +#define I40IW_QP_SW_MIN_WQSIZE 4 /*in WRs*/ +#define I40IW_SQ_RSVD 2 +#define I40IW_RQ_RSVD 1 +#define I40IW_MAX_QUANTAS_PER_WR 2 +#define I40IW_QP_SW_MAX_SQ_QUANTAS 2048 +#define I40IW_QP_SW_MAX_RQ_QUANTAS 16384 +#define I40IW_MAX_QP_WRS ((I40IW_QP_SW_MAX_SQ_QUANTAS / I40IW_MAX_QUANTAS_PER_WR) - 1) #define I40IWQP_OP_RDMA_WRITE 0 #define I40IWQP_OP_RDMA_READ 1 diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 42dde5924bb5..3ec5389a81a1 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -893,9 +893,22 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, return ret_code; } +/** + * i40iw_qp_roundup - return round up QP WQ depth + * @wqdepth: WQ depth in quantas to round up + */ +static int i40iw_qp_round_up(u32 wqdepth) +{ + int scount = 1; + + for (wqdepth--; scount <= 16; scount *= 2) + wqdepth |= wqdepth >> scount; + + return ++wqdepth; +} + /** * i40iw_get_wqe_shift - get shift count for maximum wqe size - * @wqdepth: depth of wq required. 
* @sge: Maximum Scatter Gather Elements wqe * @inline_data: Maximum inline data size * @shift: Returns the shift needed based on sge @@ -905,22 +918,48 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes). * Shift of 2 otherwise (wqe size of 128 bytes). */ -enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift) +void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift) { - u32 size; - *shift = 0; if (sge > 1 || inline_data > 16) *shift = (sge < 4 && inline_data <= 48) ? 1 : 2; +} - /* check if wqdepth is multiple of 2 or not */ +/* + * i40iw_get_sqdepth - get SQ depth (quantas) + * @sq_size: SQ size + * @shift: shift which determines size of WQE + * @sqdepth: depth of SQ + * + */ +enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth) +{ + *sqdepth = i40iw_qp_round_up((sq_size << shift) + I40IW_SQ_RSVD); - if ((wqdepth < I40IWQP_SW_MIN_WQSIZE) || (wqdepth & (wqdepth - 1))) + if (*sqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift)) + *sqdepth = I40IW_QP_SW_MIN_WQSIZE << shift; + else if (*sqdepth > I40IW_QP_SW_MAX_SQ_QUANTAS) return I40IW_ERR_INVALID_SIZE; - size = wqdepth << *shift; /* multiple of 32 bytes count */ - if (size > I40IWQP_SW_MAX_WQSIZE) + return 0; +} + +/* + * i40iw_get_rq_depth - get RQ depth (quantas) + * @rq_size: RQ size + * @shift: shift which determines size of WQE + * @rqdepth: depth of RQ + * + */ +enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth) +{ + *rqdepth = i40iw_qp_round_up((rq_size << shift) + I40IW_RQ_RSVD); + + if (*rqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift)) + *rqdepth = I40IW_QP_SW_MIN_WQSIZE << shift; + else if (*rqdepth > I40IW_QP_SW_MAX_RQ_QUANTAS) return I40IW_ERR_INVALID_SIZE; + return 0; } @@ -974,9 +1013,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT) return I40IW_ERR_INVALID_FRAG_COUNT; - ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift); - if (ret_code) - return ret_code; + i40iw_get_wqe_shift(info->max_sq_frag_cnt, info->max_inline_data, &sqshift); qp->sq_base = info->sq; qp->rq_base = info->rq; @@ -1010,9 +1047,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, I40IW_RING_INIT(qp->rq_ring, qp->rq_size); switch (info->abi_ver) { case 4: - ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift); - if (ret_code) - return ret_code; + i40iw_get_wqe_shift(info->max_rq_frag_cnt, 0, &rqshift); break; case 5: /* fallthrough until next ABI version */ default: diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index a898f9923a6f..e73efc59a0ab 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -425,5 +425,7 @@ enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size); enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size); enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, u8 *wqe_size); -enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift); +void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift); +enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth); +enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth); #endif diff --git 
a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 5c60de6bd999..3c6f3ce88f89 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -69,7 +69,7 @@ static int i40iw_query_device(struct ib_device *ibdev, props->hw_ver = (u32)iwdev->sc_dev.hw_rev; props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; props->max_qp = iwdev->max_qp - iwdev->used_qps; - props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1; + props->max_qp_wr = I40IW_MAX_QP_WRS; props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; props->max_cq = iwdev->max_cq - iwdev->used_cqs; props->max_cqe = iwdev->max_cqe; @@ -380,22 +380,6 @@ static int i40iw_dealloc_pd(struct ib_pd *ibpd) return 0; } -/** - * i40iw_qp_roundup - return round up qp ring size - * @wr_ring_size: ring size to round up - */ -static int i40iw_qp_roundup(u32 wr_ring_size) -{ - int scount = 1; - - if (wr_ring_size < I40IWQP_SW_MIN_WQSIZE) - wr_ring_size = I40IWQP_SW_MIN_WQSIZE; - - for (wr_ring_size--; scount <= 16; scount *= 2) - wr_ring_size |= wr_ring_size >> scount; - return ++wr_ring_size; -} - /** * i40iw_get_pbl - Retrieve pbl from a list given a virtual * address @@ -515,21 +499,19 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev, { struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem; u32 sqdepth, rqdepth; - u32 sq_size, rq_size; u8 sqshift; u32 size; enum i40iw_status_code status; struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; - sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1); - rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1); - - status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift); + i40iw_get_wqe_shift(ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift); + status = i40iw_get_sqdepth(ukinfo->sq_size, sqshift, &sqdepth); if (status) return -ENOMEM; - sqdepth = sq_size << sqshift; - rqdepth = rq_size << I40IW_MAX_RQ_WQE_SHIFT; + status = i40iw_get_rqdepth(ukinfo->rq_size, I40IW_MAX_RQ_WQE_SHIFT, &rqdepth); + if (status) + return -ENOMEM; size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3); iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL); @@ -559,8 +541,8 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev, ukinfo->shadow_area = ukinfo->rq[rqdepth].elem; info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE); - ukinfo->sq_size = sq_size; - ukinfo->rq_size = rq_size; + ukinfo->sq_size = sqdepth >> sqshift; + ukinfo->rq_size = rqdepth >> I40IW_MAX_RQ_WQE_SHIFT; ukinfo->qp_id = iwqp->ibqp.qp_num; return 0; } From 56b2f52310f9fdfc1ea7b99702f827f84ad16801 Mon Sep 17 00:00:00 2001 From: Christopher Bednarz Date: Mon, 16 Oct 2017 15:46:03 -0500 Subject: [PATCH 197/296] i40iw: Clear CQP Head/Tail during initialization Clear Control Queue Pair (CQP) Head/Tail on CQP initialization as during an adapter reset, these values are not reinitialized. Tail is cleared by writing 0 to CQP's tail register. Head is cleared by writing 0 to CQP's doorbell register. 
Fixes: 86dbcd0f12e9 ("i40iw: add file to handle cqp calls") Signed-off-by: Christopher Bednarz Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 3b6d12176413..e5c8f579e67b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -483,6 +483,9 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp, cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0; cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0; + i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPTAIL, 0); + i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, 0); + i40iw_debug(cqp->dev, I40IW_DEBUG_WQE, "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n", __func__, cqp->sq_size, cqp->hw_sq_size, From f535b56c39c81ba75c714b9163be540d39d3badd Mon Sep 17 00:00:00 2001 From: Ivan Barrera Date: Mon, 16 Oct 2017 15:46:04 -0500 Subject: [PATCH 198/296] i40iw: Remove UDA QP from QoS list if creation fails If User-space Direct Access (UDA) QP creation fails, the QP entry is not removed from QoS list. Fix this by removing QP from QoS list if create QP fails. Fixes: 0fc2dc58896f ("i40iw: Add Quality of Service support") Signed-off-by: Ivan Barrera Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_p.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_puda.c | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index e5c8f579e67b..c8a40b3fbc31 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -377,7 +377,7 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp * @qp: qp to be removed from qos */ -static void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp) +void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp) { struct i40iw_sc_vsi *vsi = qp->vsi; unsigned long flags; diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index 2ad5a8ebee7e..11d3a2a72100 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -86,7 +86,7 @@ void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *inf void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params); void i40iw_qp_add_qos(struct i40iw_sc_qp *qp); - +void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp); void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp); void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info); diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 19ced8bd64c0..796a815b53fd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -615,8 +615,10 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp); else ret = i40iw_puda_qp_wqe(rsrc->dev, qp); - if (ret) + if (ret) { + i40iw_qp_rem_qos(qp); i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem); + } return ret; } From d2782739c1498e4425dcdeb79f90f9291e58f6f6 Mon Sep 17 00:00:00 2001 From: Bob Sharp Date: Mon, 16 Oct 2017 15:46:05 -0500 Subject: [PATCH 
199/296] i40iw: Move cqp_cmd_head init to CQP initialization Control QP (CQP) command backlog list is initialized at device initialization time. It is not reinitialized in the reset flow. Move the initialization to CQP creation time so the list can be initialized correctly for reset as well. Fixes: 86dbcd0f12e9 ("i40iw: add file to handle cqp calls") Signed-off-by: Bob Sharp Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index c8a40b3fbc31..d88c6cf47cf2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -482,6 +482,7 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp, I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size); cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0; cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0; + INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head); /* for the cqp commands backlog. */ i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPTAIL, 0); i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, 0); @@ -5067,7 +5068,6 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev, u8 db_size; spin_lock_init(&dev->cqp_lock); - INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for the cqp commands backlog. */ i40iw_device_init_uk(&dev->dev_uk); From 5556176d8cb33652c702c3b6a629b4e1a0dd2a80 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 15 Oct 2017 09:56:48 +0300 Subject: [PATCH 200/296] RDMA: Remove Sean's and Hal's emails from MAINTAINER file RDMA subsystem has one active maintainer who can apply patches - Doug Ledford, but the RDMA entries in MAINTAINER file are not stating it. The following patch removes Sean's and Hal's emails from the maintainers list. It will allow clean get_maintaner.pl output which is needed for people outside of our community and various semi-automatic tools. Cc: Sean Hefty Cc: Hal Rosenstock Signed-off-by: Leon Romanovsky Acked-by: Sean Hefty Signed-off-by: Doug Ledford --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index dafc9c3bbcfa..f1688510178a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6775,8 +6775,6 @@ F: drivers/ipack/ INFINIBAND SUBSYSTEM M: Doug Ledford -M: Sean Hefty -M: Hal Rosenstock L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org/ Q: http://patchwork.kernel.org/project/linux-rdma/list/ From 2c104ea68350b7f49c4ae207afa6e7f7f5c81546 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 19 Oct 2017 07:56:42 +0300 Subject: [PATCH 201/296] IB/ipoib: Get rid of the tx_outstanding variable in all modes The first step toward using NAPI in the UD/TX flow is to separate between two flows, the NAPI and the xmit, meaning no use of shared variables between both flows. This patch takes out the tx_outstanding variable that was used in both flows and instead the driver uses the 2 cyclic ring variables: tx_head and tx_tail, tx_head used in the xmit flow and tx_tail in the NAPI flow. 
Cc: Kamal Heib Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_cm.c | 10 ++++++---- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 10 ++++------ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7cc2b755413d..19c3ba2368d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -381,7 +381,6 @@ struct ipoib_dev_priv { unsigned tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; - unsigned tx_outstanding; struct ib_wc send_wc[MAX_SEND_CQE]; struct ib_recv_wr rx_wr; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7500c28eac6b..6e0fc592791e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -769,8 +769,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ } else { netif_trans_update(dev); ++tx->tx_head; - - if (++priv->tx_outstanding == ipoib_sendq_size) { + ++priv->tx_head; + if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); netif_stop_queue(dev); @@ -814,7 +814,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) netif_tx_lock(dev); ++tx->tx_tail; - if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && + ++priv->tx_tail; + if (unlikely((priv->tx_head - priv->tx_tail) == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); @@ -1220,8 +1221,9 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; + ++priv->tx_tail; netif_tx_lock_bh(p->dev); - if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && + if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 89d82e27a445..c978f8ffd2bb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -406,7 +406,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; - if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && + if (unlikely((priv->tx_head - priv->tx_tail) == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); @@ -611,8 +611,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM; else priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; - - if (++priv->tx_outstanding == ipoib_sendq_size) { + /* increase the tx_head after send success, but use it for queue state */ + if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) ipoib_warn(priv, "request notify on send CQ failed\n"); @@ -627,7 +627,6 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, if (unlikely(rc)) { ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; - 
--priv->tx_outstanding; ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); if (netif_queue_stopped(dev)) @@ -640,7 +639,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, ++priv->tx_head; } - if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) + if (unlikely(priv->tx_head - priv->tx_tail > MAX_SEND_CQE)) while (poll_tx(priv)) ; /* nothing */ @@ -773,7 +772,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; - --priv->tx_outstanding; } for (i = 0; i < ipoib_recvq_size; ++i) { From 8966e28d2e40cfc9f694bd02dabc49afb78d7160 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 19 Oct 2017 07:56:43 +0300 Subject: [PATCH 202/296] IB/ipoib: Use NAPI in UD/TX flows Instead of explicit call to poll_cq of the tx ring, use the NAPI mechanism to handle the completions of each packet that has been sent to the HW. The next major changes were taken: * The driver init completion function in the creation of the send CQ, that function triggers the napi scheduling. * The driver uses CQ for RX for both modes UD and CM, and CQ for TX for CM and UD. Cc: Kamal Heib Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 11 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 40 ++++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 123 +++++++++++++-------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 24 +++- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 17 ++- 5 files changed, 136 insertions(+), 79 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 19c3ba2368d2..b534d9412a1e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -331,7 +331,8 @@ struct ipoib_dev_priv { struct net_device *dev; - struct napi_struct napi; + struct napi_struct send_napi; + struct napi_struct recv_napi; unsigned long flags; @@ -408,7 +409,6 @@ struct ipoib_dev_priv { #endif u64 hca_caps; struct ipoib_ethtool_st ethtool; - struct timer_list poll_timer; unsigned max_send_sge; bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; @@ -475,9 +475,10 @@ extern struct workqueue_struct *ipoib_workqueue; /* functions */ -int ipoib_poll(struct napi_struct *napi, int budget); -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); -void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr); +int ipoib_rx_poll(struct napi_struct *napi, int budget); +int ipoib_tx_poll(struct napi_struct *napi, int budget); +void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr); +void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct rdma_ah_attr *attr); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6e0fc592791e..87f4bd99cdf7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -757,30 +757,35 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } + if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) { + ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", + tx->qp->qp_num); + netif_stop_queue(dev); + } + skb_orphan(skb); skb_dst_drop(skb); + if (netif_queue_stopped(dev)) + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS)) { + ipoib_warn(priv, 
"IPoIB/CM:request notify on send CQ failed\n"); + napi_schedule(&priv->send_napi); + } + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); if (unlikely(rc)) { - ipoib_warn(priv, "post_send failed, error %d\n", rc); + ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc); ++dev->stats.tx_errors; ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); + + if (netif_queue_stopped(dev)) + netif_wake_queue(dev); } else { netif_trans_update(dev); ++tx->tx_head; ++priv->tx_head; - if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size) { - ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", - tx->qp->qp_num); - netif_stop_queue(dev); - rc = ib_req_notify_cq(priv->send_cq, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); - if (rc < 0) - ipoib_warn(priv, "request notify on send CQ failed\n"); - else if (rc) - ipoib_send_comp_handler(priv->send_cq, dev); - } } } @@ -815,9 +820,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) ++tx->tx_tail; ++priv->tx_tail; - if (unlikely((priv->tx_head - priv->tx_tail) == ipoib_sendq_size >> 1) && - netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + + if (unlikely(netif_queue_stopped(dev) && + (priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 && + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); if (wc->status != IB_WC_SUCCESS && @@ -1046,7 +1052,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_init_attr attr = { - .send_cq = priv->recv_cq, + .send_cq = priv->send_cq, .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = ipoib_sendq_size, @@ -1220,9 +1226,9 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); + netif_tx_lock_bh(p->dev); ++p->tx_tail; ++priv->tx_tail; - netif_tx_lock_bh(p->dev); if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index c978f8ffd2bb..3b96cdaf9a83 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -264,7 +264,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) likely(wc->wc_flags & IB_WC_IP_CSUM_OK)) skb->ip_summed = CHECKSUM_UNNECESSARY; - napi_gro_receive(&priv->napi, skb); + napi_gro_receive(&priv->recv_napi, skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) @@ -406,9 +406,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; - if (unlikely((priv->tx_head - priv->tx_tail) == ipoib_sendq_size >> 1) && - netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + + if (unlikely(netif_queue_stopped(dev) && + ((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) && + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); if (wc->status != IB_WC_SUCCESS && @@ -430,17 +431,23 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) static int poll_tx(struct ipoib_dev_priv *priv) { int n, i; + struct ib_wc *wc; n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc); - for (i = 0; i < n; ++i) - ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); - + for (i = 0; i < 
n; ++i) { + wc = priv->send_wc + i; + if (wc->wr_id & IPOIB_OP_CM) + ipoib_cm_handle_tx_wc(priv->dev, priv->send_wc + i); + else + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); + } return n == MAX_SEND_CQE; } -int ipoib_poll(struct napi_struct *napi, int budget) +int ipoib_rx_poll(struct napi_struct *napi, int budget) { - struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); + struct ipoib_dev_priv *priv = + container_of(napi, struct ipoib_dev_priv, recv_napi); struct net_device *dev = priv->dev; int done; int t; @@ -464,8 +471,9 @@ int ipoib_poll(struct napi_struct *napi, int budget) ipoib_cm_handle_rx_wc(dev, wc); else ipoib_ib_handle_rx_wc(dev, wc); - } else - ipoib_cm_handle_tx_wc(priv->dev, wc); + } else { + pr_warn("%s: Got unexpected wqe id\n", __func__); + } } if (n != t) @@ -484,33 +492,47 @@ int ipoib_poll(struct napi_struct *napi, int budget) return done; } -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) +int ipoib_tx_poll(struct napi_struct *napi, int budget) { - struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, + send_napi); + struct net_device *dev = priv->dev; + int n, i; + struct ib_wc *wc; - napi_schedule(&priv->napi); +poll_more: + n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc); + + for (i = 0; i < n; i++) { + wc = priv->send_wc + i; + if (wc->wr_id & IPOIB_OP_CM) + ipoib_cm_handle_tx_wc(dev, wc); + else + ipoib_ib_handle_tx_wc(dev, wc); + } + + if (n < budget) { + napi_complete(napi); + if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS)) && + napi_reschedule(napi)) + goto poll_more; + } + return n < 0 ? 0 : n; } -static void drain_tx_cq(struct net_device *dev) +void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ipoib_dev_priv *priv = ctx_ptr; - netif_tx_lock(dev); - while (poll_tx(priv)) - ; /* nothing */ - - if (netif_queue_stopped(dev)) - mod_timer(&priv->poll_timer, jiffies + 1); - - netif_tx_unlock(dev); + napi_schedule(&priv->recv_napi); } -void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr) +void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr) { - struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr); + struct ipoib_dev_priv *priv = ctx_ptr; - mod_timer(&priv->poll_timer, jiffies); + napi_schedule(&priv->send_napi); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -614,14 +636,17 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, /* increase the tx_head after send success, but use it for queue state */ if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); - if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) - ipoib_warn(priv, "request notify on send CQ failed\n"); netif_stop_queue(dev); } skb_orphan(skb); skb_dst_drop(skb); + if (netif_queue_stopped(dev)) + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS)) + ipoib_warn(priv, "request notify on send CQ failed\n"); + rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), address, dqpn, tx_req, phead, hlen); if (unlikely(rc)) { @@ -638,11 +663,6 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, rc = priv->tx_head; ++priv->tx_head; } - - if (unlikely(priv->tx_head - priv->tx_tail > MAX_SEND_CQE)) - while (poll_tx(priv)) - ; /* nothing */ - return rc; } @@ -731,6 +751,22 @@ static void 
check_qp_movement_and_print(struct ipoib_dev_priv *priv, new_state, qp_attr.qp_state); } +static void ipoib_napi_enable(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + napi_enable(&priv->recv_napi); + napi_enable(&priv->send_napi); +} + +static void ipoib_napi_disable(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + napi_disable(&priv->recv_napi); + napi_disable(&priv->send_napi); +} + int ipoib_ib_dev_stop_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -740,7 +776,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) int i; if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) - napi_disable(&priv->napi); + ipoib_napi_disable(dev); ipoib_cm_dev_stop(dev); @@ -797,7 +833,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_dbg(priv, "All sends and receives done.\n"); timeout: - del_timer_sync(&priv->poll_timer); qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); @@ -819,13 +854,6 @@ int ipoib_ib_dev_stop(struct net_device *dev) return 0; } -void ipoib_ib_tx_timer_func(struct timer_list *t) -{ - struct ipoib_dev_priv *priv = from_timer(priv, t, poll_timer); - - drain_tx_cq(priv->dev); -} - int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -850,7 +878,7 @@ int ipoib_ib_dev_open_default(struct net_device *dev) } if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) - napi_enable(&priv->napi); + ipoib_napi_enable(dev); return 0; out: @@ -965,8 +993,9 @@ void ipoib_drain_cq(struct net_device *dev) ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); else ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); - } else - ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); + } else { + pr_warn("%s: Got unexpected wqe id\n", __func__); + } } } while (n == IPOIB_NUM_WC); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0d6b4053cdf7..12b7f911f0e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1616,13 +1616,29 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) wait_for_completion(&priv->ntbl.deleted); } +static void ipoib_napi_add(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC); + netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE); +} + +static void ipoib_napi_del(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + netif_napi_del(&priv->recv_napi); + netif_napi_del(&priv->send_napi); +} + static void ipoib_dev_uninit_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_transport_dev_cleanup(dev); - netif_napi_del(&priv->napi); + ipoib_napi_del(dev); ipoib_cm_dev_cleanup(dev); @@ -1637,7 +1653,7 @@ static int ipoib_dev_init_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT); + ipoib_napi_add(dev); /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, @@ -1665,8 +1681,6 @@ static int ipoib_dev_init_default(struct net_device *dev) priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; - timer_setup(&priv->poll_timer, ipoib_ib_tx_timer_func, 0); - return 0; 
out_tx_ring_cleanup: @@ -1676,7 +1690,7 @@ static int ipoib_dev_init_default(struct net_device *dev) kfree(priv->rx_ring); out: - netif_napi_del(&priv->napi); + ipoib_napi_del(dev); return -ENOMEM; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index bb64baf25309..a1ed25422b72 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -156,7 +156,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) }; struct ib_cq_init_attr cq_attr = {}; - int ret, size; + int ret, size, req_vec; int i; size = ipoib_recvq_size + 1; @@ -171,17 +171,21 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (ret != -ENOSYS) return -ENODEV; + req_vec = (priv->port - 1) * 2; + cq_attr.cqe = size; - priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, - dev, &cq_attr); + cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors; + priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, + priv, &cq_attr); if (IS_ERR(priv->recv_cq)) { printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_cm_dev_cleanup; } cq_attr.cqe = ipoib_sendq_size; - priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, - dev, &cq_attr); + cq_attr.comp_vector = (req_vec + 1) % priv->ca->num_comp_vectors; + priv->send_cq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, + priv, &cq_attr); if (IS_ERR(priv->send_cq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_recv_cq; @@ -208,6 +212,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) goto out_free_send_cq; } + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) + goto out_free_send_cq; + for (i = 0; i < MAX_SKB_FRAGS + 1; ++i) priv->tx_sge[i].lkey = priv->pd->local_dma_lkey; From b9595c5bac87af09cc3e82553244893b76632df6 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 19 Oct 2017 07:56:44 +0300 Subject: [PATCH 203/296] IB/ipoib: Change number of TX wqe to 64 NAPI budget is 64 packets, while maximum polling size for the send CQ is 16. Let's bring them in sync, so the NAPI budget will be reused completely. Cc: Kamal Heib Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b534d9412a1e..8033a006277f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -111,7 +111,7 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, IPOIB_MCAST_FLAG_ATTACHED = 3, - MAX_SEND_CQE = 16, + MAX_SEND_CQE = 64, IPOIB_CM_COPYBREAK = 256, IPOIB_NON_CHILD = 0, From 9a8982dc89484e1144176bf4f5f35110f6c3414c Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 18 Oct 2017 17:32:44 +0800 Subject: [PATCH 204/296] RDMA/hns: Support WQE/CQE/PBL page size configurable feature in hip08 This patch updates to support WQE, CQE and PBL page size configurable feature, which includes base address page size and buffer page size. 
Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 29 ++++--- drivers/infiniband/hw/hns/hns_roce_cq.c | 21 ++++- drivers/infiniband/hw/hns/hns_roce_device.h | 10 ++- drivers/infiniband/hw/hns/hns_roce_mr.c | 93 +++++++++++++++------ drivers/infiniband/hw/hns/hns_roce_qp.c | 46 +++++++--- 5 files changed, 142 insertions(+), 57 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 8c9a33f4e1c1..3e4c5253ab5c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -167,12 +167,12 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, if (buf->nbufs == 1) { dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); } else { - if (bits_per_long == 64) + if (bits_per_long == 64 && buf->page_shift == PAGE_SHIFT) vunmap(buf->direct.buf); for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) - dma_free_coherent(dev, PAGE_SIZE, + dma_free_coherent(dev, 1 << buf->page_shift, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); @@ -181,20 +181,27 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, EXPORT_SYMBOL_GPL(hns_roce_buf_free); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf) + struct hns_roce_buf *buf, u32 page_shift) { int i = 0; dma_addr_t t; struct page **pages; struct device *dev = hr_dev->dev; u32 bits_per_long = BITS_PER_LONG; + u32 page_size = 1 << page_shift; + u32 order; /* SQ/RQ buf lease than one page, SQ + RQ = 8K */ if (size <= max_direct) { buf->nbufs = 1; /* Npages calculated by page_size */ - buf->npages = 1 << get_order(size); - buf->page_shift = PAGE_SHIFT; + order = get_order(size); + if (order <= page_shift - PAGE_SHIFT) + order = 0; + else + order -= page_shift - PAGE_SHIFT; + buf->npages = 1 << order; + buf->page_shift = page_shift; /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL); if (!buf->direct.buf) @@ -209,9 +216,9 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, memset(buf->direct.buf, 0, size); } else { - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->nbufs = (size + page_size - 1) / page_size; buf->npages = buf->nbufs; - buf->page_shift = PAGE_SHIFT; + buf->page_shift = page_shift; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), GFP_KERNEL); @@ -220,16 +227,16 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_alloc_coherent(dev, - PAGE_SIZE, &t, + page_size, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; buf->page_list[i].map = t; - memset(buf->page_list[i].buf, 0, PAGE_SIZE); + memset(buf->page_list[i].buf, 0, page_size); } - if (bits_per_long == 64) { + if (bits_per_long == 64 && page_shift == PAGE_SHIFT) { pages = kmalloc_array(buf->nbufs, sizeof(*pages), GFP_KERNEL); if (!pages) @@ -243,6 +250,8 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, kfree(pages); if (!buf->direct.buf) goto err_free; + } else { + buf->direct.buf = NULL; } } diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 88cdf6f67b69..f558f95d8827 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -220,6 
+220,8 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, struct ib_umem **umem, u64 buf_addr, int cqe) { int ret; + u32 page_shift; + u32 npages; *umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz, IB_ACCESS_LOCAL_WRITE, 1); @@ -230,8 +232,19 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, buf->hr_mtt.mtt_type = MTT_TYPE_CQE; else buf->hr_mtt.mtt_type = MTT_TYPE_WQE; - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), - (*umem)->page_shift, &buf->hr_mtt); + + if (hr_dev->caps.cqe_buf_pg_sz) { + npages = (ib_umem_page_count(*umem) + + (1 << hr_dev->caps.cqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.cqe_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, page_shift, + &buf->hr_mtt); + } else { + ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), + (*umem)->page_shift, + &buf->hr_mtt); + } if (ret) goto err_buf; @@ -253,9 +266,11 @@ static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq_buf *buf, u32 nent) { int ret; + u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, - PAGE_SIZE * 2, &buf->hr_buf); + (1 << page_shift) * 2, &buf->hr_buf, + page_shift); if (ret) goto out; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index b314ac0451af..93534003042a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -711,12 +711,14 @@ static inline struct hns_roce_qp static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) { u32 bits_per_long_val = BITS_PER_LONG; + u32 page_size = 1 << buf->page_shift; - if (bits_per_long_val == 64 || buf->nbufs == 1) + if ((bits_per_long_val == 64 && buf->page_shift == PAGE_SHIFT) || + buf->nbufs == 1) return (char *)(buf->direct.buf) + offset; else - return (char *)(buf->page_list[offset >> PAGE_SHIFT].buf) + - (offset & (PAGE_SIZE - 1)); + return (char *)(buf->page_list[offset >> buf->page_shift].buf) + + (offset & (page_size - 1)); } int hns_roce_init_uar_table(struct hns_roce_dev *dev); @@ -787,7 +789,7 @@ unsigned long key_to_hw_index(u32 key); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, struct hns_roce_buf *buf); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf); + struct hns_roce_buf *buf, u32 page_shift); int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 452136d98ad5..c47a5ee5db17 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -708,11 +708,17 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, dma_addr_t dma_handle; __le64 *mtts; u32 s = start_index * sizeof(u64); + u32 bt_page_size; u32 i; + if (mtt->mtt_type == MTT_TYPE_WQE) + bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); + else + bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + /* All MTTs must fit in the same page */ - if (start_index / (PAGE_SIZE / sizeof(u64)) != - (start_index + npages - 1) / (PAGE_SIZE / sizeof(u64))) + if (start_index / (bt_page_size / sizeof(u64)) != + (start_index + npages - 1) / (bt_page_size / sizeof(u64))) return -EINVAL; if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) @@ -746,12 +752,18 @@ 
static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, { int chunk; int ret; + u32 bt_page_size; if (mtt->order < 0) return -EINVAL; + if (mtt->mtt_type == MTT_TYPE_WQE) + bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); + else + bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + while (npages > 0) { - chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages); + chunk = min_t(int, bt_page_size / sizeof(u64), npages); ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk, page_list); @@ -869,25 +881,44 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem) { + struct device *dev = hr_dev->dev; struct scatterlist *sg; + unsigned int order; int i, k, entry; + int npage = 0; int ret = 0; - u64 *pages; - u32 n; int len; + u64 page_addr; + u64 *pages; + u32 bt_page_size; + u32 n; - pages = (u64 *) __get_free_page(GFP_KERNEL); + order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz : + hr_dev->caps.cqe_ba_pg_sz; + bt_page_size = 1 << (order + PAGE_SHIFT); + + pages = (u64 *) __get_free_pages(GFP_KERNEL, order); if (!pages) return -ENOMEM; i = n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> mtt->page_shift; + len = sg_dma_len(sg) >> PAGE_SHIFT; for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(sg) + - (k << umem->page_shift); - if (i == PAGE_SIZE / sizeof(u64)) { + page_addr = + sg_dma_address(sg) + (k << umem->page_shift); + if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { + if (page_addr & ((1 << mtt->page_shift) - 1)) { + dev_err(dev, "page_addr 0x%llx is not page_shift %d alignment!\n", + page_addr, mtt->page_shift); + ret = -EINVAL; + goto out; + } + pages[i++] = page_addr; + } + npage++; + if (i == bt_page_size / sizeof(u64)) { ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); if (ret) @@ -911,29 +942,37 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, struct ib_umem *umem) { struct scatterlist *sg; - int i = 0, j = 0; + int i = 0, j = 0, k; int entry; + int len; + u64 page_addr; + u32 pbl_bt_sz; if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0) return 0; + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - if (!hr_dev->caps.pbl_hop_num) { - mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12; - i++; - } else if (hr_dev->caps.pbl_hop_num == 1) { - mr->pbl_buf[i] = sg_dma_address(sg); - i++; - } else { - if (hr_dev->caps.pbl_hop_num == 2) - mr->pbl_bt_l1[i][j] = sg_dma_address(sg); - else if (hr_dev->caps.pbl_hop_num == 3) - mr->pbl_bt_l2[i][j] = sg_dma_address(sg); + len = sg_dma_len(sg) >> PAGE_SHIFT; + for (k = 0; k < len; ++k) { + page_addr = sg_dma_address(sg) + + (k << umem->page_shift); - j++; - if (j >= (PAGE_SIZE / 8)) { - i++; - j = 0; + if (!hr_dev->caps.pbl_hop_num) { + mr->pbl_buf[i++] = page_addr >> 12; + } else if (hr_dev->caps.pbl_hop_num == 1) { + mr->pbl_buf[i++] = page_addr; + } else { + if (hr_dev->caps.pbl_hop_num == 2) + mr->pbl_bt_l1[i][j] = page_addr; + else if (hr_dev->caps.pbl_hop_num == 3) + mr->pbl_bt_l2[i][j] = page_addr; + + j++; + if (j >= (pbl_bt_sz / 8)) { + i++; + j = 0; + } } } } @@ -986,7 +1025,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } else { int pbl_size = 1; - bt_size = (1 << PAGE_SHIFT) / 8; + bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8; for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) pbl_size *= 
bt_size; if (n > pbl_size) { diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e6d11154e6ca..b1c9a3768b19 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -322,6 +322,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, { u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); u8 max_sq_stride = ilog2(roundup_sq_stride); + u32 page_size; u32 max_cnt; /* Sanity check SQ size before proceeding */ @@ -363,28 +364,29 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); } else { + page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), PAGE_SIZE) + + hr_qp->rq.wqe_shift), page_size) + HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), PAGE_SIZE) + + hr_qp->sge.sge_shift), page_size) + HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); + hr_qp->sq.wqe_shift), page_size); hr_qp->sq.offset = 0; if (hr_qp->sge.sge_cnt) { hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), - PAGE_SIZE); + page_size); hr_qp->rq.offset = hr_qp->sge.offset + HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), - PAGE_SIZE); + page_size); } else { hr_qp->rq.offset = HNS_ROCE_ALOGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), - PAGE_SIZE); + page_size); } } @@ -396,6 +398,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct device *dev = hr_dev->dev; + u32 page_size; u32 max_cnt; int size; @@ -435,19 +438,20 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, } /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ + page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sq.offset = 0; size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, - PAGE_SIZE); + page_size); if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { hr_qp->sge.offset = size; size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift, PAGE_SIZE); + hr_qp->sge.sge_shift, page_size); } hr_qp->rq.offset = size; size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), - PAGE_SIZE); + page_size); hr_qp->buff_size = size; /* Get wr and sge number which send */ @@ -470,6 +474,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp ucmd; unsigned long qpn = 0; int ret = 0; + u32 page_shift; + u32 npages; mutex_init(&hr_qp->mutex); spin_lock_init(&hr_qp->sq.lock); @@ -513,8 +519,20 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->mtt.mtt_type = MTT_TYPE_WQE; - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem), - hr_qp->umem->page_shift, &hr_qp->mtt); + if (hr_dev->caps.mtt_buf_pg_sz) { + npages = (ib_umem_page_count(hr_qp->umem) + + (1 << hr_dev->caps.mtt_buf_pg_sz) - 1) / + (1 << hr_dev->caps.mtt_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, + page_shift, + &hr_qp->mtt); + } else { + ret = hns_roce_mtt_init(hr_dev, + ib_umem_page_count(hr_qp->umem), + hr_qp->umem->page_shift, + &hr_qp->mtt); + } if (ret) { dev_err(dev, "hns_roce_mtt_init error for create qp\n"); goto err_buf; @@ -555,8 +573,10 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, DB_REG_OFFSET 
* hr_dev->priv_uar.index; /* Allocate QP buf */ - if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, PAGE_SIZE * 2, - &hr_qp->hr_buf)) { + page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, + (1 << page_shift) * 2, + &hr_qp->hr_buf, page_shift)) { dev_err(dev, "hns_roce_buf_alloc error!\n"); ret = -ENOMEM; goto err_out; From 29a1fe5d70fab7dbdd777458f3c3129f292e61ac Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 18 Oct 2017 17:32:45 +0800 Subject: [PATCH 205/296] RDMA/hns: Update the IRRL table chunk size in hip08 As the increase of the IRRL specification in hip08, the IRRL table chunk size needs to be updated. This patch updates the IRRL table chunk size to 256k for hip08. Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hem.c | 29 +++++++++------------ drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 ++ 6 files changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 93534003042a..adf7a700b186 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -236,6 +236,7 @@ struct hns_roce_hem_table { unsigned long num_obj; /*Single obj size */ unsigned long obj_size; + unsigned long table_chunk_size; int lowmem; struct mutex mutex; struct hns_roce_hem **hem; @@ -565,6 +566,7 @@ struct hns_roce_caps { u32 cqe_ba_pg_sz; u32 cqe_buf_pg_sz; u32 cqe_hop_num; + u32 chunk_sz; /* chunk size in non multihop mode*/ }; struct hns_roce_hw { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 8388ae25640c..576eb3bb1f2d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -36,9 +36,6 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" -#define HNS_ROCE_HEM_ALLOC_SIZE (1 << 17) -#define HNS_ROCE_TABLE_CHUNK_SIZE (1 << 17) - #define DMA_ADDR_T_SHIFT 12 #define BT_BA_SHIFT 32 @@ -296,7 +293,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, /* Find the HEM(Hardware Entry Memory) entry */ unsigned long i = (obj & (table->num_obj - 1)) / - (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); + (table->table_chunk_size / table->obj_size); switch (table->type) { case HEM_TYPE_QPC: @@ -541,7 +538,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, if (hns_roce_check_whether_mhop(hr_dev, table->type)) return hns_roce_table_mhop_get(hr_dev, table, obj); - i = (obj & (table->num_obj - 1)) / (HNS_ROCE_TABLE_CHUNK_SIZE / + i = (obj & (table->num_obj - 1)) / (table->table_chunk_size / table->obj_size); mutex_lock(&table->mutex); @@ -552,8 +549,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, } table->hem[i] = hns_roce_alloc_hem(hr_dev, - HNS_ROCE_TABLE_CHUNK_SIZE >> PAGE_SHIFT, - HNS_ROCE_HEM_ALLOC_SIZE, + table->table_chunk_size >> PAGE_SHIFT, + table->table_chunk_size, (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN); if (!table->hem[i]) { @@ -702,7 +699,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, } i = (obj & (table->num_obj - 1)) / - (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); + (table->table_chunk_size / table->obj_size); mutex_lock(&table->mutex); @@ -739,8 +736,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, if (!hns_roce_check_whether_mhop(hr_dev, table->type)) { idx = (obj & (table->num_obj - 1)) * table->obj_size; - hem = table->hem[idx / HNS_ROCE_TABLE_CHUNK_SIZE]; - dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE; + hem = table->hem[idx / table->table_chunk_size]; + dma_offset = offset = idx % table->table_chunk_size; } else { hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); /* mtt mhop */ @@ -791,7 +788,7 @@ int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, unsigned long start, unsigned long end) { struct hns_roce_hem_mhop mhop; - unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size; + unsigned long inc = table->table_chunk_size / table->obj_size; unsigned long i; int ret; @@ -822,7 +819,7 @@ void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, unsigned long start, unsigned long end) { struct hns_roce_hem_mhop mhop; - unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size; + unsigned long inc = table->table_chunk_size / table->obj_size; unsigned long i; if (hns_roce_check_whether_mhop(hr_dev, table->type)) { @@ -830,8 +827,7 @@ void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, inc = mhop.bt_chunk_size / table->obj_size; } - for (i = start; i <= end; - i += inc) + for (i = start; i <= end; i += inc) hns_roce_table_put(hr_dev, table, i); } @@ -845,7 +841,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long num_hem; if (!hns_roce_check_whether_mhop(hr_dev, type)) { - obj_per_chunk = HNS_ROCE_TABLE_CHUNK_SIZE / obj_size; + table->table_chunk_size = hr_dev->caps.chunk_sz; + obj_per_chunk = table->table_chunk_size / obj_size; num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL); @@ -1027,7 +1024,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, for (i = 0; i < table->num_hem; ++i) if (table->hem[i]) { if (hr_dev->hw->clear_hem(hr_dev, table, - i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size, 0)) + i * table->table_chunk_size / table->obj_size, 0)) dev_err(dev, "Clear HEM base address failed.\n"); hns_roce_free_hem(hr_dev, table->hem[i]); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index a5693d484421..9eba5dd3f950 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1514,6 +1514,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->reserved_mrws = 1; caps->reserved_uars = 0; caps->reserved_cqs = 0; + caps->chunk_sz = HNS_ROCE_V1_TABLE_CHUNK_SIZE; for (i = 0; i < caps->num_ports; i++) caps->pkey_table_len[i] = 1; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index eb83ff3bfcad..21a07ef0afc9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -72,6 +72,8 @@ #define HNS_ROCE_V1_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000 +#define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17) + #define HNS_ROCE_V1_EXT_RAQ_WF 8 #define HNS_ROCE_V1_RAQ_ENTRY 64 #define HNS_ROCE_V1_RAQ_DEPTH 32768 diff --git 
a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ff61f30a82c2..50399282f3af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -943,6 +943,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqe_ba_pg_sz = 0; caps->cqe_buf_pg_sz = 0; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; + caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = 2; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 4fc4acd45381..65ed3f861f54 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -78,6 +78,8 @@ #define HNS_ROCE_CQE_HOP_NUM 1 #define HNS_ROCE_PBL_HOP_NUM 2 +#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18) + #define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0 #define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1 #define HNS_ROCE_CMD_FLAG_NEXT_SHIFT 2 From 3180236cceeb3d66e45f6395501f3d315beae4c1 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Wed, 18 Oct 2017 17:32:46 +0800 Subject: [PATCH 206/296] RDMA/hns: Update the PD&CQE&MTT specification in hip08 This patch updates the PD specification to 16M for hip08, and it updates the numbers of MTT and CQE segments for the buddy allocator. As the CQE supports hop num 1 addressing, the CQE specification is 64k, so this patch sets the CQE specification to 64k accordingly. Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 65ed3f861f54..6106ad1b1322 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -47,16 +47,16 @@ #define HNS_ROCE_V2_MAX_QP_NUM 0x2000 #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_CQ_NUM 0x8000 -#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 +#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_UAR_NUM 256 #define HNS_ROCE_V2_PHY_UAR_NUM 1 #define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000 -#define HNS_ROCE_V2_MAX_MTT_SEGS 0x100000 -#define HNS_ROCE_V2_MAX_CQE_SEGS 0x10000 -#define HNS_ROCE_V2_MAX_PD_NUM 0x400000 +#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 #define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64 From b156269d88e43a93da424a463e57eb5e9ee5f3cd Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 19 Oct 2017 11:52:40 +0800 Subject: [PATCH 207/296] RDMA/hns: Add modify CQ support for hip08 The modify CQ API is needed to modify CQ context fields that control event generation moderation. This patch adds it.
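For context, a minimal sketch (not part of the patch) of how a kernel consumer would reach this hook: the core verbs entry point ib_modify_cq() dispatches to hr_dev->hw->modify_cq(), i.e. hns_roce_v2_modify_cq() on hip08. The moderation values below are made up for illustration.

#include <rdma/ib_verbs.h>

/* Sketch: ask the HW to coalesce up to 16 CQEs, or wait out the
 * moderation period, before raising a completion event for this CQ.
 */
static int example_set_cq_moderation(struct ib_cq *cq)
{
	u16 cq_count = 16;	/* illustrative moderation count */
	u16 cq_period = 10;	/* illustrative moderation period */

	return ib_modify_cq(cq, cq_count, cq_period);
}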
Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 1 + drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 6 +++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 42 +++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 1 + 5 files changed, 51 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 77299ae51d82..f0039a7166d8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -52,6 +52,7 @@ enum { HNS_ROCE_CMD_MODIFY_QPC = 0x41, HNS_ROCE_CMD_QUERY_QPC = 0x42, + HNS_ROCE_CMD_MODIFY_CQC = 0x52, /* CQC BT commands */ HNS_ROCE_CMD_WRITE_CQC_BT0 = 0x10, HNS_ROCE_CMD_WRITE_CQC_BT1 = 0x11, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index adf7a700b186..31c7ab8fcdbe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -609,6 +609,7 @@ struct hns_roce_hw { int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); int (*destroy_cq)(struct ib_cq *ibcq); + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); }; struct hns_roce_dev { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 9eba5dd3f950..00cbfc97bea8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2114,6 +2114,11 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); } +static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + return -EOPNOTSUPP; +} + static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { @@ -3964,6 +3969,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, + .modify_cq = hns_roce_v1_modify_cq, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 50399282f3af..b0736c32e5d1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2994,6 +2994,47 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) return 0; } +static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(cq->device); + struct hns_roce_v2_cq_context *cq_context; + struct hns_roce_cq *hr_cq = to_hr_cq(cq); + struct hns_roce_v2_cq_context *cqc_mask; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cq_context = mailbox->buf; + cqc_mask = (struct hns_roce_v2_cq_context *)mailbox->buf + 1; + + memset(cqc_mask, 0xff, sizeof(*cqc_mask)); + + roce_set_field(cq_context->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_MAX_CNT_M, V2_CQC_BYTE_56_CQ_MAX_CNT_S, + cq_count); + roce_set_field(cqc_mask->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_MAX_CNT_M, V2_CQC_BYTE_56_CQ_MAX_CNT_S, + 0); + roce_set_field(cq_context->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_PERIOD_M, V2_CQC_BYTE_56_CQ_PERIOD_S, + cq_period); + 
roce_set_field(cqc_mask->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_PERIOD_M, V2_CQC_BYTE_56_CQ_PERIOD_S, + 0); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 1, + HNS_ROCE_CMD_MODIFY_CQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) + dev_err(hr_dev->dev, "MODIFY CQ Failed to cmd mailbox.\n"); + + return ret; +} + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, @@ -3009,6 +3050,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .modify_qp = hns_roce_v2_modify_qp, .query_qp = hns_roce_v2_query_qp, .destroy_qp = hns_roce_v2_destroy_qp, + .modify_cq = hns_roce_v2_modify_cq, .post_send = hns_roce_v2_post_send, .post_recv = hns_roce_v2_post_recv, .req_notify_cq = hns_roce_v2_req_notify_cq, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 3dcb2df20ba3..31120fd5e76b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -499,6 +499,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* CQ */ ib_dev->create_cq = hns_roce_ib_create_cq; + ib_dev->modify_cq = hr_dev->hw->modify_cq; ib_dev->destroy_cq = hns_roce_ib_destroy_cq; ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq; ib_dev->poll_cq = hr_dev->hw->poll_cq; From b4f34597a5ce148b88a47da621037537c384d565 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 17 Oct 2017 18:01:12 +0300 Subject: [PATCH 208/296] IB/mlx5: Expose multi-packet RQ capabilities This patch reports the device's striding RQ capabilities to the user-space: - min/max_single_stride_log_num_of_bytes: Log of min/max number of bytes in a single stride. - min/max_single_wqe_log_num_of_strides: Log of min/max number of strides in a single WQE. - supported_qpts: A bit mask to know which QP types support multi-packet RQ, for now only Raw Packet QPs.
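A sketch of how user space might consume these new fields once the extended query-device response has been filled in; the helper name is hypothetical, and the structures come from the uapi change in this patch.

/* Hypothetical helper: multi-packet RQ is only usable on the QP types
 * flagged in supported_qpts, and stride/WQE sizes must later be chosen
 * within the reported log2 ranges.
 */
static int striding_rq_usable(const struct mlx5_ib_query_device_resp *resp)
{
	const struct mlx5_ib_striding_rq_caps *caps = &resp->striding_rq_caps;

	return caps->supported_qpts & (1 << IB_QPT_RAW_PACKET);
}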
Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 16 ++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++ include/uapi/rdma/mlx5-abi.h | 14 ++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 552f7bd4ecc3..02da3f58f296 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -848,6 +848,22 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } + if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen)) { + resp.response_length += sizeof(resp.striding_rq_caps); + if (MLX5_CAP_GEN(mdev, striding_rq)) { + resp.striding_rq_caps.min_single_stride_log_num_of_bytes = + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; + resp.striding_rq_caps.max_single_stride_log_num_of_bytes = + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; + resp.striding_rq_caps.min_single_wqe_log_num_of_strides = + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + resp.striding_rq_caps.max_single_wqe_log_num_of_strides = + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; + resp.striding_rq_caps.supported_qpts = + BIT(IB_QPT_RAW_PACKET); + } + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 189e80cd6b2f..8de40852818b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -256,6 +256,11 @@ enum mlx5_ib_wq_flags { MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, }; +#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 +#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 +#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 +#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 + struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 1791bf123ba9..0832d9502200 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -190,6 +190,19 @@ struct mlx5_ib_sw_parsing_caps { __u32 supported_qpts; }; +struct mlx5_ib_striding_rq_caps { + __u32 min_single_stride_log_num_of_bytes; + __u32 max_single_stride_log_num_of_bytes; + __u32 min_single_wqe_log_num_of_strides; + __u32 max_single_wqe_log_num_of_strides; + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -200,6 +213,7 @@ struct mlx5_ib_query_device_resp { __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; struct mlx5_ib_sw_parsing_caps sw_parsing_caps; + struct mlx5_ib_striding_rq_caps striding_rq_caps; }; struct mlx5_ib_create_cq { From ccc8708790273811db24676223b040710793cba7 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 17 Oct 2017 18:01:13 +0300 Subject: [PATCH 209/296] IB/mlx5: Allow creation of a multi-packet RQ Allow creation of a multi-packet receive queue. In order to create a multi-packet RQ, the following fields in the mlx5_ib_rwq should be set: - log_num_strides: Log of number of strides per WQE - single_stride_log_num_of_bytes: Log of a single stride size - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeros before writing the message to memory (e.g. for the IP alignment). 
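Putting the three fields together, a hypothetical user-space fill of the create-WQ command; the values are examples only and must respect the caps exposed by the previous patch.

/* Request a striding RQ: 2^6 = 64B strides, 2^9 = 512 strides per WQE,
 * with the 2-byte shift enabled so the inner IP header lands aligned.
 */
struct mlx5_ib_create_wq ucmd = {};

ucmd.comp_mask = MLX5_IB_CREATE_WQ_STRIDING_RQ;
ucmd.single_stride_log_num_of_bytes = 6;
ucmd.single_wqe_log_num_of_strides = 9;
ucmd.two_byte_shift_en = 1;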
Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 +++ drivers/infiniband/hw/mlx5/qp.c | 52 +++++++++++++++++++++++----- include/linux/mlx5/mlx5_ifc.h | 1 + include/uapi/rdma/mlx5-abi.h | 8 ++++- 4 files changed, 56 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8de40852818b..e7deaa08535b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -254,6 +254,7 @@ struct mlx5_ib_wq { enum mlx5_ib_wq_flags { MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, + MLX5_IB_WQ_FLAGS_STRIDING_RQ = 0x2, }; #define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 @@ -269,6 +270,9 @@ struct mlx5_ib_rwq { u32 log_rq_size; u32 rq_page_offset; u32 log_page_size; + u32 log_num_strides; + u32 two_byte_shift_en; + u32 single_stride_log_num_of_bytes; struct ib_umem *umem; size_t buf_size; unsigned int page_shift; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 37a0976240fd..d209c684d729 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4706,9 +4706,19 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, wq_type, + rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ? + MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); + if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { + MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); + MLX5_SET(wq, wq, log_wqe_stride_size, + rwq->single_stride_log_num_of_bytes - + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES); + MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides - + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES); + } MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset); @@ -4790,7 +4800,8 @@ static int prepare_user_rq(struct ib_pd *pd, int err; size_t required_cmd_sz; - required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes) + + sizeof(ucmd.single_stride_log_num_of_bytes); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -4808,14 +4819,39 @@ static int prepare_user_rq(struct ib_pd *pd, return -EFAULT; } - if (ucmd.comp_mask) { + if (ucmd.comp_mask & (~MLX5_IB_CREATE_WQ_STRIDING_RQ)) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; - } - - if (ucmd.reserved) { - mlx5_ib_dbg(dev, "invalid reserved\n"); - return -EOPNOTSUPP; + } else if (ucmd.comp_mask & MLX5_IB_CREATE_WQ_STRIDING_RQ) { + if (!MLX5_CAP_GEN(dev->mdev, striding_rq)) { + mlx5_ib_dbg(dev, "Striding RQ is not supported\n"); + return -EOPNOTSUPP; + } + if ((ucmd.single_stride_log_num_of_bytes < + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES) || + (ucmd.single_stride_log_num_of_bytes > + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES)) { + mlx5_ib_dbg(dev, "Invalid log stride size (%u. 
Range is %u - %u)\n", + ucmd.single_stride_log_num_of_bytes, + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES, + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES); + return -EINVAL; + } + if ((ucmd.single_wqe_log_num_of_strides > + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || + (ucmd.single_wqe_log_num_of_strides < + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) { + mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n", + ucmd.single_wqe_log_num_of_strides, + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); + return -EINVAL; + } + rwq->single_stride_log_num_of_bytes = + ucmd.single_stride_log_num_of_bytes; + rwq->log_num_strides = ucmd.single_wqe_log_num_of_strides; + rwq->two_byte_shift_en = !!ucmd.two_byte_shift_en; + rwq->create_flags |= MLX5_IB_WQ_FLAGS_STRIDING_RQ; } err = set_user_rq_size(dev, init_attr, &ucmd, rwq); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69772347f866..db655db45b77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -744,6 +744,7 @@ enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, + MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ = 0x3, }; enum { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 0832d9502200..b1d5b87ba3fd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -308,6 +308,10 @@ struct mlx5_ib_alloc_mw { __u16 reserved2; }; +enum mlx5_ib_create_wq_mask { + MLX5_IB_CREATE_WQ_STRIDING_RQ = (1 << 0), +}; + struct mlx5_ib_create_wq { __u64 buf_addr; __u64 db_addr; @@ -316,7 +320,9 @@ struct mlx5_ib_create_wq { __u32 user_index; __u32 flags; __u32 comp_mask; - __u32 reserved; + __u32 single_stride_log_num_of_bytes; + __u32 single_wqe_log_num_of_strides; + __u32 two_byte_shift_en; }; struct mlx5_ib_create_ah_resp { From 0ff8e79ca7c81fafa3f5c91a1b58efc85cbc2302 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:51 +0300 Subject: [PATCH 210/296] IB/mlx5: Add 128B CQE compression and padding HW bits Adding new bits in mlx5_ifc_cmd_hca_cap to get the hardware capabilities for: - compression_128: Support 128B CQE compression - cqe_128_always: Support 128B CQE padding Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index db655db45b77..d49928c314ed 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1047,7 +1047,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 num_of_uars_per_page[0x20]; u8 reserved_at_540[0x40]; - u8 reserved_at_580[0x3f]; + u8 reserved_at_580[0x3d]; + u8 cqe_128_always[0x1]; + u8 cqe_compression_128[0x1]; u8 cqe_compression[0x1]; u8 cqe_compression_timeout[0x10]; From de57f2ad06d5bf01015b955600cbfc77059b2b6e Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:52 +0300 Subject: [PATCH 211/296] IB/mlx5: Support 128B CQE compression feature In commit 1cbe6fc86ccf ("IB/mlx5: Add support for CQE compressing") the concept of CQE compression was introduced and support was added for the 64B CQE size. This change updates the code to support the 128B CQE size as well.
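As a sketch, the user-space side of requesting compression on a 128B CQE via the create-CQ command; the residue format value is an assumption chosen for illustration.

/* With this change, create_cq_user() accepts cqe_comp_en for 128B CQEs
 * when the cqe_compression_128 HCA capability is set; 64B CQEs keep
 * using the original cqe_compression capability.
 */
struct mlx5_ib_create_cq ucmd = {};

ucmd.cqe_size = 128;
ucmd.cqe_comp_en = 1;
ucmd.cqe_comp_res_format = MLX5_IB_CQE_RES_FORMAT_HASH; /* assumed format */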
Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 6 ++++-- drivers/infiniband/hw/mlx5/main.c | 8 ++++++-- include/uapi/rdma/mlx5-abi.h | 7 ++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b8a116d0e063..51871f049c57 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -804,8 +804,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { - if (unlikely((*cqe_size != 64) || - !MLX5_CAP_GEN(dev->mdev, cqe_compression))) { + if (!((*cqe_size == 128 && + MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || + (*cqe_size == 64 && + MLX5_CAP_GEN(dev->mdev, cqe_compression)))) { err = -EOPNOTSUPP; mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", *cqe_size); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 02da3f58f296..b9337562aa90 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -824,8 +824,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), reserved, uhw->outlen)) - resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), flags, uhw->outlen)) { + resp.response_length += sizeof(resp.flags); + if (MLX5_CAP_GEN(mdev, cqe_compression_128)) + resp.flags |= + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP; + } + if (field_avail(typeof(resp), sw_parsing_caps, uhw->outlen)) { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index b1d5b87ba3fd..a8fc1f0956d0 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -203,6 +203,11 @@ struct mlx5_ib_striding_rq_caps { __u32 supported_qpts; }; +enum mlx5_ib_query_dev_resp_flags { + /* Support 128B CQE compression */ + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -211,7 +216,7 @@ struct mlx5_ib_query_device_resp { struct mlx5_ib_cqe_comp_caps cqe_comp_caps; struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; - __u32 reserved; + __u32 flags; /* Use enum mlx5_ib_query_dev_resp_flags */ struct mlx5_ib_sw_parsing_caps sw_parsing_caps; struct mlx5_ib_striding_rq_caps striding_rq_caps; }; From 7a0c8f4244e9ec7a630563d294b211342b46223d Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:53 +0300 Subject: [PATCH 212/296] IB/mlx5: Support padded 128B CQE feature In some benchmarks and on some CPU architectures, writing the CQE on a full cache line improves performance by saving memory access operations (read-modify-write) relative to a partial cache line change. This patch lets the user configure the device to pad the CQE up to 128B in case its content is less than 128B. Currently the driver supports only padding for a CQE size of 128B.
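A sketch of how user space would opt in, and how the reworked helper maps sizes to CQC values:

/* Request a padded CQ; only honored for 128B CQEs on HCAs advertising
 * cqe_128_always, otherwise create_cq_user() returns -EOPNOTSUPP.
 */
struct mlx5_ib_create_cq ucmd = {};

ucmd.cqe_size = 128;
ucmd.flags = MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD;

/* Resulting cqe_sz_to_mlx_sz() mapping:
 *   (64,  no pad) -> CQE_SIZE_64
 *   (128, no pad) -> CQE_SIZE_128
 *   (128, padded) -> CQE_SIZE_128_PAD
 */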
Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 27 +++++++++++++++++++++++---- drivers/infiniband/hw/mlx5/main.c | 4 ++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++ include/linux/mlx5/cq.h | 6 ++++-- include/uapi/rdma/mlx5-abi.h | 7 ++++++- 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 51871f049c57..01b218a3c277 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -754,13 +754,13 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int err; ucmdlen = udata->inlen < sizeof(ucmd) ? - (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd); + (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; if (ucmdlen == sizeof(ucmd) && - ucmd.reserved != 0) + (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD))) return -EINVAL; if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) @@ -830,6 +830,19 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, ilog2(ucmd.cqe_comp_res_format)); } + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { + if (*cqe_size != 128 || + !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, + "CQE padding is not supported for CQE size of %dB!\n", + *cqe_size); + goto err_cqb; + } + + cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; + } + return 0; err_cqb: @@ -989,7 +1002,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->cqe_size = cqe_size; cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); - MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(cqc, cqc, uar_page, index); MLX5_SET(cqc, cqc, c_eqn, eqn); @@ -1339,7 +1355,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b9337562aa90..1edd41e3be1b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -826,9 +826,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), flags, uhw->outlen)) { resp.response_length += sizeof(resp.flags); + if (MLX5_CAP_GEN(mdev, cqe_compression_128)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP; + + if (MLX5_CAP_GEN(mdev, cqe_128_always)) + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; } if (field_avail(typeof(resp), sw_parsing_caps, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e7deaa08535b..137f2116911f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -444,6 +444,10 @@ struct mlx5_shared_mr_info { struct ib_umem *umem; }; +enum mlx5_ib_cq_pr_flags { + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0, +}; + struct mlx5_ib_cq { struct ib_cq ibcq; struct mlx5_core_cq mcq; @@ -466,6 +470,7 @@ 
struct mlx5_ib_cq { struct list_head wc_list; enum ib_cq_notify_flags notify_flags; struct work_struct notify_work; + u16 private_flags; /* Use mlx5_ib_cq_pr_flags */ }; struct mlx5_ib_wc { diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 95898847c7d4..cc718e245b1e 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -125,11 +125,13 @@ struct mlx5_cq_modify_params { enum { CQE_SIZE_64 = 0, CQE_SIZE_128 = 1, + CQE_SIZE_128_PAD = 2, }; -static inline int cqe_sz_to_mlx_sz(u8 size) +static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en) { - return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; + return padding_128_en ? CQE_SIZE_128_PAD : + size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; } static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a8fc1f0956d0..201a60f032dd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -206,6 +206,7 @@ struct mlx5_ib_striding_rq_caps { enum mlx5_ib_query_dev_resp_flags { /* Support 128B CQE compression */ MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, }; struct mlx5_ib_query_device_resp { @@ -221,13 +222,17 @@ struct mlx5_ib_query_device_resp { struct mlx5_ib_striding_rq_caps striding_rq_caps; }; +enum mlx5_ib_create_cq_flags { + MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD = 1 << 0, +}; + struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; __u8 cqe_comp_en; __u8 cqe_comp_res_format; - __u16 reserved; /* explicit padding (optional on i386) */ + __u16 flags; }; struct mlx5_ib_create_cq_resp { From 4d350f1f89ee8ea84be1ef09717bf392fa5a3b45 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:54 +0300 Subject: [PATCH 213/296] IB/mlx5: Update tunnel offloads bits This patch updates the mlx5_ifc with the following: - Fix tunnel_stateless_gre typo. - max_geneve_opt_len - Maximum geneve options length. - tunnel_stateless_geneve_rx - If set, receive Stateless Offloads for Geneve tunneled (inner) packets are supported. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d49928c314ed..d4c29c183d28 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -614,7 +614,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 swp[0x1]; u8 swp_csum[0x1]; u8 swp_lso[0x1]; - u8 reserved_at_23[0x1d]; + u8 reserved_at_23[0x1b]; + u8 max_geneve_opt_len[0x1]; + u8 tunnel_stateless_geneve_rx[0x1]; u8 reserved_at_40[0x10]; u8 lro_min_mss_size[0x10]; From f95ef6cbae61fa1dd563f5c0f6a0e5b512fda5ba Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:55 +0300 Subject: [PATCH 214/296] IB/mlx5: Add tunneling offloads support The device can support receive Stateless Offloads for the inner packet's fields only when the packet is processed by a TIR which is enabled to support tunneling. Otherwise, the device treats the packet as an ordinary non-tunneling packet and receive offloads can be done only for the outer packet's fields. In order to enable receive Stateless Offloading support for incoming tunneling traffic, the TIR should be created with tunneled_offload_en. Tunneling offloads are supported only by raw Ethernet QPs.
This patch includes: * New QP creation flag for tunneling offloads. * Reports device capabilities. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 14 ++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 39 ++++++++++++++++++++++++---- include/uapi/rdma/mlx5-abi.h | 11 +++++++- 4 files changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1edd41e3be1b..260f8be1d0ed 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -872,6 +872,20 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } + if (field_avail(typeof(resp), tunnel_offloads_caps, + uhw->outlen)) { + resp.response_length += sizeof(resp.tunnel_offloads_caps); + if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_VXLAN; + if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_GENEVE; + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_GRE; + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 137f2116911f..0a328d6c6494 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -398,6 +398,7 @@ struct mlx5_ib_qp { struct list_head cq_send_list; u32 rate_limit; u32 underlay_qpn; + bool tunnel_offload_en; }; struct mlx5_ib_cq_buf { @@ -420,6 +421,8 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_RSS = 1 << 8, MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, MLX5_IB_QP_UNDERLAY = 1 << 10, + /* Reserved for PCI_WRITE_PAD = 1 << 11, */ + MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d209c684d729..53bb0d5cad3d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1204,8 +1204,16 @@ static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev, mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp); } +static bool tunnel_offload_supported(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_ETH(dev, tunnel_stateless_vxlan) || + MLX5_CAP_ETH(dev, tunnel_stateless_gre) || + MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); +} + static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, u32 tdn) + struct mlx5_ib_rq *rq, u32 tdn, + bool tunnel_offload_en) { u32 *in; void *tirc; @@ -1221,6 +1229,8 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); MLX5_SET(tirc, tirc, transport_domain, tdn); + if (tunnel_offload_en) + MLX5_SET(tirc, tirc, tunneled_offload_en, 1); err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); @@ -1271,7 +1281,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, goto err_destroy_sq; - err = create_raw_packet_qp_tir(dev, rq, tdn); + err = create_raw_packet_qp_tir(dev, rq, tdn, + qp->tunnel_offload_en); if (err) goto err_destroy_rq; } @@ -1358,7 +1369,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (udata->outlen < min_resp_len) return -EINVAL; - required_cmd_sz = offsetof(typeof(ucmd), reserved1) + 
sizeof(ucmd.reserved1); + required_cmd_sz = offsetof(typeof(ucmd), flags) + sizeof(ucmd.flags); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -1381,8 +1392,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) { - mlx5_ib_dbg(dev, "invalid reserved\n"); + if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + mlx5_ib_dbg(dev, "invalid flags\n"); + return -EOPNOTSUPP; + } + + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS && + !tunnel_offload_supported(dev->mdev)) { + mlx5_ib_dbg(dev, "tunnel offloads isn't supported\n"); return -EOPNOTSUPP; } @@ -1405,6 +1422,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(tirc, tirc, transport_domain, tdn); hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) + MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + switch (ucmd.rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { @@ -1604,6 +1625,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + if (init_attr->qp_type != IB_QPT_RAW_PACKET || + !tunnel_offload_supported(mdev)) { + mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); + return -EOPNOTSUPP; + } + qp->tunnel_offload_en = true; + } if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { if (init_attr->qp_type != IB_QPT_UD || diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 201a60f032dd..791655ec4aff 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -39,6 +39,7 @@ enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, + MLX5_QP_FLAG_TUNNEL_OFFLOADS = 1 << 2, }; enum { @@ -209,6 +210,12 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, }; +enum mlx5_ib_tunnel_offloads { + MLX5_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0, + MLX5_IB_TUNNELED_OFFLOADS_GRE = 1 << 1, + MLX5_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2 +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -220,6 +227,8 @@ struct mlx5_ib_query_device_resp { __u32 flags; /* Use enum mlx5_ib_query_dev_resp_flags */ struct mlx5_ib_sw_parsing_caps sw_parsing_caps; struct mlx5_ib_striding_rq_caps striding_rq_caps; + __u32 tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */ + __u32 reserved; }; enum mlx5_ib_create_cq_flags { @@ -304,7 +313,7 @@ struct mlx5_ib_create_qp_rss { __u8 reserved[6]; __u8 rx_hash_key[128]; /* valid only for Toeplitz */ __u32 comp_mask; - __u32 reserved1; + __u32 flags; }; struct mlx5_ib_create_qp_resp { From 309fa3470fcaf96b295d2106ab17c00dbf7f3920 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:56 +0300 Subject: [PATCH 215/296] IB/mlx5: Add support for RSS on the inner packet Some user space applications would like to do RSS on the inner packet fields instead of the outer ones. When MLX5_RX_HASH_INNER is set with one or more of the other hash fields, the RSS will be done using the inner packet.
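A hypothetical RSS configuration hashing on the inner UDP/IPv4 4-tuple of tunneled traffic; per the validation added below, MLX5_RX_HASH_INNER is only accepted together with MLX5_QP_FLAG_TUNNEL_OFFLOADS.

/* Sketch of the user-space command for inner-packet RSS */
struct mlx5_ib_create_qp_rss ucmd = {};

ucmd.flags = MLX5_QP_FLAG_TUNNEL_OFFLOADS;
ucmd.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ;
ucmd.rx_hash_fields_mask = MLX5_RX_HASH_INNER |
			   MLX5_RX_HASH_SRC_IPV4 | MLX5_RX_HASH_DST_IPV4 |
			   MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP;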
Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 11 +++++++++ include/uapi/rdma/mlx5-abi.h | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 53bb0d5cad3d..9e22dead259a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1403,6 +1403,12 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER && + !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { + mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n"); + return -EOPNOTSUPP; + } + err = ib_copy_to_udata(udata, &resp, min_resp_len); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); @@ -1426,6 +1432,11 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); + else + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + switch (ucmd.rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 791655ec4aff..442b46b74a3a 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -303,7 +303,9 @@ enum mlx5_rx_hash_fields { MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, - MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 + MLX5_RX_HASH_DST_PORT_UDP = 1 << 7, + /* Save bits for future fields */ + MLX5_RX_HASH_INNER = 1 << 31 }; struct mlx5_ib_create_qp_rss { From 5a3dc32372439eb9a0d6027c54cbfff64803fce5 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 19 Oct 2017 08:40:30 +0300 Subject: [PATCH 216/296] IB/cm: Fix memory corruption in handling CM request In recent code, two path record entries are always cleared while either one or two path record entries may have been allocated. This leads to zeroing out unallocated memory. This fix initializes the alternative path record only when an alternative path is set. While we are at it, path record allocation doesn't check for an OPA alternative path, but the rest of the code checks for an OPA alternative path. Path record allocation code doesn't check for an OPA alternative LID. This can further lead to memory corruption when only one path record is allocated, but there is actually an alternative OPA path record present in the CM request.
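The failure mode, reduced to a sketch (illustrative pseudocode of the old behavior, not the driver source): cm_work carries a variable number of trailing path records, so unconditionally clearing the second entry writes past the allocation whenever only one record was allocated.

/* paths is 1 or 2 depending on whether the REQ has an alternate path */
work = kmalloc(sizeof(*work) + paths * sizeof(struct sa_path_rec),
	       GFP_KERNEL);
memset(&work->path[0], 0, sizeof(work->path[0]));
memset(&work->path[1], 0, sizeof(work->path[1])); /* out of bounds when paths == 1 */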
Cc: # v4.12+ Fixes: 9fdca4da4d8c ("IB/SA: Split struct sa_path_rec based on IB and ROCE specific fields") Signed-off-by: Parav Pandit Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5927ee4e57ca..f6b159d79977 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1573,7 +1573,7 @@ static void cm_format_req_event(struct cm_work *work, param->bth_pkey = cm_get_bth_pkey(work); param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; - if (req_msg->alt_local_lid) + if (cm_req_has_alt_path(req_msg)) param->alternate_path = &work->path[1]; else param->alternate_path = NULL; @@ -1854,7 +1854,8 @@ static int cm_req_handler(struct cm_work *work) cm_process_routed_req(req_msg, work->mad_recv_wc->wc); memset(&work->path[0], 0, sizeof(work->path[0])); - memset(&work->path[1], 0, sizeof(work->path[1])); + if (cm_req_has_alt_path(req_msg)) + memset(&work->path[1], 0, sizeof(work->path[1])); grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); ret = ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, @@ -3816,14 +3817,16 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct cm_port *port = mad_agent->context; struct cm_work *work; enum ib_cm_event_type event; + bool alt_path = false; u16 attr_id; int paths = 0; int going_down = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: - paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> - alt_local_lid != 0); + alt_path = cm_req_has_alt_path((struct cm_req_msg *) + mad_recv_wc->recv_buf.mad); + paths = 1 + (alt_path != 0); event = IB_CM_REQ_RECEIVED; break; case CM_MRA_ATTR_ID: From 3bfbea747353c2b40b48784b70e782ca5352f18c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 03:28:27 -0700 Subject: [PATCH 217/296] IB/rxe: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. 
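The conversion pattern in general form, as a sketch (the names mirror the rxe usage in the diff below):

/* Old style: an opaque cookie is cast back to the containing object */
static void old_cb(unsigned long data)
{
	struct rxe_qp *qp = (struct rxe_qp *)data;
	/* ... */
}
/* setup_timer(&qp->retrans_timer, old_cb, (unsigned long)qp); */

/* New style: from_timer() recovers the container via container_of(),
 * keyed on the name of the embedded struct timer_list member.
 */
static void new_cb(struct timer_list *t)
{
	struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
	/* ... */
}
/* timer_setup(&qp->retrans_timer, new_cb, 0); */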
Cc: Moni Shoua Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_loc.h | 4 ++-- drivers/infiniband/sw/rxe/rxe_qp.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_req.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index ff64875e3d15..6cdc40ed8a9f 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -136,9 +136,9 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) } } -void retransmit_timer(unsigned long data) +void retransmit_timer(struct timer_list *t) { - struct rxe_qp *qp = (struct rxe_qp *)data; + struct rxe_qp *qp = from_timer(qp, t, retrans_timer); if (qp->valid) { qp->comp.timeout = 1; diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 77b3ed0df936..d7472a442a2c 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -218,8 +218,8 @@ static inline void rxe_advance_resp_resource(struct rxe_qp *qp) qp->resp.res_head = 0; } -void retransmit_timer(unsigned long data); -void rnr_nak_timer(unsigned long data); +void retransmit_timer(struct timer_list *t); +void rnr_nak_timer(struct timer_list *t); /* rxe_srq.c */ #define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 00bda9380a2e..4469592b839d 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -275,8 +275,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ if (init->qp_type == IB_QPT_RC) { - setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp); - setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp); + timer_setup(&qp->rnr_nak_timer, rnr_nak_timer, 0); + timer_setup(&qp->retrans_timer, retransmit_timer, 0); } return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d84222f9d5d2..26a7f923045b 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -118,9 +118,9 @@ static void req_retry(struct rxe_qp *qp) } } -void rnr_nak_timer(unsigned long data) +void rnr_nak_timer(struct timer_list *t) { - struct rxe_qp *qp = (struct rxe_qp *)data; + struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); rxe_run_task(&qp->req.task, 1); From 35fb2a88ed4b77356fa679a8525c869a3594e287 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Oct 2017 07:41:11 +0300 Subject: [PATCH 218/296] RDMA/cxgb4: Declare stag as __be32 The scqe.stag is actually __be32, fix it.
drivers/infiniband/hw/cxgb4/cq.c:754:52: warning: cast to restricted __be32 Cc: Steve Wise Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/t4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 2b44fa850bbb..427aaf20d77c 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -171,7 +171,7 @@ struct t4_cqe { __be32 msn; } rcqe; struct { - u32 stag; + __be32 stag; u16 nada2; u16 cidx; } scqe; From 5455e73a76a397d77541bb5b0eedac137ef525b5 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 25 Oct 2017 09:12:30 +0530 Subject: [PATCH 219/296] bnxt_re: Implement the shutdown hook of the L2-RoCE driver interface When the host is shutting down, it invokes the shutdown hook of the L2 driver where it would attempt to free the MSI-X vectors, but would fail because some vectors are held by the RoCE driver. Implement the new hook in the L2 -> RoCE interface which will be invoked so that the RoCE driver can unregister the device and free up the MSI-X vectors it had claimed, so that L2 can proceed with its shutdown without failure. Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 87f8a5d5d78f..aafc19aa5de1 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -78,6 +78,7 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list); /* Mutex to protect the list of bnxt_re devices added */ static DEFINE_MUTEX(bnxt_re_dev_lock); static struct workqueue_struct *bnxt_re_wq; +static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait); /* for handling bnxt_en callbacks later */ static void bnxt_re_stop(void *p) @@ -92,11 +93,22 @@ static void bnxt_re_sriov_config(void *p, int num_vfs) { } +static void bnxt_re_shutdown(void *p) +{ + struct bnxt_re_dev *rdev = p; + + if (!rdev) + return; + + bnxt_re_ib_unreg(rdev, false); +} + static struct bnxt_ulp_ops bnxt_re_ulp_ops = { .ulp_async_notifier = NULL, .ulp_stop = bnxt_re_stop, .ulp_start = bnxt_re_start, - .ulp_sriov_config = bnxt_re_sriov_config + .ulp_sriov_config = bnxt_re_sriov_config, + .ulp_shutdown = bnxt_re_shutdown }; /* RoCE -> Net driver */ From 4061f3a4da4574b8c9f11a82c767aaaed3ef2aa9 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 23 Oct 2017 06:05:45 -0700 Subject: [PATCH 220/296] IB/hfi1: Race condition between user notification and driver state The handler for link init state (HLS_UP_INIT) notifies userspace (update_statusp()) before enabling the device (RCV_CTRL_RCV_PORT_ENABLE_SMASK) or setting the device state (ppd->host_link_state). This causes a race condition where userspace thinks the interface is in the INIT state before the driver has set that state. Rework the code path to eliminate the race. Delay setting the init state until after a HW settling period. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 27 ++++++++++++++++++--------- drivers/infiniband/hw/hfi1/intr.c | 14 +++++++++++++- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3afd149b8089..8dd0a4ded67b 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10316,9 +10316,6 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd) write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0); write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0); - /* adjust ppd->statusp, if needed */ - update_statusp(ppd, IB_PORT_DOWN); - dd_dev_info(ppd->dd, "logical state forced to LINK_DOWN\n"); } @@ -10400,6 +10397,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) force_logical_link_state_down(ppd); ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ + update_statusp(ppd, IB_PORT_DOWN); /* * The LNI has a mandatory wait time after the physical state @@ -10661,6 +10659,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) handle_linkup_change(dd, 1); ppd->host_link_state = HLS_UP_INIT; + update_statusp(ppd, IB_PORT_INIT); break; case HLS_UP_ARMED: if (ppd->host_link_state != HLS_UP_INIT) @@ -10682,6 +10681,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; } ppd->host_link_state = HLS_UP_ARMED; + update_statusp(ppd, IB_PORT_ARMED); /* * The simulator does not currently implement SMA messages, * so neighbor_normal is not set. Set it here when we first @@ -10704,6 +10704,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) /* tell all engines to go running */ sdma_all_running(dd); ppd->host_link_state = HLS_UP_ACTIVE; + update_statusp(ppd, IB_PORT_ACTIVE); /* Signal the IB layer that the port has went active */ event.device = &dd->verbs_dev.rdi.ibdev; @@ -12717,6 +12718,17 @@ const char *opa_pstate_name(u32 pstate) return "unknown"; } +/** + * update_statusp - Update userspace status flag + * @ppd: Port data structure + * @state: port state information + * + * Actual port status is determined by the host_link_state value + * in the ppd. + * + * host_link_state MUST be updated before updating the user space + * statusp. 
+ */ static void update_statusp(struct hfi1_pportdata *ppd, u32 state) { /* @@ -12742,9 +12754,11 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state) break; } } + dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", + opa_lstate_name(state), state); } -/* +/** * wait_logical_linkstate - wait for an IB link state change to occur * @ppd: port device * @state: the state to wait for @@ -12775,11 +12789,6 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, msleep(20); } - update_statusp(ppd, state); - dd_dev_info(ppd->dd, - "logical state changed to %s (0x%x)\n", - opa_lstate_name(state), - state); return 0; } diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 96845dfed5c5..3e3184ddab5b 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -53,6 +53,8 @@ #include "common.h" #include "sdma.h" +#define LINK_UP_DELAY 500 /* in microseconds */ + /** * format_hwmsg - format a single hwerror message * @msg message buffer @@ -102,9 +104,16 @@ static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev) ib_dispatch_event(&event); } -/* +/** + * handle_linkup_change - finish linkup/down state changes + * @dd: valid device + * @linkup: link state information + * * Handle a linkup or link down notification. + * The HW needs time to finish its link up state change. Give it that chance. + * * This is called outside an interrupt. + * */ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) { @@ -151,6 +160,9 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) ppd->neighbor_guid, ppd->neighbor_type, ppd->neighbor_port_number); + /* HW needs LINK_UP_DELAY to settle, give it that chance */ + udelay(LINK_UP_DELAY); + /* physical link went up */ ppd->linkup = 1; ppd->offline_disabled_reason = From 00f9203119dd2774564407c7a67b17d81916298b Mon Sep 17 00:00:00 2001 From: Patel Jay P Date: Mon, 23 Oct 2017 06:05:53 -0700 Subject: [PATCH 221/296] Ib/hfi1: Return actual operational VLs in port info query __subn_get_opa_portinfo stores value returned by hfi1_get_ib_cfg() as operational vls. hfi1_get_ib_cfg() returns vls_operational field in hfi1_pportdata. The problem with this is that the value is always equal to vls_supported field in hfi1_pportdata. The logic to calculate operational_vls is to set value passed by FM (in __subn_set_opa_portinfo routine). If no value is passed then default value is stored in operational_vls. Field actual_vls_operational is calculated on the basis of buffer control table. 
Hence, modifying hfi1_get_ib_cfg() to return actual_operational_vls when used with the HFI1_IB_CFG_OP_VLS parameter. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Patel Jay P Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8dd0a4ded67b..b69b85e7a244 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9966,7 +9966,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which) goto unimplemented; case HFI1_IB_CFG_OP_VLS: - val = ppd->vls_operational; + val = ppd->actual_vls_operational; break; case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */ val = VL_ARB_HIGH_PRIO_TABLE_SIZE; From 406310c66d95781e58e29a36c3d1528248aaf941 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 25 Oct 2017 08:15:18 -0700 Subject: [PATCH 222/296] IB/hfi1: Validate PKEY for incoming GSI MAD packets These are the use-cases where the pkey needs to be tested to see if a packet needs to be dropped. a) Check if pkey is not FULL_MGMT_P_KEY or LIM_MGMT_P_KEY, drop the packet as it's not part of the management partition. Self-originated packets are an exception. b) If pkey index points to FULL_MGMT_P_KEY and LIM_MGMT_P_KEY is in the table, the packet is coming from a management node, and the receiving node is also a management node, so it is safe for the packet to go through. c) If pkey index points to FULL_MGMT_P_KEY and LIM_MGMT_P_KEY is NOT in the table, drop the packet as LIM_MGMT_P_KEY should always be in the pkey table. It could be a misconfiguration. d) If pkey index points to LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in the table, it is safe for the packet to go through since a non-management node is talking to another non-management node. e) If pkey index points to LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is in the table, drop the packet because a non-management node is talking to a management node, and it could be an attack. For the implementation, these rules can be simplified to only checking for (a) and (e). There's no need to check for rule (b) as the packet doesn't need to be dropped. Rule (c) is not possible in the driver as LIM_MGMT_P_KEY is always in the pkey table. Reviewed-by: Michael J.
Ruhl Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 86 +++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 6f7b2c086c5a..e648f589bade 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -98,6 +98,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp) memset(data, 0, size); } +static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx) +{ + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + if (pkey_idx < ARRAY_SIZE(ppd->pkeys)) + return ppd->pkeys[pkey_idx]; + + return 0; +} + void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) { struct ib_event event; @@ -4259,6 +4269,18 @@ void clear_linkup_counters(struct hfi1_devdata *dd) dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK; } +static int is_full_mgmt_pkey_in_table(struct hfi1_ibport *ibp) +{ + unsigned int i; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i) + if (ppd->pkeys[i] == FULL_MGMT_P_KEY) + return 1; + + return 0; +} + /* * is_local_mad() returns 1 if 'mad' is sent from, and destined to the * local node, 0 otherwise. @@ -4326,6 +4348,63 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, return 1; } +/** + * hfi1_pkey_validation_pma - It validates PKEYs for incoming PMA MAD packets. + * @ibp: IB port data + * @in_mad: MAD packet with header and data + * @in_wc: Work completion data such as source LID, port number, etc. + * + * These are all the possible logic rules for validating a pkey: + * + * a) If pkey neither FULL_MGMT_P_KEY nor LIM_MGMT_P_KEY, + * and NOT self-originated packet: + * Drop MAD packet as it should always be part of the + * management partition unless it's a self-originated packet. + * + * b) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY in pkey table: + * The packet is coming from a management node and the receiving node + * is also a management node, so it is safe for the packet to go through. + * + * c) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY is NOT in pkey table: + * Drop the packet as LIM_MGMT_P_KEY should always be in the pkey table. + * It could be an FM misconfiguration. + * + * d) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in pkey table: + * It is safe for the packet to go through since a non-management node is + * talking to another non-management node. + * + * e) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY in pkey table: + * Drop the packet because a non-management node is talking to a + * management node, and it could be an attack. + * + * For the implementation, these rules can be simplied to only checking + * for (a) and (e). There's no need to check for rule (b) as + * the packet doesn't need to be dropped. Rule (c) is not possible in + * the driver as LIM_MGMT_P_KEY is always in the pkey table. 
+ * + * Return: + * 0 - pkey is okay, -EINVAL it's a bad pkey + */ +static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp, + const struct opa_mad *in_mad, + const struct ib_wc *in_wc) +{ + u16 pkey_value = hfi1_lookup_pkey_value(ibp, in_wc->pkey_index); + + /* Rule (a) from above */ + if (!is_local_mad(ibp, in_mad, in_wc) && + pkey_value != LIM_MGMT_P_KEY && + pkey_value != FULL_MGMT_P_KEY) + return -EINVAL; + + /* Rule (e) from above */ + if (pkey_value == LIM_MGMT_P_KEY && + is_full_mgmt_pkey_in_table(ibp)) + return -EINVAL; + + return 0; +} + static int process_subn_opa(struct ib_device *ibdev, int mad_flags, u8 port, const struct opa_mad *in_mad, struct opa_mad *out_mad, @@ -4665,8 +4744,11 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, out_mad, &resp_len); goto bail; case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf_opa(ibdev, port, in_mad, out_mad, - &resp_len); + ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc); + if (ret) + return IB_MAD_RESULT_FAILURE; + + ret = process_perf_opa(ibdev, port, in_mad, out_mad, &resp_len); goto bail; default: From 1b311f8931cfe5b678c43904cb6f838489afdc0f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 23 Oct 2017 06:06:08 -0700 Subject: [PATCH 223/296] IB/hfi1: Add tx_opcode_stats like the opcode_stats This patch adds tx_opcode_stats to parallel the (rx)opcode_stats in the debugfs. Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 56 ++++++++++++++++++++++++---- drivers/infiniband/hw/hfi1/hfi.h | 6 +-- drivers/infiniband/hw/hfi1/init.c | 7 ++++ drivers/infiniband/hw/hfi1/verbs.c | 24 ++++++++++++ 4 files changed, 83 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 24128f4e5748..76157cc03eca 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -165,6 +165,17 @@ static void _opcode_stats_seq_stop(struct seq_file *s, void *v) { } +static int opcode_stats_show(struct seq_file *s, u8 i, u64 packets, u64 bytes) +{ + if (!packets && !bytes) + return SEQ_SKIP; + seq_printf(s, "%02x %llu/%llu\n", i, + (unsigned long long)packets, + (unsigned long long)bytes); + + return 0; +} + static int _opcode_stats_seq_show(struct seq_file *s, void *v) { loff_t *spos = v; @@ -182,19 +193,49 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) } hfi1_rcd_put(rcd); } - if (!n_packets && !n_bytes) - return SEQ_SKIP; - seq_printf(s, "%02llx %llu/%llu\n", i, - (unsigned long long)n_packets, - (unsigned long long)n_bytes); - - return 0; + return opcode_stats_show(s, i, n_packets, n_bytes); } DEBUGFS_SEQ_FILE_OPS(opcode_stats); DEBUGFS_SEQ_FILE_OPEN(opcode_stats) DEBUGFS_FILE_OPS(opcode_stats); +static void *_tx_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + return _opcode_stats_seq_start(s, pos); +} + +static void *_tx_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + return _opcode_stats_seq_next(s, v, pos); +} + +static void _tx_opcode_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _tx_opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos; + int j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + for_each_possible_cpu(j) { + struct hfi1_opcode_stats_perctx *s = + per_cpu_ptr(dd->tx_opstats, j); + n_packets += 
s->stats[i].n_packets; + n_bytes += s->stats[i].n_bytes; + } + return opcode_stats_show(s, i, n_packets, n_bytes); +} + +DEBUGFS_SEQ_FILE_OPS(tx_opcode_stats); +DEBUGFS_SEQ_FILE_OPEN(tx_opcode_stats) +DEBUGFS_FILE_OPS(tx_opcode_stats); + static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) { struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; @@ -1363,6 +1404,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) return; } DEBUGFS_SEQ_FILE_CREATE(opcode_stats, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(tx_opcode_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 854b64e5976d..08d394f384c6 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -164,9 +164,7 @@ extern const struct pci_error_handlers hfi1_pci_err_handler; * Below contains all data related to a single context (formerly called port). */ -#ifdef CONFIG_DEBUG_FS struct hfi1_opcode_stats_perctx; -#endif struct ctxt_eager_bufs { ssize_t size; /* total size of eager buffers */ @@ -283,7 +281,7 @@ struct hfi1_ctxtdata { u64 imask; /* clear interrupt mask */ int ireg; /* clear interrupt register */ unsigned numa_id; /* numa node of this context */ - /* verbs stats per CTX */ + /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; /* Is ASPM interrupt supported for this context */ @@ -1276,6 +1274,8 @@ struct hfi1_devdata { /* receive context data */ struct hfi1_ctxtdata **rcd; u64 __percpu *int_counter; + /* verbs tx opcode stats */ + struct hfi1_opcode_stats_perctx __percpu *tx_opstats; /* device (not port) flags, basically device capabilities */ u16 flags; /* Number of physical ports available */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index b5c2efce73f2..f29e7a327f29 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1217,6 +1217,7 @@ static void __hfi1_free_devdata(struct kobject *kobj) free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); free_percpu(dd->send_schedule); + free_percpu(dd->tx_opstats); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1306,6 +1307,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } + dd->tx_opstats = alloc_percpu(struct hfi1_opcode_stats_perctx); + if (!dd->tx_opstats) { + ret = -ENOMEM; + goto bail; + } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 8f6666bb1c2c..a38785e224cc 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -796,6 +796,27 @@ static noinline int build_verbs_ulp_payload( return ret; } +/** + * update_tx_opstats - record stats by opcode + * @qp; the qp + * @ps: transmit packet state + * @plen: the plen in dwords + * + * This is a routine to record the tx opstats after a + * packet has been presented to the egress mechanism. 
+ */ static void update_tx_opstats(struct rvt_qp *qp, struct hfi1_pkt_state *ps, + u32 plen) +{ +#ifdef CONFIG_DEBUG_FS + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_opcode_stats_perctx *s = get_cpu_ptr(dd->tx_opstats); + + inc_opstats(plen * 4, &s->stats[ps->opcode]); + put_cpu_ptr(s); +#endif +} + /* * Build the number of DMA descriptors needed to send length bytes of data. * @@ -934,6 +955,8 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, goto bail_ecomm; return ret; } + + update_tx_opstats(qp, ps, plen); trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); return ret; @@ -1127,6 +1150,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, seg_pio_copy_end(pbuf); } + update_tx_opstats(qp, ps, plen); trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); From 2d9544aacf9e6bdfd4fed44207f815fc7b4d9fc1 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 23 Oct 2017 06:06:16 -0700 Subject: [PATCH 224/296] IB/hfi1: Ensure int mask for in-kernel receive contexts is clear The only use for the urg interrupt is for priority PSM packets. There is no reason for this interrupt to be enabled for kernel contexts. Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 35 ++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b69b85e7a244..0d12dafd619c 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12935,6 +12935,32 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp) return ret; } +/** + * get_int_mask - get 64 bit int mask + * @dd - the devdata + * @i - the csr (relative to CCE_INT_MASK) + * + * Returns the mask with the urgent interrupt mask + * bit clear for kernel receive contexts. + */ +static u64 get_int_mask(struct hfi1_devdata *dd, u32 i) +{ + u64 mask = U64_MAX; /* default to no change */ + + if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) { + int j = (i - (IS_RCVURGENT_START / 64)) * 64; + int k = !j ? IS_RCVURGENT_START % 64 : 0; + + if (j) + j -= IS_RCVURGENT_START % 64; + /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */ + for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++) + /* convert to bit in mask and clear */ + mask &= ~BIT_ULL(k); + } + return mask; +} + /* ========================================================================= */ /* @@ -12948,9 +12974,12 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable) * In HFI, the mask needs to be 1 to allow interrupts. */ if (enable) { - /* enable all interrupts */ - for (i = 0; i < CCE_NUM_INT_CSRS; i++) - write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0); + /* enable all interrupts but urgent on kernel contexts */ + for (i = 0; i < CCE_NUM_INT_CSRS; i++) { + u64 mask = get_int_mask(dd, i); + + write_csr(dd, CCE_INT_MASK + (8 * i), mask); + } init_qsfp_int(dd); } else { From 45a041cce7c4b393e6ee242b7f090393dd1bf9ac Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 23 Oct 2017 06:06:24 -0700 Subject: [PATCH 225/296] IB/hfi1: Don't modify num_user_contexts module parameter The driver parameter num_user_contexts controls global behavior and should not be modified by the driver. 
This patch eliminates modification of num_user_contexts by using a local variable to keep track of the value. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 0d12dafd619c..05e03a05fcc9 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13406,6 +13406,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) unsigned ngroups; int qos_rmt_count; int user_rmt_reduced; + u32 n_usr_ctxts; /* * Kernel receive contexts: @@ -13448,42 +13449,42 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * num_user_contexts is negative */ if (num_user_contexts < 0) - num_user_contexts = - cpumask_weight(&node_affinity.real_cpu_mask); - + n_usr_ctxts = cpumask_weight(&node_affinity.real_cpu_mask); + else + n_usr_ctxts = num_user_contexts; /* * Adjust the counts given a global max. */ - if (total_contexts + num_user_contexts > dd->chip_rcv_contexts) { + if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) { dd_dev_err(dd, - "Reducing # user receive contexts to: %d, from %d\n", + "Reducing # user receive contexts to: %d, from %u\n", (int)(dd->chip_rcv_contexts - total_contexts), - (int)num_user_contexts); + n_usr_ctxts); /* recalculate */ - num_user_contexts = dd->chip_rcv_contexts - total_contexts; + n_usr_ctxts = dd->chip_rcv_contexts - total_contexts; } /* each user context requires an entry in the RMT */ qos_rmt_count = qos_rmt_entries(dd, NULL, NULL); - if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES) { + if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count; dd_dev_err(dd, - "RMT size is reducing the number of user receive contexts from %d to %d\n", - (int)num_user_contexts, + "RMT size is reducing the number of user receive contexts from %u to %d\n", + n_usr_ctxts, user_rmt_reduced); /* recalculate */ - num_user_contexts = user_rmt_reduced; + n_usr_ctxts = user_rmt_reduced; } - total_contexts += num_user_contexts; + total_contexts += n_usr_ctxts; /* the first N are kernel contexts, the rest are user/vnic contexts */ dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; dd->first_dyn_alloc_ctxt = num_kernel_contexts; dd->num_vnic_contexts = num_vnic_contexts; - dd->num_user_contexts = num_user_contexts; - dd->freectxts = num_user_contexts; + dd->num_user_contexts = n_usr_ctxts; + dd->freectxts = n_usr_ctxts; dd_dev_info(dd, "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", (int)dd->chip_rcv_contexts, From 31acd18b61a48bf9225f106a6ff74774583ee375 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 23 Oct 2017 06:06:32 -0700 Subject: [PATCH 226/296] IB/hfi1: Take advantage of kvzalloc_node in sdma initialization The code that allocates the tx ring in the sdma code fails to take advantage of kvzalloc variations. Fix by converting to use kvzalloc_node. 
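As a side note (not part of this patch), memory returned by kvzalloc_node() may come from either the slab allocator or vmalloc, so the matching release must be kvfree() rather than kfree(). A minimal sketch of the alloc/free pairing, with illustrative variable names:

        /* sketch: kvzalloc_node() tries kmalloc first and falls back to
         * vmalloc, allocating on the given NUMA node; kvfree() handles
         * both cases on release
         */
        ring = kvzalloc_node(nelem * sizeof(*ring), GFP_KERNEL, node);
        if (!ring)
                return -ENOMEM;
        /* ... use the ring ... */
        kvfree(ring);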
Reported-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 78716b829a28..6a1b8e479616 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1465,13 +1465,8 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) if (!sde->descq) goto bail; sde->tx_ring = - kcalloc(descq_cnt, sizeof(struct sdma_txreq *), - GFP_KERNEL); - if (!sde->tx_ring) - sde->tx_ring = - vzalloc( - sizeof(struct sdma_txreq *) * - descq_cnt); + kvzalloc_node(sizeof(struct sdma_txreq *) * descq_cnt, + GFP_KERNEL, dd->node); if (!sde->tx_ring) goto bail; } From a2c80b7b41194417b597d4c96c9892e4355f57a4 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Thu, 26 Oct 2017 17:10:23 +0800 Subject: [PATCH 227/296] RDMA/hns: Add rereg mr support for hip08 This patch adds rereg mr support for hip08. Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Lijun Ou Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 3 + drivers/infiniband/hw/hns/hns_roce_device.h | 12 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 53 +++++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 4 + drivers/infiniband/hw/hns/hns_roce_mr.c | 123 ++++++++++++++++++++ 5 files changed, 195 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index f0039a7166d8..b1c94223c28b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -75,6 +75,9 @@ enum { HNS_ROCE_CMD_DESTROY_MPT_BT1 = 0x29, HNS_ROCE_CMD_DESTROY_MPT_BT2 = 0x2a, + /* MPT commands */ + HNS_ROCE_CMD_QUERY_MPT = 0x62, + /* SRQC BT commands */ HNS_ROCE_CMD_WRITE_SRQC_BT0 = 0x30, HNS_ROCE_CMD_WRITE_SRQC_BT1 = 0x31, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 31c7ab8fcdbe..7b3f4440cd5c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -170,6 +170,10 @@ enum { HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, }; +enum { + HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), +}; + enum hns_roce_mtt_type { MTT_TYPE_WQE, MTT_TYPE_CQE, @@ -567,6 +571,7 @@ struct hns_roce_caps { u32 cqe_buf_pg_sz; u32 cqe_hop_num; u32 chunk_sz; /* chunk size in non multihop mode*/ + u64 flags; }; struct hns_roce_hw { @@ -587,6 +592,10 @@ struct hns_roce_hw { enum ib_mtu mtu); int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx); + int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr, int flags, u32 pdn, + int mr_access_flags, u64 iova, u64 size, + void *mb_buf); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, dma_addr_t dma_handle, int nent, u32 vector); @@ -783,6 +792,9 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int hns_roce_dereg_mr(struct ib_mr *ibmr); int hns_roce_hw2sw_mpt(struct hns_roce_dev 
*hr_dev, struct hns_roce_cmd_mailbox *mailbox, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b0736c32e5d1..7e24e1f1c2b1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -945,6 +945,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; + caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR; caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = 2; caps->local_ca_ack_delay = 0; @@ -1183,6 +1184,57 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, return 0; } +static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr, int flags, + u32 pdn, int mr_access_flags, u64 iova, + u64 size, void *mb_buf) +{ + struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf; + + if (flags & IB_MR_REREG_PD) { + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, + V2_MPT_BYTE_4_PD_S, pdn); + mr->pd = pdn; + } + + if (flags & IB_MR_REREG_ACCESS) { + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, + V2_MPT_BYTE_8_BIND_EN_S, + (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0)); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, + V2_MPT_BYTE_8_ATOMIC_EN_S, + (mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0)); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, + (mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0)); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, + (mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, + (mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); + } + + if (flags & IB_MR_REREG_TRANS) { + mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova)); + mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova)); + mpt_entry->len_l = cpu_to_le32(lower_32_bits(size)); + mpt_entry->len_h = cpu_to_le32(upper_32_bits(size)); + + mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); + mpt_entry->pbl_ba_l = + cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); + roce_set_field(mpt_entry->byte_48_mode_ba, + V2_MPT_BYTE_48_PBL_BA_H_M, + V2_MPT_BYTE_48_PBL_BA_H_S, + upper_32_bits(mr->pbl_ba >> 3)); + mpt_entry->byte_48_mode_ba = + cpu_to_le32(mpt_entry->byte_48_mode_ba); + + mr->iova = iova; + mr->size = size; + } + + return 0; +} + static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, @@ -3044,6 +3096,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .set_gid = hns_roce_v2_set_gid, .set_mac = hns_roce_v2_set_mac, .write_mtpt = hns_roce_v2_write_mtpt, + .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt, .write_cqc = hns_roce_v2_write_cqc, .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 31120fd5e76b..63a2f3b2b964 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -508,6 +508,10 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->get_dma_mr = hns_roce_get_dma_mr; ib_dev->reg_user_mr = hns_roce_reg_user_mr; ib_dev->dereg_mr = hns_roce_dereg_mr; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { + ib_dev->rereg_user_mr = hns_roce_rereg_user_mr; + ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + } /* OTHERS */ ib_dev->get_port_immutable = hns_roce_port_immutable; diff --git 
a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index c47a5ee5db17..da86a8117bd5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1065,6 +1065,129 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(ret); } +int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct hns_roce_mr *mr = to_hr_mr(ibmr); + struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + unsigned long mtpt_idx; + u32 pdn = 0; + int npages; + int ret; + + if (!mr->enabled) + return -EINVAL; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, + HNS_ROCE_CMD_QUERY_MPT, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) + goto free_cmd_mbox; + + ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx); + if (ret) + dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + + mr->enabled = 0; + + if (flags & IB_MR_REREG_PD) + pdn = to_hr_pd(pd)->pdn; + + if (flags & IB_MR_REREG_TRANS) { + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + + if (hr_dev->caps.pbl_hop_num) + hns_roce_mhop_free(hr_dev, mr); + else + dma_free_coherent(dev, npages * 8, mr->pbl_buf, + mr->pbl_dma_addr); + } + ib_umem_release(mr->umem); + + mr->umem = ib_umem_get(ibmr->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(mr->umem)) { + ret = PTR_ERR(mr->umem); + mr->umem = NULL; + goto free_cmd_mbox; + } + npages = ib_umem_page_count(mr->umem); + + if (hr_dev->caps.pbl_hop_num) { + ret = hns_roce_mhop_alloc(hr_dev, npages, mr); + if (ret) + goto release_umem; + } else { + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) { + ret = -ENOMEM; + goto release_umem; + } + } + } + + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, + mr_access_flags, virt_addr, + length, mailbox->buf); + if (ret) { + if (flags & IB_MR_REREG_TRANS) + goto release_umem; + else + goto free_cmd_mbox; + } + + if (flags & IB_MR_REREG_TRANS) { + ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); + if (ret) { + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + + if (hr_dev->caps.pbl_hop_num) + hns_roce_mhop_free(hr_dev, mr); + else + dma_free_coherent(dev, npages * 8, + mr->pbl_buf, + mr->pbl_dma_addr); + } + + goto release_umem; + } + } + + ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx); + if (ret) { + dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + goto release_umem; + } + + mr->enabled = 1; + if (flags & IB_MR_REREG_ACCESS) + mr->access = mr_access_flags; + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return 0; + +release_umem: + ib_umem_release(mr->umem); + +free_cmd_mbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + int hns_roce_dereg_mr(struct ib_mr *ibmr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); From 023c1477b0d0b123c0dec6466ec568a25928bef2 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Thu, 26 Oct 2017 17:10:24 +0800 Subject: [PATCH 228/296] RDMA/hns: Generate gid type of RoCEv2 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 is added for selecting the RoCE capability in the hns driver. 
When HNS_ROCE_CAP_FLAG_ROCE_V1_V2 is set, the driver will inform the IB core that the related hns device can support RoCEv2, and the IB core can then generate GIDs of the related type. Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- drivers/infiniband/hw/hns/hns_roce_main.c | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7b3f4440cd5c..ba3bb36f84f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -172,6 +172,7 @@ enum { enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), + HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), }; enum hns_roce_mtt_type { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7e24e1f1c2b1..2e17dc1272a3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -945,7 +945,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; - caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR; + caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | + HNS_ROCE_CAP_FLAG_ROCE_V1_V2; caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = 2; caps->local_ca_ack_delay = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 63a2f3b2b964..8bb75eaf12d6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -406,8 +406,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, struct ib_port_attr attr; int ret; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; - ret = ib_query_port(ib_dev, port_num, &attr); if (ret) return ret; @@ -416,6 +414,9 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->max_mad_size = IB_MGMT_MAD_SIZE; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (to_hr_dev(ib_dev)->caps.flags & HNS_ROCE_CAP_FLAG_ROCE_V1_V2) + immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; return 0; } From b5ff0f610b523478a167ae6f07e30c9f75d0c538 Mon Sep 17 00:00:00 2001 From: "Wei Hu(Xavier)" Date: Thu, 26 Oct 2017 17:10:25 +0800 Subject: [PATCH 229/296] RDMA/hns: Configure sgid type for hip08 RoCE The hardware needs to generate RoCEv1 or RoCEv2 packets according to the configured sgid type. Besides, update the gid table size for the hip08 RoCE device. 
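To make the gid-type handling concrete, here is a hedged sketch of how a GID attribute's type can be translated into the hardware sgid_type used by the hunks below (the helper name is illustrative, not part of the patch):

        /* illustrative helper: map the core GID type to the hw sgid type */
        static enum hns_roce_sgid_type to_hw_sgid_type(const union ib_gid *gid,
                                                       const struct ib_gid_attr *attr)
        {
                if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
                        return ipv6_addr_v4mapped((void *)gid) ?
                               GID_TYPE_FLAG_ROCE_V2_IPV4 :
                               GID_TYPE_FLAG_ROCE_V2_IPV6;

                return GID_TYPE_FLAG_ROCE_V1;   /* IB_GID_TYPE_ROCE */
        }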
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 7 ++++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 25 +++++++++++++++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 7 ++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 11 +++++---- 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ba3bb36f84f8..c4a28a00fe88 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -586,8 +586,8 @@ struct hns_roce_hw { u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event); int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); - void (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid); + int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, + union ib_gid *gid, const struct ib_gid_attr *attr); int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 00cbfc97bea8..af27168faf0f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1703,8 +1703,9 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -static void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid) +static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, + int gid_index, union ib_gid *gid, + const struct ib_gid_attr *attr) { u32 *p = NULL; u8 gid_idx = 0; @@ -1726,6 +1727,8 @@ static void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, p = (u32 *)&gid->raw[0xc]; roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG + (HNS_ROCE_V1_GID_NUM * gid_idx)); + + return 0; } static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2e17dc1272a3..3b0cf5536486 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -948,7 +948,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2; caps->pkey_table_len[0] = 1; - caps->gid_table_len[0] = 2; + caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; @@ -1043,12 +1043,27 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -static void hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid) +static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, + int gid_index, union ib_gid *gid, + const struct ib_gid_attr *attr) { + enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; u32 *p; u32 val; + if (!gid || !attr) + return -EINVAL; + + if (attr->gid_type == IB_GID_TYPE_ROCE) + sgid_type = GID_TYPE_FLAG_ROCE_V1; + + if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + if (ipv6_addr_v4mapped((void *)gid)) + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; + else + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + } + p = (u32 *)&gid->raw[0]; roce_raw_write(*p, hr_dev->reg_base + 
ROCEE_VF_SGID_CFG0_REG + 0x20 * gid_index); @@ -1067,9 +1082,11 @@ static void hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index); roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M, - ROCEE_VF_SGID_CFG4_SGID_TYPE_S, 0); + ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type); roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val); + + return 0; } static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 6106ad1b1322..864fcd1448b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -77,6 +77,7 @@ #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 #define HNS_ROCE_PBL_HOP_NUM 2 +#define HNS_ROCE_V2_GID_INDEX_NUM 256 #define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18) @@ -203,6 +204,12 @@ enum hns_roce_cmd_return_status { CMD_QUEUE_FULL = 3, }; +enum hns_roce_sgid_type { + GID_TYPE_FLAG_ROCE_V1 = 0, + GID_TYPE_FLAG_ROCE_V2_IPV4, + GID_TYPE_FLAG_ROCE_V2_IPV6, +}; + struct hns_roce_v2_cq_context { u32 byte_4_pg_ceqn; u32 byte_8_cqn; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8bb75eaf12d6..5bc8cc2d9ef4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -81,17 +81,19 @@ static int hns_roce_add_gid(struct ib_device *device, u8 port_num, struct hns_roce_dev *hr_dev = to_hr_dev(device); u8 port = port_num - 1; unsigned long flags; + int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; spin_lock_irqsave(&hr_dev->iboe.lock, flags); - hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid); + ret = hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid, + attr); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - return 0; + return ret; } static int hns_roce_del_gid(struct ib_device *device, u8 port_num, @@ -101,17 +103,18 @@ static int hns_roce_del_gid(struct ib_device *device, u8 port_num, union ib_gid zgid = { {0} }; u8 port = port_num - 1; unsigned long flags; + int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; spin_lock_irqsave(&hr_dev->iboe.lock, flags); - hr_dev->hw->set_gid(hr_dev, port, index, &zgid); + ret = hr_dev->hw->set_gid(hr_dev, port, index, &zgid, NULL); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - return ret; } static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, From d55142461759890ac6dc3e93b4c5a4271937b1cf Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:43 +0800 Subject: [PATCH 230/296] RDMA/hns: Update calculation of irrl_ba field for hip08 The irrl (initiator RDMA Read/Atomic list) base address fields of the qp context are assigned from addr[63:6] of the buffer address. This patch fixes the calculation accordingly. 
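To make the shifts in the hunk below concrete: the context stores only addr[63:6] of the 64-byte-aligned IRRL base, split into a 32-bit low part and a high part. A hedged arithmetic sketch with illustrative variable names:

        /* sketch: drop the 6 always-zero alignment bits, then split */
        u64 ba = dma_handle >> 6;       /* addr[63:6]                   */
        u32 lo = (u32)ba;               /* addr[37:6]  -> irrl_ba       */
        u32 hi = (u32)(ba >> 32);       /* addr[63:38] -> byte_208_irrl */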
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3b0cf5536486..a950f7838555 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2393,11 +2393,11 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_108_RX_REQ_EPSN_M, V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0); - context->irrl_ba = (u32)dma_handle_2; + context->irrl_ba = (u32)(dma_handle_2 >> 6); qpc_mask->irrl_ba = 0; roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, V2_QPC_BYTE_208_IRRL_BA_S, - (dma_handle_2 >> 32) & V2_QPC_BYTE_208_IRRL_BA_M); + dma_handle_2 >> (32 + 6)); roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, V2_QPC_BYTE_208_IRRL_BA_S, 0); From e92f2c182bcfdda4f05fd8ebf269c0b5bfe18e7d Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:44 +0800 Subject: [PATCH 231/296] RDMA/hns: Configure TRRL field in hip08 RoCE device The TRRL (Target RDMA Read/Atomic List) records the information of received RDMA READ or ATOMIC operations in hip08. It will be used by the hardware. The driver needs to assign a contiguous physical address for the trrl_ba field of the qp context. Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hem.c | 3 +++ drivers/infiniband/hw/hns/hns_roce_hem.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 23 +++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 21 ++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_qp.c | 19 ++++++++++++++++- 7 files changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c4a28a00fe88..724a5a0c1991 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -367,6 +367,7 @@ struct hns_roce_qp_table { spinlock_t lock; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; + struct hns_roce_hem_table trrl_table; }; struct hns_roce_cq_table { @@ -542,6 +543,7 @@ struct hns_roce_caps { int mtpt_entry_sz; int qpc_entry_sz; int irrl_entry_sz; + int trrl_entry_sz; int cqc_entry_sz; u32 pbl_ba_pg_sz; u32 pbl_buf_pg_sz; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 576eb3bb1f2d..f1f8fa43d59d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1037,6 +1037,9 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); + if (hr_dev->caps.trrl_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->qp_table.trrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index af28bbf31c7f..db66db12075e 100644 ---
a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -49,6 +49,7 @@ enum { HEM_TYPE_MTT, HEM_TYPE_CQE, HEM_TYPE_IRRL, + HEM_TYPE_TRRL, }; #define HNS_ROCE_HEM_CHUNK_LEN \ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a950f7838555..8ed81644e022 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -911,6 +911,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; + caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; @@ -2265,10 +2266,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct device *dev = hr_dev->dev; + dma_addr_t dma_handle_3; dma_addr_t dma_handle_2; dma_addr_t dma_handle; u32 page_size; u8 port_num; + u64 *mtts_3; u64 *mtts_2; u64 *mtts; u8 *dmac; @@ -2291,6 +2294,14 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; } + /* Search TRRL's mtts */ + mtts_3 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table, + hr_qp->qpn, &dma_handle_3); + if (!mtts_3) { + dev_err(dev, "qp trrl_table find failed\n"); + return -EINVAL; + } + if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || (attr_mask & IB_QP_PKEY_INDEX) || (attr_mask & IB_QP_QKEY)) { dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask); @@ -2393,6 +2404,18 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_108_RX_REQ_EPSN_M, V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0); + roce_set_field(context->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, + V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4); + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, + V2_QPC_BYTE_132_TRRL_BA_S, 0); + context->trrl_ba = (u32)(dma_handle_3 >> (16 + 4)); + qpc_mask->trrl_ba = 0; + roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, + V2_QPC_BYTE_140_TRRL_BA_S, + (u32)(dma_handle_3 >> (32 + 16 + 4))); + roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, + V2_QPC_BYTE_140_TRRL_BA_S, 0); + context->irrl_ba = (u32)(dma_handle_2 >> 6); qpc_mask->irrl_ba = 0; roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 864fcd1448b7..ea142253d8f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -64,6 +64,7 @@ #define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64 #define HNS_ROCE_V2_QPC_ENTRY_SZ 256 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 +#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 5bc8cc2d9ef4..cf02ac2d3596 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -597,16 +597,35 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_qp; } + if (hr_dev->caps.trrl_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->qp_table.trrl_table, + HEM_TYPE_TRRL, + 
hr_dev->caps.trrl_entry_sz * + hr_dev->caps.max_qp_dest_rdma, + hr_dev->caps.num_qps, 1); + if (ret) { + dev_err(dev, + "Failed to init trrl_table memory, aborting.\n"); + goto err_unmap_irrl; + } + } + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table, HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz, hr_dev->caps.num_cqs, 1); if (ret) { dev_err(dev, "Failed to init CQ context memory, aborting.\n"); - goto err_unmap_irrl; + goto err_unmap_trrl; } return 0; +err_unmap_trrl: + if (hr_dev->caps.trrl_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->qp_table.trrl_table); + err_unmap_irrl: hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index b1c9a3768b19..49586ec8126a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -194,13 +194,23 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, goto err_put_qp; } + if (hr_dev->caps.trrl_entry_sz) { + /* Alloc memory for TRRL */ + ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table, + hr_qp->qpn); + if (ret) { + dev_err(dev, "TRRL table get failed\n"); + goto err_put_irrl; + } + } + spin_lock_irq(&qp_table->lock); ret = radix_tree_insert(&hr_dev->qp_table_tree, hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); spin_unlock_irq(&qp_table->lock); if (ret) { dev_err(dev, "QPC radix_tree_insert failed\n"); - goto err_put_irrl; + goto err_put_trrl; } atomic_set(&hr_qp->refcount, 1); @@ -208,6 +218,10 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, return 0; +err_put_trrl: + if (hr_dev->caps.trrl_entry_sz) + hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); + err_put_irrl: hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); @@ -239,6 +253,9 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) wait_for_completion(&hr_qp->free); if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { + if (hr_dev->caps.trrl_entry_sz) + hns_roce_table_put(hr_dev, &qp_table->trrl_table, + hr_qp->qpn); hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); } From 651487c229d5fe7c6d65acbfb061089d8c899729 Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:45 +0800 Subject: [PATCH 232/296] RDMA/hns: Configure fence attribute in hip08 RoCE When posting WRs for mixed RDMA operations, we need to use the fence mechanism to keep the correct execution order. Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8ed81644e022..049a0dfd207e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -104,7 +104,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; - rc_sq_wqe = wqe; memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); for (i = 0; i < wr->num_sge; i++) @@ -112,6 +111,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, rc_sq_wqe->inv_key_immtdata = send_ieth(wr); + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, + (wr->send_flags & IB_SEND_FENCE) ? 
1 : 0); + switch (wr->opcode) { case IB_WR_RDMA_READ: roce_set_field(rc_sq_wqe->byte_4, From 492b2bd0267d466845042c6f1dfff8a6bc8f871b Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:46 +0800 Subject: [PATCH 233/296] RDMA/hns: Set se attribute of sqwqe in hip08 When the send flags include IB_SEND_SOLICITED, the se (solicited event) field of the sqwqe will be set. Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 049a0dfd207e..f2615446495c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -114,6 +114,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, + (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); + switch (wr->opcode) { case IB_WR_RDMA_READ: roce_set_field(rc_sq_wqe->byte_4, From a49d761fc10ee64f1d97aba0b4b6244854d288bc Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:47 +0800 Subject: [PATCH 234/296] RDMA/hns: Enable the cqe field of sqwqe of RC When the sig_type of the qpc is non-selectable, all of the sq's wqes will produce cqes, regardless of the cqe attribute of the wqe. When the sig_type of the qpc is selectable, the cqe attribute of the wqe decides whether to produce a cqe. Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f2615446495c..b2513bda3786 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -117,6 +117,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, + (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + switch (wr->opcode) { case IB_WR_RDMA_READ: roce_set_field(rc_sq_wqe->byte_4, @@ -198,8 +201,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; } - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, 1); - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); dseg = wqe; if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { From befb63b43df535917cd73e961e701d0b157b92d0 Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:48 +0800 Subject: [PATCH 235/296] RDMA/hns: Set sq_cur_sge_blk_addr field in QPC in hip08 If extended SGEs exist, the sq_cur_sge_blk_addr field in the QPC (qp context) should be configured. 
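For reference, the extended SGE segment only exists when a WQE can carry more than two SGEs, which is the max_gs > 2 condition used in the hunk that follows. A hedged sketch of the address split it performs:

        /* sketch: locate the extended-SGE block in the MTT and split its
         * address like the other block addresses in the QPC
         */
        if (hr_qp->sq.max_gs > 2) {     /* extended SGEs exist */
                u64 blk = mtts[hr_qp->sge.offset / page_size];

                context->sq_cur_sge_blk_addr = (u32)(blk >> PAGE_ADDR_SHIFT);
                /* the upper bits land in byte_184_irrl_idx via roce_set_field() */
        }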
Signed-off-by: Lijun Ou Signed-off-by: Shaobo Xu Signed-off-by: Wei Hu (Xavier) Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 ++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b2513bda3786..ee2d832c53c2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2566,6 +2566,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct device *dev = hr_dev->dev; dma_addr_t dma_handle; + u32 page_size; u64 *mtts; /* Search qp buf's mtts */ @@ -2608,6 +2609,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); + page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + context->sq_cur_sge_blk_addr = hr_qp->sq.max_gs > 2 ? + ((u32)(mtts[hr_qp->sge.offset / page_size] + >> PAGE_ADDR_SHIFT)) : 0; + roce_set_field(context->byte_184_irrl_idx, + V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, + V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, + hr_qp->sq.max_gs > 2 ? + (mtts[hr_qp->sge.offset / page_size] >> + (32 + PAGE_ADDR_SHIFT)) : 0); + qpc_mask->sq_cur_sge_blk_addr = 0; + roce_set_field(qpc_mask->byte_184_irrl_idx, + V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, + V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0); + context->rx_sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); roce_set_field(context->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index ea142253d8f8..3d9114ee0154 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -353,7 +353,7 @@ struct hns_roce_v2_qp_context { u32 byte_168_irrl_idx; u32 byte_172_sq_psn; u32 byte_176_msg_pktn; - u32 sq_cur_sqe_blk_addr; + u32 sq_cur_sge_blk_addr; u32 byte_184_irrl_idx; u32 cur_sge_offset; u32 byte_192_ext_sge; From 2872646134aa5cb99f674d81c1fdb0d595243499 Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:49 +0800 Subject: [PATCH 236/296] RDMA/hns: Update the usage of ack timeout in hip08 The ack timeout value in the qp context is a 5-bit value assigned by users. When the at field of the qpc is set to zero, the timer is disabled. The ack timeout field is only updated when attr_mask has IB_QP_TIMEOUT set. 
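As background (the usual InfiniBand convention, not something introduced by this patch), the 5-bit ack timeout encodes an exponent: 0 disables the timer, and a non-zero value n yields a timeout of roughly 4.096 us * 2^n. A hedged sketch of the guarded update, mirroring the hunk that follows:

        /* sketch: only touch the at field when the caller asked for it */
        if (attr_mask & IB_QP_TIMEOUT) {
                roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M,
                               V2_QPC_BYTE_28_AT_S, attr->timeout);
                roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_AT_M,
                               V2_QPC_BYTE_28_AT_S, 0);
        }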
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ee2d832c53c2..d74a5220d826 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2725,15 +2725,12 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_LSN_M, V2_QPC_BYTE_212_LSN_S, 0); - if (attr->timeout < 0xf) - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, - V2_QPC_BYTE_28_AT_S, 0xf); - else + if (attr_mask & IB_QP_TIMEOUT) { roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, attr->timeout); - - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, - V2_QPC_BYTE_28_AT_S, 0); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, + V2_QPC_BYTE_28_AT_S, 0); + } roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, V2_QPC_BYTE_28_SL_S, From b5fddb7ce768a2e97dcf90614c5da4a7824bdc81 Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:50 +0800 Subject: [PATCH 237/296] RDMA/hns: Add sq_invld_flg field in QP context In hip08 RoCE, the sq_invld_flg field needs to be added to the QP context for the RoCE hardware. Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d74a5220d826..c1f33251a73c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2042,6 +2042,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_bit(qpc_mask->byte_168_irrl_idx, V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0); + roce_set_bit(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_INVLD_FLG_S, 0); roce_set_field(qpc_mask->byte_168_irrl_idx, V2_QPC_BYTE_168_IRRL_IDX_LSB_M, V2_QPC_BYTE_168_IRRL_IDX_LSB_S, 0); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 3d9114ee0154..04b7a51b8efb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -606,8 +606,10 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S 20 -#define V2_QPC_BYTE_168_LP_SGEN_INI_S 21 -#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 21) +#define V2_QPC_BYTE_168_SQ_INVLD_FLG_S 21 + +#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22 +#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22) #define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24 #define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24) From e8d1853357d237e6af69e384d6b05a23e3a70b93 Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:51 +0800 Subject: [PATCH 238/296] RDMA/hns: Set the owner field of SQWQE in hip08 RoCE The owner bit needs to be set when posting a sqwqe in hip08 RoCE. The owner bit is used according to the following algorithm: its value should be 1 on the first lap around the ring, 0 on the second lap, and so on alternately. 
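To spell out the algorithm, the lap counter falls out of the monotonically increasing producer index. A hedged sketch matching the hunk below:

        /* sketch: sq.head never wraps, so head / wqe_cnt is the current
         * lap around the ring; inverting its low bit gives 1 on the first
         * lap, 0 on the second, 1 on the third, and so on
         */
        lap = qp->sq.head >> ilog2(qp->sq.wqe_cnt);
        owner_bit = ~lap & 0x1;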
Signed-off-by: Lijun Ou Signed-off-by: Wei Hu (Xavier) Signed-off-by: Shaobo Xu Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c1f33251a73c..7008fa3add80 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -62,6 +62,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct hns_roce_v2_db sq_db; unsigned int sge_ind = 0; unsigned int wqe_sz = 0; + unsigned int owner_bit; unsigned long flags; unsigned int ind; void *wqe = NULL; @@ -104,6 +105,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; + owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1; rc_sq_wqe = wqe; memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); for (i = 0; i < wr->num_sge; i++) @@ -120,6 +122,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); + switch (wr->opcode) { case IB_WR_RDMA_READ: roce_set_field(rc_sq_wqe->byte_4, From 0203b14c4f32b9b6e526db910844222705fc6e5f Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:52 +0800 Subject: [PATCH 239/296] RDMA/hns: Unify the calculation for hem index in hip08 The calculation of the hem index is different between hns_roce_table_get and hns_roce_table_find. When the table chunk size of the TRRL is not divisible by the object size, hns_roce_table_find will fail to find the trrl table. This patch updates the hem index calculation in hns_roce_table_find to match the one in hns_roce_table_get. Signed-off-by: Shaobo Xu Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f1f8fa43d59d..8b733a66fae5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -724,7 +724,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem; struct page *page = NULL; unsigned long mhop_obj = obj; - unsigned long idx; + unsigned long obj_per_chunk; + unsigned long idx_offset; int offset, dma_offset; int i, j; u32 hem_idx = 0; @@ -735,9 +736,10 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, mutex_lock(&table->mutex); if (!hns_roce_check_whether_mhop(hr_dev, table->type)) { - idx = (obj & (table->num_obj - 1)) * table->obj_size; - hem = table->hem[idx / table->table_chunk_size]; - dma_offset = offset = idx % table->table_chunk_size; + obj_per_chunk = table->table_chunk_size / table->obj_size; + hem = table->hem[(obj & (table->num_obj - 1)) / obj_per_chunk]; + idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk; + dma_offset = offset = idx_offset * table->obj_size; } else { hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); /* mtt mhop */ From 26beb85f4168dba76a197883065398a55ce11cf4 Mon Sep 17 00:00:00 2001 From: oulijun Date: Fri, 10 Nov 2017 16:55:53 +0800 Subject: [PATCH 240/296] RDMA/hns: Modify the usage of cmd_sn in hip08 The cmd_sn field of the CQ doorbell is initialized to 0. 
It should be incremented on the first doorbell ring after each completion event. If the cmd_sn of two adjacent notify doorbells is the same, the hardware will treat them as the same notify request and update the request type according to the priority level of the next event and the solicited event. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_cq.c | 2 ++ drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index f558f95d8827..2111b57a3489 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -156,6 +156,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, } hr_cq->cons_index = 0; + hr_cq->arm_sn = 1; hr_cq->uar = hr_uar; atomic_set(&hr_cq->refcount, 1); @@ -456,6 +457,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) return; } + ++cq->arm_sn; cq->comp(cq); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 724a5a0c1991..01d3d695cbba 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -347,6 +347,7 @@ struct hns_roce_cq { u32 cons_index; void __iomem *cq_db_l; u16 *tptr_addr; + int arm_sn; unsigned long cqn; u32 vector; atomic_t refcount; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7008fa3add80..8f719c00467b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1440,7 +1440,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, V2_CQ_DB_PARAMETER_CONS_IDX_S, hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CMD_SN_M, - V2_CQ_DB_PARAMETER_CMD_SN_S, 1); + V2_CQ_DB_PARAMETER_CMD_SN_S, hr_cq->arm_sn & 0x3); roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S, notification_flag); From fec99ededf6be46178d7f571b34dae80fc05f090 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Oct 2017 18:56:49 +0300 Subject: [PATCH 241/296] RDMA/umem: Avoid partial declaration of non-static function The RDMA/umem code uses the generic RB-tree macros to generate various ib_umem access functions. The generation is performed with the INTERVAL_TREE_DEFINE macro, which allows one of two modes: declare all of the functions static, or declare none of them static. The second mode of operation produces the following sparse errors: drivers/infiniband/core/umem_rbtree.c:69:1: warning: symbol 'rbt_ib_umem_iter_first' was not declared. Should it be static? drivers/infiniband/core/umem_rbtree.c:69:1: warning: symbol 'rbt_ib_umem_iter_next' was not declared. Should it be static? Relocating the code and declaring these functions "static" solves the issue. Because there is no need to keep a separate file for two functions, let's consolidate umem_rbtree.c and umem_odp.c into one file.
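A note on the interval convention at work here: ib_umem_start()/ib_umem_end() describe a half-open range [start, end), while the interval tree stores closed intervals, which is why node_last() and the iterator calls subtract one. A minimal sketch of that conversion (illustration only; the wrapper name is hypothetical, while the rbt_ib_umem_* iterators are the ones generated below):

/*
 * Query a half-open range [addr, addr + length) against a tree that
 * stores closed intervals: the last address actually covered is
 * addr + length - 1, matching the "- 1" in node_last().
 */
static struct umem_odp_node *lookup_range(struct rb_root_cached *root,
					  u64 addr, u64 length)
{
	return rbt_ib_umem_iter_first(root, addr, addr + length - 1);
}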
Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/umem_odp.c | 72 +++++++++++++++++ drivers/infiniband/core/umem_rbtree.c | 109 -------------------------- include/rdma/ib_umem_odp.h | 4 - 4 files changed, 73 insertions(+), 114 deletions(-) delete mode 100644 drivers/infiniband/core/umem_rbtree.c diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index b4df164f71a6..336ace50b0ab 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -14,7 +14,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ security.o nldev.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o -ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o +ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o ib_cm-y := cm.o diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 55e8f5ed8b3c..2aadf5813a40 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -39,11 +39,44 @@ #include #include #include +#include #include #include #include +/* + * The ib_umem list keeps track of memory regions for which the HW + * device request to receive notification when the related memory + * mapping is changed. + * + * ib_umem_lock protects the list. + */ + +static u64 node_start(struct umem_odp_node *n) +{ + struct ib_umem_odp *umem_odp = + container_of(n, struct ib_umem_odp, interval_tree); + + return ib_umem_start(umem_odp->umem); +} + +/* Note that the representation of the intervals in the interval tree + * considers the ending point as contained in the interval, while the + * function ib_umem_end returns the first address which is not contained + * in the umem. + */ +static u64 node_last(struct umem_odp_node *n) +{ + struct ib_umem_odp *umem_odp = + container_of(n, struct ib_umem_odp, interval_tree); + + return ib_umem_end(umem_odp->umem) - 1; +} + +INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, + node_start, node_last, static, rbt_ib_umem) + static void ib_umem_notifier_start_account(struct ib_umem *item) { mutex_lock(&item->odp_data->umem_mutex); @@ -754,3 +787,42 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, mutex_unlock(&umem->odp_data->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); + +/* @last is not a part of the interval. See comment for function + * node_last. 
+ */ +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, + u64 start, u64 last, + umem_call_back cb, + void *cookie) +{ + int ret_val = 0; + struct umem_odp_node *node, *next; + struct ib_umem_odp *umem; + + if (unlikely(start == last)) + return ret_val; + + for (node = rbt_ib_umem_iter_first(root, start, last - 1); + node; node = next) { + next = rbt_ib_umem_iter_next(node, start, last - 1); + umem = container_of(node, struct ib_umem_odp, interval_tree); + ret_val = cb(umem->umem, start, last, cookie) || ret_val; + } + + return ret_val; +} +EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); + +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, + u64 addr, u64 length) +{ + struct umem_odp_node *node; + + node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); + if (node) + return container_of(node, struct ib_umem_odp, interval_tree); + return NULL; + +} +EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c deleted file mode 100644 index fc801920e341..000000000000 --- a/drivers/infiniband/core/umem_rbtree.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2014 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -/* - * The ib_umem list keeps track of memory regions for which the HW - * device request to receive notification when the related memory - * mapping is changed. - * - * ib_umem_lock protects the list. - */ - -static inline u64 node_start(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_start(umem_odp->umem); -} - -/* Note that the representation of the intervals in the interval tree - * considers the ending point as contained in the interval, while the - * function ib_umem_end returns the first address which is not contained - * in the umem. 
- */ -static inline u64 node_last(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_end(umem_odp->umem) - 1; -} - -INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, - node_start, node_last, , rbt_ib_umem) - -/* @last is not a part of the interval. See comment for function - * node_last. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 last, - umem_call_back cb, - void *cookie) -{ - int ret_val = 0; - struct umem_odp_node *node, *next; - struct ib_umem_odp *umem; - - if (unlikely(start == last)) - return ret_val; - - for (node = rbt_ib_umem_iter_first(root, start, last - 1); - node; node = next) { - next = rbt_ib_umem_iter_next(node, start, last - 1); - umem = container_of(node, struct ib_umem_odp, interval_tree); - ret_val = cb(umem->umem, start, last, cookie) || ret_val; - } - - return ret_val; -} -EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); - -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, - u64 addr, u64 length) -{ - struct umem_odp_node *node; - - node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); - if (node) - return container_of(node, struct ib_umem_odp, interval_tree); - return NULL; - -} -EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 5eb7f5bc8248..6a17f856f841 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -111,10 +111,6 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); -void rbt_ib_umem_insert(struct umem_odp_node *node, - struct rb_root_cached *root); -void rbt_ib_umem_remove(struct umem_odp_node *node, - struct rb_root_cached *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* From 09d208b258a2e012aad399b80a9595c4aa95e6dc Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Wed, 25 Oct 2017 22:39:34 +0300 Subject: [PATCH 242/296] IB/mlx4: Add report for RSS capabilities by vendor channel The mlx4 RSS patch series missed reporting the RSS capabilities that should be reported through the vendor channel in query_device.
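The guarded copy-out that follows is the usual extensible-response pattern on the uverbs vendor channel. A self-contained sketch of the idea (illustration only; the struct and field names are hypothetical, not the mlx4 ABI):

#include <stddef.h>
#include <stdint.h>

/*
 * Each optional trailer is filled in only when the caller's output
 * buffer (outlen) is big enough to receive it; response_length grows
 * accordingly, so old userspace keeps working while new userspace
 * learns the extra capability.
 */
struct query_resp {
	uint32_t comp_mask;
	uint32_t response_length;
	uint64_t new_cap;		/* hypothetical optional trailer */
};

static void fill_resp(struct query_resp *resp, size_t outlen, uint64_t cap)
{
	resp->response_length = offsetof(struct query_resp, new_cap);
	if (outlen >= resp->response_length + sizeof(resp->new_cap)) {
		resp->new_cap = cap;
		resp->response_length += sizeof(resp->new_cap);
	}
}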
Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 17 +++++++++++++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 16 +++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c636842c5be0..d8f0b94f1dc4 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -581,6 +581,23 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, sizeof(struct mlx4_wqe_data_seg); } + if (uhw->outlen >= resp.response_length + sizeof(resp.rss_caps)) { + resp.response_length += sizeof(resp.rss_caps); + if (props->rss_caps.supported_qpts) { + resp.rss_caps.rx_hash_function = + MLX4_IB_RX_HASH_FUNC_TOEPLITZ; + resp.rss_caps.rx_hash_fields_mask = + MLX4_IB_RX_HASH_SRC_IPV4 | + MLX4_IB_RX_HASH_DST_IPV4 | + MLX4_IB_RX_HASH_SRC_IPV6 | + MLX4_IB_RX_HASH_DST_IPV6 | + MLX4_IB_RX_HASH_SRC_PORT_TCP | + MLX4_IB_RX_HASH_DST_PORT_TCP | + MLX4_IB_RX_HASH_SRC_PORT_UDP | + MLX4_IB_RX_HASH_DST_PORT_UDP; + } + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1fa19820355a..d09d4e7b2706 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -644,12 +644,18 @@ enum query_device_resp_mask { QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0, }; +struct mlx4_ib_rss_caps { + __u64 rx_hash_fields_mask; /* enum mlx4_rx_hash_fields */ + __u8 rx_hash_function; /* enum mlx4_rx_hash_function_flags */ + __u8 reserved[7]; +}; + struct mlx4_uverbs_ex_query_device_resp { - __u32 comp_mask; - __u32 response_length; - __u64 hca_core_clock_offset; - __u32 max_inl_recv_sz; - __u32 reserved; + __u32 comp_mask; + __u32 response_length; + __u64 hca_core_clock_offset; + __u32 max_inl_recv_sz; + struct mlx4_ib_rss_caps rss_caps; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) From 108809a0571cd1e1b317c5c083a371e163e1f8f9 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Wed, 25 Oct 2017 22:39:35 +0300 Subject: [PATCH 243/296] IB/mlx4: Fix RSS's QPC attributes assignments In the modify QP handler, the base_qpn_udp field in the RSS QPC is later overwritten by an irrelevant value assignment. Hence, ingress packets that reach the RSS QP will be steered to a garbage QPN. The patch fixes this by skipping the above assignment when an RSS QP is modified; the RSS context's attribute assignments are also relocated to just before the context is posted, to avoid future issues like this. Additionally, this patch takes the opportunity to bring the code in line with the device's manual and assigns the RSS QP context only at the RESET to INIT transition.
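The underlying failure mode is plain last-writer-wins ordering on the command context: whatever is written to a context field last, before the context is posted to the device, is what the hardware sees. A tiny sketch of the bug pattern (illustration only; hypothetical field and function names, not the mlx4 layout):

#include <stdint.h>

struct qpc { uint32_t base_qpn_udp; };	/* hypothetical context field */

static void modify_qp_buggy(struct qpc *ctx, uint32_t rss_qpn)
{
	ctx->base_qpn_udp = rss_qpn;	/* RSS fill done too early ... */
	ctx->base_qpn_udp = 0;		/* ... clobbered by a later, unrelated write */
	/* context posted to HW here: the RSS QPN is lost */
}

Moving the RSS fill to just before the post, as the patch does, makes it the last writer, so the device sees the intended value.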
Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b6b33d99b0b4..26f3345948e2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2182,11 +2182,6 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); - if (rwq_ind_tbl) { - fill_qp_rss_context(context, qp); - context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET); - } - if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); else { @@ -2387,6 +2382,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, context->pd = cpu_to_be32(pd->pdn); if (!rwq_ind_tbl) { + context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); get_cqs(qp, src_type, &send_cq, &recv_cq); } else { /* Set dummy CQs to be compatible with HV and PRM */ send_cq = to_mcq(rwq_ind_tbl->ind_tbl[0]->cq); @@ -2394,7 +2390,6 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, } context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); - context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ if (!ibuobject) @@ -2513,7 +2508,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, MLX4_IB_LINK_TYPE_ETH; if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { /* set QP to receive both tunneled & non-tunneled packets */ - if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET))) + if (!rwq_ind_tbl) context->srqn = cpu_to_be32(7 << 28); } } @@ -2562,6 +2557,13 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, } } + if (rwq_ind_tbl && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) { + fill_qp_rss_context(context, qp); + context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET); + } + err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), to_mlx4_state(new_state), context, optpar, sqd_event, &qp->mqp); From 7d7d065a5eec7e218174d5c64a9f53f99ffdb119 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Oct 2017 23:10:19 +0300 Subject: [PATCH 244/296] RDMA/cxgb4: Annotate r2 and stag as __be32 Chelsio cxgb4 HW is big-endian; hence there is a need to properly annotate the r2 and stag fields as __be32 and not __u32, to fix the following sparse warnings.
drivers/infiniband/hw/cxgb4/qp.c:614:16: warning: incorrect type in assignment (different base types) expected unsigned int [unsigned] [usertype] r2 got restricted __be32 [usertype] drivers/infiniband/hw/cxgb4/qp.c:615:18: warning: incorrect type in assignment (different base types) expected unsigned int [unsigned] [usertype] stag got restricted __be32 [usertype] Cc: Steve Wise Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 010c709ba3bb..58c531db4f4a 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -675,8 +675,8 @@ struct fw_ri_fr_nsmr_tpte_wr { __u16 wrid; __u8 r1[3]; __u8 len16; - __u32 r2; - __u32 stag; + __be32 r2; + __be32 stag; struct fw_ri_tpte tpte; __u64 pbl[2]; }; From 89548bcafec7ecfeea58c553f0834b5d575a66eb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 31 Oct 2017 10:33:18 -0500 Subject: [PATCH 245/296] IB/core: Avoid crash on pkey enforcement failed in received MADs The below kernel crash is observed when Pkey security enforcement fails on received MADs. This issue is reported in [1]. ib_free_recv_mad() accesses the rmpp_list, which must be initialized before it is accessed. When security enforcement fails on received MADs, MAD processing is skipped because the security checks failed. OpenSM[3770]: SM port is down kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 kernel: IP: ib_free_recv_mad+0x44/0xa0 [ib_core] kernel: PGD 0 kernel: P4D 0 kernel: kernel: Oops: 0002 [#1] SMP kernel: CPU: 0 PID: 2833 Comm: kworker/0:1H Tainted: P IO 4.13.4-1-pve #1 kernel: Hardware name: Dell XS23-TY3 /9CMP63, BIOS 1.71 09/17/2013 kernel: Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] kernel: task: ffffa069c6541600 task.stack: ffffb9a729054000 kernel: RIP: 0010:ib_free_recv_mad+0x44/0xa0 [ib_core] kernel: RSP: 0018:ffffb9a729057d38 EFLAGS: 00010286 kernel: RAX: ffffa069cb138a48 RBX: ffffa069cb138a10 RCX: 0000000000000000 kernel: RDX: ffffb9a729057d38 RSI: 0000000000000000 RDI: ffffa069cb138a20 kernel: RBP: ffffb9a729057d60 R08: ffffa072d2d49800 R09: ffffa069cb138ae0 kernel: R10: ffffa069cb138ae0 R11: ffffa072b3994e00 R12: ffffb9a729057d38 kernel: R13: ffffa069d1c90000 R14: 0000000000000000 R15: ffffa069d1c90880 kernel: FS: 0000000000000000(0000) GS:ffffa069dba00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000008 CR3: 00000011f51f2000 CR4: 00000000000006f0 kernel: Call Trace: kernel: ib_mad_recv_done+0x5cc/0xb50 [ib_core] kernel: __ib_process_cq+0x5c/0xb0 [ib_core] kernel: ib_cq_poll_work+0x20/0x60 [ib_core] kernel: process_one_work+0x1e9/0x410 kernel: worker_thread+0x4b/0x410 kernel: kthread+0x109/0x140 kernel: ? process_one_work+0x410/0x410 kernel: ? kthread_create_on_node+0x70/0x70 kernel: ?
SyS_exit_group+0x14/0x20 kernel: ret_from_fork+0x25/0x30 kernel: RIP: ib_free_recv_mad+0x44/0xa0 [ib_core] RSP: ffffb9a729057d38 kernel: CR2: 0000000000000008 [1] : https://www.spinics.net/lists/linux-rdma/msg56190.html Fixes: 47a2b338fe63 ("IB/core: Enforce security on management datagrams") Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Parav Pandit Reported-by: Chris Blake Reviewed-by: Daniel Jurgens Reviewed-by: Hal Rosenstock Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index f8f53bb90837..cb91245e9163 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1974,14 +1974,15 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, unsigned long flags; int ret; + INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); ret = ib_mad_enforce_security(mad_agent_priv, mad_recv_wc->wc->pkey_index); if (ret) { ib_free_recv_mad(mad_recv_wc); deref_mad_agent(mad_agent_priv); + return; } - INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, From 3192c53e5affe767472176197b83c9bc0b1e459d Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 31 Oct 2017 11:16:46 +0100 Subject: [PATCH 246/296] IB/rxe: don't crash, if allocation of crc algorithm failed The following crash happens if the crc algorithm couldn't be allocated: [ 1087.989072] rdma_rxe: loaded [ 1097.855397] PCLMULQDQ-NI instructions are not detected. [ 1097.901220] rdma_rxe: failed to allocate crc algorithmi err:-2 [ 1097.901248] BUG: unable to handle kernel [ 1097.901249] NULL pointer dereference [ 1097.901250] at 0000000000000046 [...] The reason is that rxe->tfm is assigned the error return, which will then be used for crypto_free_shash() in rxe_cleanup. Fix this by using a temporary variable and assigning it to rxe->tfm only after the allocation has succeeded.
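The fix generalizes to any ERR_PTR()-returning allocator: keep the result in a local variable until it is known to be valid, so a long-lived field never holds an error cookie that a later cleanup path could hand to a destructor. A minimal sketch (illustration only; my_obj is a hypothetical container, not rxe code):

/*
 * obj->tfm only ever holds a valid pointer (or stays NULL), so an
 * unwind path can safely call crypto_free_shash() on it.
 */
static int obj_init_crc(struct my_obj *obj)
{
	struct crypto_shash *tfm = crypto_alloc_shash("crc32", 0, 0);

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	obj->tfm = tfm;
	return 0;
}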
Fixes: cee2688e3cd6 ("IB/rxe: Offload CRC calculation when possible") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Leon Romanovsky Acked-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index ff77f4f66970..d03002b9d84d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1192,6 +1192,7 @@ int rxe_register_device(struct rxe_dev *rxe) int err; int i; struct ib_device *dev = &rxe->ib_dev; + struct crypto_shash *tfm; strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX); strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); @@ -1289,12 +1290,13 @@ int rxe_register_device(struct rxe_dev *rxe) dev->get_hw_stats = rxe_ib_get_hw_stats; dev->alloc_hw_stats = rxe_ib_alloc_hw_stats; - rxe->tfm = crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(rxe->tfm)) { + tfm = crypto_alloc_shash("crc32", 0, 0); + if (IS_ERR(tfm)) { pr_err("failed to allocate crc algorithm err:%ld\n", - PTR_ERR(rxe->tfm)); - return PTR_ERR(rxe->tfm); + PTR_ERR(tfm)); + return PTR_ERR(tfm); } + rxe->tfm = tfm; err = ib_register_device(dev, NULL); if (err) { From e1d2e88733695038754d3303b180f8005a02b6f1 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 29 Oct 2017 13:59:44 +0200 Subject: [PATCH 247/296] IB/core: Add PCI write end padding flags for WQ and QP There are root complexes that are able to optimize their performance when incoming data is multiple full cache lines. PCI write end padding is the device's ability to pad the ending of incoming packets (scatter) to full cache line such that the last upstream write generated by an incoming packet will be a full cache line. Add a relevant entry to ib_device_cap_flags to report such capability of an RDMA device. Add the QP and WQ create flags: * A QP/WQ created with a scatter end padding flag will cause HW to pad the last upstream write generated by a packet to cache line. User should consider several factors before activating this feature: - In case of high CPU memory load (which may cause PCI back pressure in turn), if a large percent of the writes are partial cache line, this feature should be checked as an optional solution. - This feature might reduce performance if most packets are between one and two cache lines and PCIe throughput has reached its maximum capacity. E.g. 65B packet from the network port will lead to 128B write on PCIe, which may cause traffic on PCIe to reach high throughput. 
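From a consumer's point of view, the new capability bit and create flag combine in the obvious way. A sketch of a kernel ULP opting in (illustration only; the wrapper function is hypothetical, while the flag and capability names are the ones added by this patch):

/* Request end padding only when the device advertises support. */
static struct ib_qp *create_padded_qp(struct ib_pd *pd,
				      struct ib_qp_init_attr *attr,
				      const struct ib_device_attr *dev_attr)
{
	if (dev_attr->device_cap_flags & IB_DEVICE_PCI_WRITE_END_PADDING)
		attr->create_flags |= IB_QP_CREATE_PCI_WRITE_END_PADDING;

	return ib_create_qp(pd, attr);
}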
Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- include/rdma/ib_verbs.h | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d31e4bc58e9a..8ca36843ef38 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1491,7 +1491,8 @@ static int create_qp(struct ib_uverbs_file *file, IB_QP_CREATE_MANAGED_RECV | IB_QP_CREATE_SCATTER_FCS | IB_QP_CREATE_CVLAN_STRIPPING | - IB_QP_CREATE_SOURCE_QPN)) { + IB_QP_CREATE_SOURCE_QPN | + IB_QP_CREATE_PCI_WRITE_END_PADDING)) { ret = -EINVAL; goto err_put; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9810e4568635..0b671982bbb3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -229,6 +229,8 @@ enum ib_device_cap_flags { /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), + /* The device supports padding incoming writes to cacheline. */ + IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), }; enum ib_signature_prot_cap { @@ -1098,6 +1100,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, IB_QP_CREATE_SOURCE_QPN = 1 << 10, + IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -1621,6 +1624,7 @@ enum ib_wq_flags { IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, IB_WQ_FLAGS_SCATTER_FCS = 1 << 1, IB_WQ_FLAGS_DELAY_DROP = 1 << 2, + IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, }; struct ib_wq_init_attr { From b1383aa64121778d9419cc982e387e27d9e96c64 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 29 Oct 2017 13:59:45 +0200 Subject: [PATCH 248/296] IB/mlx5: Add PCI write end padding support Add the PCI write end padding flag to device_cap_flags enum and set it during mlx5_ib_query_device so it will be reported to user-space. During WQ/QP creation, set that capability for WQ/QP if user requested it and HW supports it. PCI write end padding modification is not supported for now. There's no such flag for a QP but for a WQ, create and modify use the same flag. Return an error if PCI write end padding flag is set during modify_wq. 
Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 +++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- drivers/infiniband/hw/mlx5/qp.c | 36 +++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 260f8be1d0ed..e5f8d5d1cd7e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -715,6 +715,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; + if (MLX5_CAP_GEN(mdev, end_pad)) + props->device_cap_flags |= IB_DEVICE_PCI_WRITE_END_PADDING; + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0a328d6c6494..6dd8cac78de2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -228,6 +228,7 @@ struct wr_list { enum mlx5_ib_rq_flags { MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0, + MLX5_IB_RQ_PCI_WRITE_END_PADDING = 1 << 1, }; struct mlx5_ib_wq { @@ -421,7 +422,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_RSS = 1 << 8, MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, MLX5_IB_QP_UNDERLAY = 1 << 10, - /* Reserved for PCI_WRITE_PAD = 1 << 11, */ + MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11, MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, }; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9e22dead259a..74799c7bd256 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1178,8 +1178,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, end_padding_mode, - MLX5_GET(qpc, qpc, end_padding_mode)); + if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); @@ -1276,6 +1276,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING) rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; + if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING) + rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; err = create_raw_packet_qp_rq(dev, rq, in); if (err) goto err_destroy_sq; @@ -1821,6 +1823,19 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_LSO; } + if (init_attr->create_flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) { + if (!MLX5_CAP_GEN(dev->mdev, end_pad)) { + mlx5_ib_dbg(dev, "scatter end padding is not supported\n"); + err = -EOPNOTSUPP; + goto err; + } else if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + MLX5_SET(qpc, qpc, end_padding_mode, + MLX5_WQ_END_PAD_MODE_ALIGN); + } else { + qp->flags |= MLX5_IB_QP_PCI_WRITE_END_PADDING; + } + } + if (init_attr->qp_type == IB_QPT_RAW_PACKET || qp->flags & MLX5_IB_QP_UNDERLAY) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; @@ -1865,6 +1880,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); +err: kvfree(in); return err; } @@ -4749,7 +4765,15 @@ static int 
create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(wq, wq, wq_type, rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ? MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + if (init_attr->create_flags & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) { + if (!MLX5_CAP_GEN(dev->mdev, end_pad)) { + mlx5_ib_dbg(dev, "Scatter end padding is not supported\n"); + err = -EOPNOTSUPP; + goto out; + } else { + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + } + } MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); @@ -5129,6 +5153,12 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, MLX5_SET(rqc, rqc, vsd, (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1); } + + if (wq_attr->flags_mask & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) { + mlx5_ib_dbg(dev, "Modifying scatter end padding is not supported\n"); + err = -EOPNOTSUPP; + goto out; + } } if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) { From e32d2d7144efca745f6d182ac9abd16ee6c7598e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 29 Oct 2017 17:05:22 +0200 Subject: [PATCH 249/296] RDMA/bnxt_re: Remove unused vlan_tag variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Broadcom driver produces the following compilation warning: drivers/infiniband/hw/bnxt_re/ib_verbs.c: In function ‘bnxt_re_create_ah’: drivers/infiniband/hw/bnxt_re/ib_verbs.c:668:6: warning: variable ‘vlan_tag’ set but not used [-Wunused-but-set-variable] u16 vlan_tag; Let's remove it until vlan_tag is implemented properly. Cc: Selvin Xavier Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index ebcdfb4f5f75..39e49d0e8467 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -665,7 +665,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, struct bnxt_re_ah *ah; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); int rc; - u16 vlan_tag; u8 nw_type; struct ib_gid_attr sgid_attr; @@ -711,11 +710,8 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, grh->sgid_index); goto fail; } - if (sgid_attr.ndev) { - if (is_vlan_dev(sgid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); + if (sgid_attr.ndev) dev_put(sgid_attr.ndev); - } /* Get network header type for this GID */ nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); switch (nw_type) { From 9950acf945f55222385d85489617e1d81e45fe34 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 29 Oct 2017 21:34:35 +0200 Subject: [PATCH 250/296] RDMA/cxgb4: Protect from possible dereference The Smatch tool reports the following error: drivers/infiniband/hw/cxgb4/qp.c:1886 c4iw_create_qp() error: we previously assumed 'ucontext' could be null (see line 1804) Cc: Steve Wise Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 57b23e33eb8b..4b3267358dff 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1843,7 +1843,7 @@
struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (ret) goto err_destroy_qp; - if (udata) { + if (udata && ucontext) { sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL); if (!sq_key_mm) { ret = -ENOMEM; From 31fde034a8bd964a5c7c1a5663fc87a913158db2 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 30 Oct 2017 14:23:13 +0200 Subject: [PATCH 251/296] IB/mlx5: Assign send CQ and recv CQ of UMR QP The UMR QP is created by calling mlx5_ib_create_qp directly, and therefore the send CQ and the recv CQ on the ibqp weren't assigned. Assign them right after calling mlx5_ib_create_qp to ensure that any access to those pointers works as expected and won't crash the system, as might happen as part of the reset flow. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Majd Dibbiny Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e5f8d5d1cd7e..d2aadb13637b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3138,6 +3138,8 @@ static int create_umr_res(struct mlx5_ib_dev *dev) qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq; attr->qp_state = IB_QPS_INIT; attr->port_num = 1; From 2b621851acb34762ca893b2528823215e0d4b98c Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 30 Oct 2017 14:23:14 +0200 Subject: [PATCH 252/296] IB/mlx5: Fix RoCE Address Path fields When working over a RoCE network, the UDP source port should be set only for statically connected QPs (RC, UC and XRC). Fixes: 2811ba51b049 ("IB/mlx5: Add RoCE fields to Address Vector") Signed-off-by: Majd Dibbiny Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 74799c7bd256..31ad28853efa 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2339,8 +2339,12 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); - path->udp_sport = mlx5_get_roce_udp_sport(dev, port, - grh->sgid_index); + if (qp->ibqp.qp_type == IB_QPT_RC || + qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) + path->udp_sport = mlx5_get_roce_udp_sport(dev, port, + grh->sgid_index); path->dci_cfi_prio_sl = (sl & 0x7) << 4; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f; From 9901abf583683e58f95f822da63cd0e32e7b2f0a Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 2 Nov 2017 15:22:24 +0200 Subject: [PATCH 253/296] IB/mlx4: Use optimal numbers of MTT entries Optimize the device performance by assigning multiple physical pages, which are contiguous, to a single MTT. As a result, the number of MTTs is reduced, which in turn saves cache misses on MTTs.
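The heart of the optimization is choosing the largest power-of-two block size that every chunk boundary in the region shares. A user-space sketch of that reduction (illustration only; these helpers are hypothetical, mirroring alignment_of() and mlx4_ib_umem_calc_block_mtt() in the diff below):

#include <stdint.h>

/* Alignment of addr expressed as a shift: the index of its lowest set bit. */
static unsigned int alignment_shift(uint64_t addr)
{
	return addr ? (unsigned int)__builtin_ctzll(addr) : 63;
}

/*
 * Reduce a candidate block shift over every block boundary: a boundary
 * that is less aligned than the current candidate forces a smaller
 * block size, so the final shift divides all boundaries evenly.
 */
static unsigned int common_block_shift(const uint64_t *bounds, int n,
				       unsigned int max_shift)
{
	unsigned int shift = max_shift;
	int i;

	for (i = 0; i < n; i++)
		if (bounds[i] & ((1ULL << shift) - 1))
			shift = alignment_shift(bounds[i]);

	return shift;
}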
Signed-off-by: Guy Levi Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mr.c | 285 +++++++++++++++++++++++++++++--- 1 file changed, 261 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index e6f77f63da75..8f408a02f699 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -87,50 +87,287 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) return ERR_PTR(err); } +enum { + MLX4_MAX_MTT_SHIFT = 31 +}; + +static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev, + struct mlx4_mtt *mtt, + u64 mtt_size, u64 mtt_shift, u64 len, + u64 cur_start_addr, u64 *pages, + int *start_index, int *npages) +{ + u64 cur_end_addr = cur_start_addr + len; + u64 cur_end_addr_aligned = 0; + u64 mtt_entries; + int err = 0; + int k; + + len += (cur_start_addr & (mtt_size - 1ULL)); + cur_end_addr_aligned = round_up(cur_end_addr, mtt_size); + len += (cur_end_addr_aligned - cur_end_addr); + if (len & (mtt_size - 1ULL)) { + pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n", + len, mtt_size); + return -EINVAL; + } + + mtt_entries = (len >> mtt_shift); + + /* + * Align the MTT start address to the mtt_size. + * Required to handle cases when the MR starts in the middle of an MTT + * record. Was not required in old code since the physical addresses + * provided by the dma subsystem were page aligned, which was also the + * MTT size. + */ + cur_start_addr = round_down(cur_start_addr, mtt_size); + /* A new block is started ... */ + for (k = 0; k < mtt_entries; ++k) { + pages[*npages] = cur_start_addr + (mtt_size * k); + (*npages)++; + /* + * Be friendly to mlx4_write_mtt() and pass it chunks of + * appropriate size. + */ + if (*npages == PAGE_SIZE / sizeof(u64)) { + err = mlx4_write_mtt(dev->dev, mtt, *start_index, + *npages, pages); + if (err) + return err; + + (*start_index) += *npages; + *npages = 0; + } + } + + return 0; +} + +static inline u64 alignment_of(u64 ptr) +{ + return ilog2(ptr & (~(ptr - 1))); +} + +static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start, + u64 current_block_end, + u64 block_shift) +{ + /* Check whether the alignment of the new block is aligned as well as + * the previous block. + * Block address must start with zeros till size of entity_size. + */ + if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0) + /* + * It is not as well aligned as the previous block-reduce the + * mtt size accordingly. Here we take the last right bit which + * is 1. + */ + block_shift = alignment_of(next_block_start); + + /* + * Check whether the alignment of the end of previous block - is it + * aligned as well as the start of the block + */ + if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0) + /* + * It is not as well aligned as the start of the block - + * reduce the mtt size accordingly. 
+ */ + block_shift = alignment_of(current_block_end); + + return block_shift; +} + int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; - int i, k, entry; - int n; - int len; + u64 len = 0; int err = 0; + u64 mtt_size; + u64 cur_start_addr = 0; + u64 mtt_shift; + int start_index = 0; + int npages = 0; struct scatterlist *sg; + int i; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) return -ENOMEM; - i = n = 0; + mtt_shift = mtt->page_shift; + mtt_size = 1ULL << mtt_shift; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> mtt->page_shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(sg) + - (k << umem->page_shift); - /* - * Be friendly to mlx4_write_mtt() and - * pass it chunks of appropriate size. - */ - if (i == PAGE_SIZE / sizeof (u64)) { - err = mlx4_write_mtt(dev->dev, mtt, n, - i, pages); - if (err) - goto out; - n += i; - i = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { + if (cur_start_addr + len == sg_dma_address(sg)) { + /* still the same block */ + len += sg_dma_len(sg); + continue; } + /* + * A new block is started ... + * If len is malaligned, write an extra mtt entry to cover the + * misaligned area (round up the division) + */ + err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, + mtt_shift, len, + cur_start_addr, + pages, &start_index, + &npages); + if (err) + goto out; + + cur_start_addr = sg_dma_address(sg); + len = sg_dma_len(sg); } - if (i) - err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); + /* Handle the last block */ + if (len > 0) { + /* + * If len is malaligned, write an extra mtt entry to cover + * the misaligned area (round up the division) + */ + err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, + mtt_shift, len, + cur_start_addr, pages, + &start_index, &npages); + if (err) + goto out; + } + + if (npages) + err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages); out: free_page((unsigned long) pages); return err; } +/* + * Calculate optimal mtt size based on contiguous pages. + * Function will return also the number of pages that are not aligned to the + * calculated mtt_size to be added to total number of pages. For that we should + * check the first chunk length & last chunk length and if not aligned to + * mtt_size we should increment the non_aligned_pages number. All chunks in the + * middle already handled as part of mtt shift calculation for both their start + * & end addresses. + */ +static int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, + u64 start_va, + int *num_of_mtts) +{ + u64 block_shift = MLX4_MAX_MTT_SHIFT; + u64 min_shift = umem->page_shift; + u64 last_block_aligned_end = 0; + u64 current_block_start = 0; + u64 first_block_start = 0; + u64 current_block_len = 0; + u64 last_block_end = 0; + struct scatterlist *sg; + u64 current_block_end; + u64 misalignment_bits; + u64 next_block_start; + u64 total_len = 0; + int i; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { + /* + * Initialization - save the first chunk start as the + * current_block_start - block means contiguous pages. + */ + if (current_block_len == 0 && current_block_start == 0) { + current_block_start = sg_dma_address(sg); + first_block_start = current_block_start; + /* + * Find the bits that are different between the physical + * address and the virtual address for the start of the + * MR. + * umem_get aligned the start_va to a page boundary. + * Therefore, we need to align the start va to the same + * boundary. 
+ * misalignment_bits is needed to handle the case of a + * single memory region. In this case, the rest of the + * logic will not reduce the block size. If we use a + * block size which is bigger than the alignment of the + * misalignment bits, we might use the virtual page + * number instead of the physical page number, resulting + * in access to the wrong data. + */ + misalignment_bits = + (start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL))) + ^ current_block_start; + block_shift = min(alignment_of(misalignment_bits), + block_shift); + } + + /* + * Go over the scatter entries and check if they continue the + * previous scatter entry. + */ + next_block_start = sg_dma_address(sg); + current_block_end = current_block_start + current_block_len; + /* If we have a split (non-contig.) between two blocks */ + if (current_block_end != next_block_start) { + block_shift = mlx4_ib_umem_calc_block_mtt + (next_block_start, + current_block_end, + block_shift); + + /* + * If we reached the minimum shift for 4k page we stop + * the loop. + */ + if (block_shift <= min_shift) + goto end; + + /* + * If not saved yet we are in first block - we save the + * length of first block to calculate the + * non_aligned_pages number at the end. + */ + total_len += current_block_len; + + /* Start a new block */ + current_block_start = next_block_start; + current_block_len = sg_dma_len(sg); + continue; + } + /* The scatter entry is another part of the current block, + * increase the block size. + * An entry in the scatter can be larger than 4k (page) as of + * dma mapping which merge some blocks together. + */ + current_block_len += sg_dma_len(sg); + } + + /* Account for the last block in the total len */ + total_len += current_block_len; + /* Add to the first block the misalignment that it suffers from. */ + total_len += (first_block_start & ((1ULL << block_shift) - 1ULL)); + last_block_end = current_block_start + current_block_len; + last_block_aligned_end = round_up(last_block_end, 1 << block_shift); + total_len += (last_block_aligned_end - last_block_end); + + if (total_len & ((1ULL << block_shift) - 1ULL)) + pr_warn("misaligned total length detected (%llu, %llu)!", + total_len, block_shift); + + *num_of_mtts = total_len >> block_shift; +end: + if (block_shift < min_shift) { + /* + * If shift is less than the min we set a warning and return the + * min shift. + */ + pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift); + + block_shift = min_shift; + } + return block_shift; +} + struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -155,7 +392,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } n = ib_umem_page_count(mr->umem); - shift = mr->umem->page_shift; + shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, convert_access(access_flags), n, shift, &mr->mmr); From ed8637d3615b38bd4d12ba5eb8ee6a0c3888e754 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 2 Nov 2017 15:22:25 +0200 Subject: [PATCH 254/296] IB/mlx4: Add contig support for control objects Taking advantage of the optimization which was introduced in previous commit ("IB/mlx4: Use optimal numbers of MTT entries") to optimize the MTT usage for QP and CQ. 
Signed-off-by: Guy Levi Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 8 ++++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 ++ drivers/infiniband/hw/mlx4/mr.c | 5 ++--- drivers/infiniband/hw/mlx4/qp.c | 8 ++++++-- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 6d5f405912dd..bf4f14a1b4fc 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -140,14 +140,18 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont { int err; int cqe_size = dev->dev->caps.cqe_size; + int shift; + int n; *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); - err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem), - (*umem)->page_shift, &buf->mtt); + n = ib_umem_page_count(*umem); + shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n); + err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); + if (err) goto err_buf; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d09d4e7b2706..719dae354066 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -935,5 +935,7 @@ struct ib_rwq_ind_table struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); +int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, + int *num_of_mtts); #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 8f408a02f699..313bfb9ccb71 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -254,9 +254,8 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, * middle already handled as part of mtt shift calculation for both their start * & end addresses. */ -static int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, - u64 start_va, - int *num_of_mtts) +int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, + int *num_of_mtts) { u64 block_shift = MLX4_MAX_MTT_SHIFT; u64 min_shift = umem->page_shift; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 26f3345948e2..f807a6278d44 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1038,6 +1038,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct mlx4_ib_create_wq wq; } ucmd; size_t copy_len; + int shift; + int n; copy_len = (src == MLX4_IB_QP_SRC) ? sizeof(struct mlx4_ib_create_qp) : @@ -1100,8 +1102,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } - err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem), - qp->umem->page_shift, &qp->mtt); + n = ib_umem_page_count(qp->umem); + shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); + err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); + if (err) goto err_buf; From 5f22a1d87c5315a98981ecf93cd8de226cffe6ca Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 2 Nov 2017 15:22:26 +0200 Subject: [PATCH 255/296] IB/mlx4: Increase maximal message size under UD QP The maximal message size should be used as a limit on the maximum message payload allowed, without the headers, but the ConnectX-3 check against this value includes the headers. When the payload is 4K this will cause the NIC to drop packets.
Increase the maximal message size to 8K as a workaround; this shouldn't change current behaviour because we continue to set the MTU to 4k. To reproduce: set the MTU to 4296 on the corresponding interface, for example: ifconfig eth0 mtu 4296 (both server and client) On the server: ib_send_bw -c UD -d mlx4_0 -s 4096 -n 1000000 -i1 -m 4096 On the client: ib_send_bw -d mlx4_0 -c UD -s 4096 -n 1000000 -i 1 -m 4096 Fixes: 6e0d733d9215 ("IB/mlx4: Allow 4K messages for UD QPs") Signed-off-by: Mark Bloch Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f807a6278d44..013049bcdb53 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2215,7 +2215,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else - context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + context->mtu_msgmax = (IB_MTU_4096 << 5) | 13; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { pr_err("path MTU (%u) is invalid\n", From 2e4c85c6edc80fa532b2c7e1eb3597ef4d4bbb8f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 Nov 2017 15:22:27 +0200 Subject: [PATCH 256/296] IB/core: Avoid unnecessary return value check Since nothing is done with the non-zero return value, such a check is avoided. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/security.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 88bdafb297f5..455cba7f9640 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -692,20 +692,13 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) { - int ret; - if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed) return -EACCES; - ret = ib_security_pkey_access(map->agent.device, - map->agent.port_num, - pkey_index, - map->agent.security); - - if (ret) - return ret; - - return 0; + return ib_security_pkey_access(map->agent.device, + map->agent.port_num, + pkey_index, + map->agent.security); } #endif /* CONFIG_SECURITY_INFINIBAND */ From f17966f19575eac9d5dea68b08f6292dd3d4d3db Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 2 Nov 2017 15:22:28 +0200 Subject: [PATCH 257/296] IB/mlx5: Fix ABI alignment to 64 bit Struct mlx5_ib_striding_rq_caps was not aligned to 64 bit as it should have been. Add a 32 bit reserved field.
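For reference, the ABI rule being applied: a structure shared across the user/kernel boundary should keep its 64-bit members naturally aligned and its total size a multiple of 8 bytes, so the compiler never inserts implicit padding that could differ between userspace builds. A sketch of the fix pattern with a build-time check (illustration only; the struct is hypothetical, not the mlx5 ABI):

#include <assert.h>
#include <stdint.h>

struct example_caps {
	uint64_t max_whatever;
	uint32_t supported_qpts;
	uint32_t reserved;	/* explicit pad keeps sizeof a multiple of 8 */
};

/* Catch alignment regressions when the struct is next extended. */
static_assert(sizeof(struct example_caps) % 8 == 0,
	      "ABI struct must be 64-bit aligned");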
Fixes: b4f34597a5ce ('IB/mlx5: Expose multi-packet RQ capabilities') Signed-off-by: Noa Osherovich Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 442b46b74a3a..21722e3c2c70 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -202,6 +202,7 @@ struct mlx5_ib_striding_rq_caps { * supported_qpts |= 1 << IB_QPT_RAW_PACKET */ __u32 supported_qpts; + __u32 reserved; }; enum mlx5_ib_query_dev_resp_flags { From d6d5c59905c8af932c1cee874e1fb5cd9e83fa61 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Tue, 31 Oct 2017 14:59:17 +0530 Subject: [PATCH 258/296] bnxt_re: fix a crash in qp error event processing In bnxt_qplib_process_qp_event(), for qp error events we look up the qp-handle and pass it on for further processing. But we don't check if the handle is NULL. This could lead to a crash in the called functions when that qp-handle is dereferenced, if the qp is destroyed in the meantime. Fix this by checking for a valid qp-handle in that function. Signed-off-by: Sriharsha Basavapatna Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 6d116146fa3c..a7b5de3e193c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -302,6 +302,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", qp_id, err_event->req_err_state_reason, err_event->res_err_state_reason); + if (!qp) + break; bnxt_qplib_acquire_cq_locks(qp, &flags); bnxt_qplib_mark_qp_error(qp); bnxt_qplib_release_cq_locks(qp, &flags); From 063fb5bd1a01937094f40169a20e4aa5ca030db1 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Fri, 3 Nov 2017 02:39:04 +0530 Subject: [PATCH 259/296] bnxt_re: changing the ip address shouldn't affect new connections While adding a new gid, the driver currently does not return the context back to the stack. A subsequent del_gid() (e.g., when the ip address is changed) doesn't find the right context in the driver and ends up dropping that request. This results in the HW caching a stale gid entry, and traffic fails because of that. Fix by returning the proper context in bnxt_re_add_gid(). Signed-off-by: Sriharsha Basavapatna Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 39e49d0e8467..adc42d7ff4f8 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -394,6 +394,7 @@ int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num, ctx->idx = tbl_idx; ctx->refcnt = 1; ctx_tbl[tbl_idx] = ctx; + *context = ctx; return rc; } From ba97b749979ef0ebb821e58ee8b16a84412922f6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 2 Nov 2017 14:11:03 -0700 Subject: [PATCH 260/296] iw_cxgb4: remove BUG_ON() usage. iw_cxgb4 has many BUG_ON()s that were left over from various enhancements made over the years. Almost all of them should just be removed. Some, however, indicate a ULP usage error and can be handled without bringing down the system.
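Before the detailed policy below, a compact illustration of the two replacement patterns the message describes (a sketch only, with a hypothetical helper; not actual driver code):

/*
 * ULP-triggered conditions get real recovery; "can't happen" states
 * get a WARN_ONCE so a buggy peer or ULP cannot take the system down.
 */
static int handle_accept(struct c4iw_ep *ep, struct c4iw_qp *qp)
{
	if (!qp)		/* misbehaving ULP: fail the verb */
		return -EINVAL;

	switch (ep->com.state) {
	case MPA_REQ_RCVD:	/* expected state */
		return 0;
	default:		/* impossible with correct sw/fw: warn, don't BUG */
		WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
		return -EINVAL;
	}
}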
If the condition cannot happen with correctly implemented cxgb4 sw/fw, then remove the BUG_ON. If the condition indicates a misbehaving ULP (like CQ overflows), add proper recovery logic. Signed-off-by: Steve Wise Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 25 +++++++++---------------- drivers/infiniband/hw/cxgb4/cq.c | 10 ---------- drivers/infiniband/hw/cxgb4/id_table.c | 1 - drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 - drivers/infiniband/hw/cxgb4/provider.c | 4 ++-- drivers/infiniband/hw/cxgb4/qp.c | 3 --- drivers/infiniband/hw/cxgb4/t4.h | 7 ++----- 7 files changed, 13 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 7eb8a85e4d42..21db3b48a617 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -912,8 +912,6 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, pr_debug("ep %p tid %u pd_len %d\n", ep, ep->hwtid, ep->plen); - BUG_ON(skb_cloned(skb)); - mpalen = sizeof(*mpa) + ep->plen; if (mpa_rev_to_use == 2) mpalen += sizeof(struct mpa_v2_conn_params); @@ -996,7 +994,6 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, */ skb_get(skb); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); if (ret) @@ -1082,7 +1079,6 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) skb_get(skb); set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); - BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); @@ -1836,7 +1832,6 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; update_rx_credits(ep, dlen); - BUG_ON(!ep->com.qp); if (status) pr_err("%s Unexpected streaming data." \ " qpid %u ep %p state %d tid %u status %d\n", @@ -2109,7 +2104,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) * further connection establishment. As we are using the same EP pointer * for reconnect, few skbs are used during the previous c4iw_connect(), * which leaves the EP with inadequate skbs for further - * c4iw_reconnect(), Further causing an assert BUG_ON() due to empty + * c4iw_reconnect(), Further causing a crash due to an empty * skb_list() during peer_abort(). Allocate skbs which is already used. 
*/ size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); @@ -2356,7 +2351,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; pr_debug("ep %p tid %u\n", ep, ep->hwtid); - BUG_ON(skb_cloned(skb)); skb_get(skb); rpl = cplhdr(skb); @@ -2440,7 +2434,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) { pr_debug("c4iw_dev %p tid %u\n", dev, hwtid); - BUG_ON(skb_cloned(skb)); skb_trim(skb, sizeof(struct cpl_tid_release)); release_tid(&dev->rdev, hwtid, skb); return; @@ -2713,7 +2706,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) disconnect = 0; break; default: - BUG_ON(1); + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); } mutex_unlock(&ep->com.mutex); if (disconnect) @@ -2813,7 +2806,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) mutex_unlock(&ep->com.mutex); goto deref_ep; default: - BUG_ON(1); + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); break; } dst_confirm(ep->dst); @@ -2900,7 +2893,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case DEAD: break; default: - BUG_ON(1); + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); break; } mutex_unlock(&ep->com.mutex); @@ -2918,7 +2911,6 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; ep = get_ep_from_tid(dev, tid); - BUG_ON(!ep); if (ep && ep->com.qp) { pr_warn("TERM received tid %u qpid %u\n", @@ -3018,7 +3010,10 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto err_out; } - BUG_ON(!qp); + if (!qp) { + err = -EINVAL; + goto err_out; + } set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || @@ -3576,7 +3571,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) __func__, ep, ep->com.state); break; default: - BUG(); + WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); break; } @@ -3676,7 +3671,6 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, int ret; rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; - BUG_ON(!rpl_skb); if (req->retval) { pr_err("%s passive open failure %d\n", __func__, req->retval); mutex_lock(&dev->rdev.stats.lock); @@ -4103,7 +4097,6 @@ static void process_work(struct work_struct *work) dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); opcode = rpl->ot.opcode; - BUG_ON(!work_handlers[opcode]); ret = work_handlers[opcode](dev, skb); if (!ret) kfree_skb(skb); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 8e2d490e757a..ea55e95cd2c5 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -188,7 +188,6 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) int flushed = 0; int in_use = wq->rq.in_use - count; - BUG_ON(in_use < 0); pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", wq, cq, wq->rq.in_use, count); while (in_use--) { @@ -231,14 +230,11 @@ int c4iw_flush_sq(struct c4iw_qp *qhp) if (wq->sq.flush_cidx == -1) wq->sq.flush_cidx = wq->sq.cidx; idx = wq->sq.flush_cidx; - BUG_ON(idx >= wq->sq.size); while (idx != wq->sq.pidx) { swsqe = &wq->sq.sw_sq[idx]; - BUG_ON(swsqe->flushed); swsqe->flushed = 1; insert_sq_cqe(wq, cq, swsqe); if (wq->sq.oldest_read == swsqe) { - BUG_ON(swsqe->opcode != FW_RI_READ_REQ); advance_oldest_read(wq); } flushed++; @@ -259,7 +255,6 @@ static 
void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) if (wq->sq.flush_cidx == -1) wq->sq.flush_cidx = wq->sq.cidx; cidx = wq->sq.flush_cidx; - BUG_ON(cidx > wq->sq.size); while (cidx != wq->sq.pidx) { swsqe = &wq->sq.sw_sq[cidx]; @@ -268,8 +263,6 @@ static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) cidx = 0; } else if (swsqe->complete) { - BUG_ON(swsqe->flushed); - /* * Insert this completed cqe into the swcq. */ @@ -613,7 +606,6 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (SQ_TYPE(hw_cqe)) { int idx = CQE_WRID_SQ_IDX(hw_cqe); - BUG_ON(idx >= wq->sq.size); /* * Account for any unsignaled completions completed by @@ -627,7 +619,6 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; else wq->sq.in_use -= idx - wq->sq.cidx; - BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); wq->sq.cidx = (uint16_t)idx; pr_debug("completing sq idx %u\n", wq->sq.cidx); @@ -638,7 +629,6 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, } else { pr_debug("completing rq idx %u\n", wq->rq.cidx); *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; - BUG_ON(t4_rq_empty(wq)); if (c4iw_wr_log) c4iw_log_wr_stats(wq, hw_cqe); t4_rq_consume(wq); diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c index 0161ae6ad629..5c2cfdea06ad 100644 --- a/drivers/infiniband/hw/cxgb4/id_table.c +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -73,7 +73,6 @@ void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj) unsigned long flags; obj -= alloc->start; - BUG_ON((int)obj < 0); spin_lock_irqsave(&alloc->lock, flags); clear_bit(obj, alloc->table); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 788ddb6aa57b..8d525825d50c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -364,7 +364,6 @@ static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, idr_preload_end(); } - BUG_ON(ret == -ENOSPC); return ret < 0 ? 
ret : 0; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index fb99a05562a3..30206a57dbe7 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -310,8 +310,9 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, pr_debug("ibdev %p, port %d, index %d, gid %p\n", ibdev, port, index, gid); + if (!port) + return -EINVAL; dev = to_c4iw_dev(ibdev); - BUG_ON(port == 0); memset(&(gid->raw[0]), 0, sizeof(gid->raw)); memcpy(&(gid->raw[0]), dev->rdev.lldi.ports[port-1]->dev_addr, 6); return 0; @@ -536,7 +537,6 @@ int c4iw_register_device(struct c4iw_dev *dev) int i; pr_debug("c4iw_dev %p\n", dev); - BUG_ON(!dev->rdev.lldi.ports[0]); strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 4b3267358dff..1374b41201a9 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -689,7 +689,6 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; } - BUG_ON(rem < 0); while (rem) { *p = 0; rem -= sizeof(*p); @@ -1568,7 +1567,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - BUG_ON(kref_read(&qhp->ep->com.kref) < 2); t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; @@ -1677,7 +1675,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; abort = 1; - BUG_ON(!ep); flush_qp(qhp); wake_up(&qhp->wait); out: diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 427aaf20d77c..e9ea94268d51 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -425,7 +425,6 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) static inline void t4_sq_consume(struct t4_wq *wq) { - BUG_ON(wq->sq.in_use < 1); if (wq->sq.cidx == wq->sq.flush_cidx) wq->sq.flush_cidx = -1; wq->sq.in_use--; @@ -600,7 +599,8 @@ static inline void t4_swcq_produce(struct t4_cq *cq) pr_warn("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid); cq->error = 1; - BUG_ON(1); + cq->sw_in_use--; + return; } if (++cq->sw_pidx == cq->size) cq->sw_pidx = 0; @@ -608,7 +608,6 @@ static inline void t4_swcq_produce(struct t4_cq *cq) static inline void t4_swcq_consume(struct t4_cq *cq) { - BUG_ON(cq->sw_in_use < 1); cq->sw_in_use--; if (++cq->sw_cidx == cq->size) cq->sw_cidx = 0; @@ -654,7 +653,6 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) ret = -EOVERFLOW; cq->error = 1; pr_err("cq overflow cqid %u\n", cq->cqid); - BUG_ON(1); } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { /* Ensure CQE is flushed to memory */ @@ -672,7 +670,6 @@ static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq) pr_warn("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid); cq->error = 1; - BUG_ON(1); return NULL; } if (cq->sw_in_use) From 321e329b9c0b3328e3f6bd4924b9c8c5cbf5c22d Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 3 Nov 2017 16:20:52 -0700 Subject: [PATCH 261/296] IB/srpt: Post receive work requests after qp transition to INIT state IB and iWARP specs both spell out that posting a receive work request to a queue pair in the RESET 
state is an invalid operation and required to fail. Postpone posting receive work requests until after the transition to the INIT state. Fixes: commit dea262094cdf ("IB/srpt: Change default behavior from using SRQ to using RC") Signed-off-by: Mike Marciniszyn Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 304855b9b537..98b1b80e476b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1687,10 +1687,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto err_destroy_cq; } - if (!sdev->use_srq) - for (i = 0; i < ch->rq_size; i++) - srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]); - atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", @@ -1701,6 +1697,10 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) if (ret) goto err_destroy_qp; + if (!sdev->use_srq) + for (i = 0; i < ch->rq_size; i++) + srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]); + out: kfree(qp_init); return ret; From c76d7d64f8067699260421509a7c11ad51761061 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Nov 2017 16:20:53 -0700 Subject: [PATCH 262/296] IB/srpt: Introduce helper functions for SRQ allocation and freeing The only functional change in this patch is in the srpt_add_one() error path: if allocating the ring buffer for the SRQ fails, fall back to non-SRQ mode instead of disabling SRP target functionality. Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 115 ++++++++++++++++---------- 1 file changed, 72 insertions(+), 43 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 98b1b80e476b..a5f232e9b1f3 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2510,6 +2510,74 @@ static struct se_wwn *srpt_lookup_wwn(const char *name) return wwn; } +static void srpt_free_srq(struct srpt_device *sdev) +{ + if (!sdev->srq) + return; + + ib_destroy_srq(sdev->srq); + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); + sdev->srq = NULL; +} + +static int srpt_alloc_srq(struct srpt_device *sdev) +{ + struct ib_srq_init_attr srq_attr = { + .event_handler = srpt_srq_event, + .srq_context = (void *)sdev, + .attr.max_wr = sdev->srq_size, + .attr.max_sge = 1, + .srq_type = IB_SRQT_BASIC, + }; + struct ib_device *device = sdev->device; + struct ib_srq *srq; + int i; + + WARN_ON_ONCE(sdev->srq); + srq = ib_create_srq(sdev->pd, &srq_attr); + if (IS_ERR(srq)) { + pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(srq)); + return PTR_ERR(srq); + } + + pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, + sdev->device->attrs.max_srq_wr, device->name); + + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + srp_max_req_size, DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) { + ib_destroy_srq(srq); + return -ENOMEM; + } + + sdev->use_srq = true; + sdev->srq = srq; + + for (i = 0; i < sdev->srq_size; ++i) + srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]); + + return 0; +} + +static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) +{ + struct ib_device *device = sdev->device; + int ret = 0; 
+ + if (!use_srq) { + srpt_free_srq(sdev); + sdev->use_srq = false; + } else if (use_srq && !sdev->srq) { + ret = srpt_alloc_srq(sdev); + } + pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, device->name, + sdev->use_srq, ret); + return ret; +} + /** * srpt_add_one() - Infiniband device addition callback function. */ @@ -2517,7 +2585,6 @@ static void srpt_add_one(struct ib_device *device) { struct srpt_device *sdev; struct srpt_port *sport; - struct ib_srq_init_attr srq_attr; int i; pr_debug("device = %p\n", device); @@ -2539,38 +2606,7 @@ static void srpt_add_one(struct ib_device *device) sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr); - srq_attr.event_handler = srpt_srq_event; - srq_attr.srq_context = (void *)sdev; - srq_attr.attr.max_wr = sdev->srq_size; - srq_attr.attr.max_sge = 1; - srq_attr.attr.srq_limit = 0; - srq_attr.srq_type = IB_SRQT_BASIC; - - sdev->srq = sdev->port[0].port_attrib.use_srq ? - ib_create_srq(sdev->pd, &srq_attr) : ERR_PTR(-ENOTSUPP); - if (IS_ERR(sdev->srq)) { - pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(sdev->srq)); - - /* SRQ not supported. */ - sdev->use_srq = false; - } else { - pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", - sdev->srq_size, sdev->device->attrs.max_srq_wr, - device->name); - - sdev->use_srq = true; - - sdev->ioctx_ring = (struct srpt_recv_ioctx **) - srpt_alloc_ioctx_ring(sdev, sdev->srq_size, - sizeof(*sdev->ioctx_ring[0]), - srp_max_req_size, - DMA_FROM_DEVICE); - if (!sdev->ioctx_ring) - goto err_pd; - - for (i = 0; i < sdev->srq_size; ++i) - srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]); - } + srpt_use_srq(sdev, sdev->port[0].port_attrib.use_srq); if (!srpt_service_guid) srpt_service_guid = be64_to_cpu(device->node_guid); @@ -2630,12 +2666,7 @@ static void srpt_add_one(struct ib_device *device) err_cm: ib_destroy_cm_id(sdev->cm_id); err_ring: - if (sdev->use_srq) - ib_destroy_srq(sdev->srq); - srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, - DMA_FROM_DEVICE); -err_pd: + srpt_free_srq(sdev); ib_dealloc_pd(sdev->pd); free_dev: kfree(sdev); @@ -2678,10 +2709,8 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) spin_unlock(&srpt_dev_lock); srpt_release_sdev(sdev); - if (sdev->use_srq) - ib_destroy_srq(sdev->srq); - srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); + srpt_free_srq(sdev); + ib_dealloc_pd(sdev->pd); kfree(sdev); From 01b3ee13c28456a61e61e5729b8addfaf16d5909 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Nov 2017 16:20:54 -0700 Subject: [PATCH 263/296] IB/srpt: Introduce srpt_disconnect_ch_sync() Except for changing a BUG_ON() call into a WARN_ON_ONCE() call, this patch does not change any functionality. Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 51 ++++++++++++++++++--------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a5f232e9b1f3..1aeb8d6ae6d9 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1787,6 +1787,40 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) return ret; } +/* + * Send DREQ and wait for DREP. Return true if and only if this function + * changed the state of @ch. 
+ */ +static bool srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch) + __must_hold(&sdev->mutex) +{ + DECLARE_COMPLETION_ONSTACK(release_done); + struct srpt_device *sdev = ch->sport->sdev; + bool wait; + + lockdep_assert_held(&sdev->mutex); + + pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, + ch->state); + + WARN_ON(ch->release_done); + ch->release_done = &release_done; + wait = !list_empty(&ch->list); + srpt_disconnect_ch(ch); + mutex_unlock(&sdev->mutex); + + if (!wait) + goto out; + + while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0) + pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, + ch->sess_name, ch->qp->qp_num, ch->state); + +out: + mutex_lock(&sdev->mutex); + return wait; +} + static void __srpt_close_all_ch(struct srpt_device *sdev) { struct srpt_rdma_ch *ch; @@ -2791,27 +2825,12 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) */ static void srpt_close_session(struct se_session *se_sess) { - DECLARE_COMPLETION_ONSTACK(release_done); struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; struct srpt_device *sdev = ch->sport->sdev; - bool wait; - - pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, - ch->state); mutex_lock(&sdev->mutex); - BUG_ON(ch->release_done); - ch->release_done = &release_done; - wait = !list_empty(&ch->list); - srpt_disconnect_ch(ch); + srpt_disconnect_ch_sync(ch); mutex_unlock(&sdev->mutex); - - if (!wait) - return; - - while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0) - pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, - ch->sess_name, ch->qp->qp_num, ch->state); } /** From 8b6dc529ea62bc021e0cdd5b0d4342ba470b5c92 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Nov 2017 16:20:55 -0700 Subject: [PATCH 264/296] IB/srpt: Wait until channel release has finished during module unload Introduce the helper function srpt_set_enabled(). Protect sport->enabled changes with sdev->mutex. Makes configfs writes into 'enabled' wait until all channel resources have been freed. Wait until channel release has finished during kernel module unload. 
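The disable path has to restart its walk of the channel list each time it actually disconnected something, because srpt_disconnect_ch_sync() drops sdev->mutex while it waits and the list can change underneath it. In condensed sketch form (the full version, with logging, is in the srpt_set_enabled() hunk below):

	again:
		list_for_each_entry(ch, &sdev->rch_list, list) {
			if (ch->sport == sport && srpt_disconnect_ch_sync(ch))
				goto again;	/* mutex was dropped; rescan */
		}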
Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 43 +++++++++++++-------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 1aeb8d6ae6d9..ca602eb50078 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1821,19 +1821,31 @@ static bool srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch) return wait; } -static void __srpt_close_all_ch(struct srpt_device *sdev) +static void srpt_set_enabled(struct srpt_port *sport, bool enabled) + __must_hold(&sdev->mutex) { + struct srpt_device *sdev = sport->sdev; struct srpt_rdma_ch *ch; lockdep_assert_held(&sdev->mutex); + if (sport->enabled == enabled) + return; + sport->enabled = enabled; + if (sport->enabled) + return; + +again: list_for_each_entry(ch, &sdev->rch_list, list) { - if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s has been disabled\n", - ch->sess_name, ch->qp->qp_num, - sdev->device->name); - srpt_close_ch(ch); + if (ch->sport == sport) { + pr_info("%s: closing channel %s-%d\n", + sdev->device->name, ch->sess_name, + ch->qp->qp_num); + if (srpt_disconnect_ch_sync(ch)) + goto again; + } } + } static void srpt_free_ch(struct kref *kref) @@ -2496,8 +2508,7 @@ static int srpt_release_sdev(struct srpt_device *sdev) mutex_lock(&sdev->mutex); for (i = 0; i < ARRAY_SIZE(sdev->port); i++) - sdev->port[i].enabled = false; - __srpt_close_all_ch(sdev); + srpt_set_enabled(&sdev->port[i], false); mutex_unlock(&sdev->mutex); res = wait_event_interruptible(sdev->ch_releaseQ, @@ -3083,7 +3094,6 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); struct srpt_device *sdev = sport->sdev; - struct srpt_rdma_ch *ch; unsigned long tmp; int ret; @@ -3097,24 +3107,11 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp); return -EINVAL; } - if (sport->enabled == tmp) - goto out; - sport->enabled = tmp; - if (sport->enabled) - goto out; mutex_lock(&sdev->mutex); - list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->sport == sport) { - pr_debug("%s: ch %p %s-%d\n", __func__, ch, - ch->sess_name, ch->qp->qp_num); - srpt_disconnect_ch(ch); - srpt_close_ch(ch); - } - } + srpt_set_enabled(sport, tmp); mutex_unlock(&sdev->mutex); -out: return count; } From 57b0c46054999479272116ba3469006343856759 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Nov 2017 16:20:56 -0700 Subject: [PATCH 265/296] IB/srpt: Ensure that modifying the use_srq configfs attribute works The use_srq configfs attribute is created after it is read. Hence modify srpt_tpg_attrib_use_srq_store() such that this function switches dynamically between non-SRQ and SRQ mode. 
Reported-by: Mike Marciniszyn Signed-off-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index ca602eb50078..fde4f6741316 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3054,7 +3054,9 @@ static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, { struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); + struct srpt_device *sdev = sport->sdev; unsigned long val; + bool enabled; int ret; ret = kstrtoul(page, 0, &val); @@ -3062,7 +3064,17 @@ static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, return ret; if (val != !!val) return -EINVAL; + + ret = mutex_lock_interruptible(&sdev->mutex); + if (ret < 0) + return ret; + enabled = sport->enabled; + /* Log out all initiator systems before changing 'use_srq'. */ + srpt_set_enabled(sport, false); sport->port_attrib.use_srq = val; + srpt_use_srq(sdev, sport->port_attrib.use_srq); + srpt_set_enabled(sport, enabled); + mutex_unlock(&sdev->mutex); return count; } From 641f348bbdf1dcd30870bef8b0bd663aaf24f2ed Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 6 Nov 2017 06:38:16 -0800 Subject: [PATCH 266/296] IB/hfi1: Allow MgmtAllowed on B2B setups HFIs are hard-wired to send Device Info frames with the MgmtAllowed bit set to 0. This means that in B2B setups MgmtAllowed would never be set, which prevents remote OPA management tools from working properly. Assume MgmtAllowed if a neighbor is also an HFI. Fixes: 98b9ee2002a8 ("IB/hfi1: Cache neighbor secure data after link up") Reviewed-by: Sebastian Sanchez Reviewed-by: Michael J. Ruhl Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 39 +--------------------------- drivers/infiniband/hw/hfi1/hfi.h | 3 +++ drivers/infiniband/hw/hfi1/intr.c | 43 +++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 05e03a05fcc9..2523b6291f95 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1036,7 +1036,6 @@ static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, u8 *flag_bits, u16 *link_widths); static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, u8 *device_rev); -static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed); static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx); static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx, u8 *tx_polarity_inversion, @@ -7197,27 +7196,6 @@ static int lcb_to_port_ltp(int lcb_crc) return port_ltp; } -/* - * Our neighbor has indicated that we are allowed to act as a fabric - * manager, so place the full management partition key in the second - * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note - * that we should already have the limited management partition key in - * array element 1, and also that the port is not yet up when - * add_full_mgmt_pkey() is invoked.
- */ -static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) -{ - struct hfi1_devdata *dd = ppd->dd; - - /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */ - if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY))) - dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n", - __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); - ppd->pkeys[2] = FULL_MGMT_P_KEY; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); - hfi1_event_pkey_change(ppd->dd, ppd->port); -} - static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) { if (ppd->pkeys[2] != 0) { @@ -7414,11 +7392,7 @@ void handle_verify_cap(struct work_struct *work) &partner_supported_crc); read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths); read_remote_device_id(dd, &device_id, &device_rev); - /* - * And the 'MgmtAllowed' information, which is exchanged during - * LNI, is also be available at this point. - */ - read_mgmt_allowed(dd, &ppd->mgmt_allowed); + /* print the active widths */ get_link_widths(dd, &active_tx, &active_rx); dd_dev_info(dd, @@ -7546,9 +7520,6 @@ void handle_verify_cap(struct work_struct *work) write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */ set_8051_lcb_access(dd); - if (ppd->mgmt_allowed) - add_full_mgmt_pkey(ppd); - /* tell the 8051 to go to LinkUp */ set_link_state(ppd, HLS_GOING_UP); } @@ -8960,14 +8931,6 @@ static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx) *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK; } -static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed) -{ - u32 frame; - - read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame); - *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK; -} - static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls) { read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 08d394f384c6..4a9b4d7efe63 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -95,6 +95,9 @@ #define DROP_PACKET_OFF 0 #define DROP_PACKET_ON 1 +#define NEIGHBOR_TYPE_HFI 0 +#define NEIGHBOR_TYPE_SWITCH 1 + extern unsigned long hfi1_cap_mask; #define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap) #define HFI1_CAP_UGET_MASK(mask, cap) \ diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 3e3184ddab5b..387305b768e9 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -55,6 +55,40 @@ #define LINK_UP_DELAY 500 /* in microseconds */ +static void set_mgmt_allowed(struct hfi1_pportdata *ppd) +{ + u32 frame; + struct hfi1_devdata *dd = ppd->dd; + + if (ppd->neighbor_type == NEIGHBOR_TYPE_HFI) { + ppd->mgmt_allowed = 1; + } else { + read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame); + ppd->mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) + & MGMT_ALLOWED_MASK; + } +} + +/* + * Our neighbor has indicated that we are allowed to act as a fabric + * manager, so place the full management partition key in the second + * (0-based) pkey array position. Note that we should already have + * the limited management partition key in array element 1, and also + * that the port is not yet up when add_full_mgmt_pkey() is invoked. 
+ */ +static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) +{ + struct hfi1_devdata *dd = ppd->dd; + + /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */ + if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY))) + dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n", + __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); + ppd->pkeys[2] = FULL_MGMT_P_KEY; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); +} + /** * format_hwmsg - format a single hwerror message * @msg message buffer @@ -163,6 +197,15 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) /* HW needs LINK_UP_DELAY to settle, give it that chance */ udelay(LINK_UP_DELAY); + /* + * 'MgmtAllowed' information, which is exchanged during + * LNI, is available at this point. + */ + set_mgmt_allowed(ppd); + + if (ppd->mgmt_allowed) + add_full_mgmt_pkey(ppd); + /* physical link went up */ ppd->linkup = 1; ppd->offline_disabled_reason = From 22a3ffa78086e31223865c24aeb7d1c3970e2984 Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Mon, 6 Nov 2017 06:38:23 -0800 Subject: [PATCH 267/296] IB/hfi1: Reduce 8051 command timeout Timeout of 20 seconds is too long for active wait performed for 8051 command completion. It was required for scenarios when transition to polling was requested before offline.quiet state was reached. Currently wait for offline.quiet is properly implemented and timeout can be reduced to 1 second. Reviewed-by: Dean Luick Reviewed-by: Duane McCrory Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index fc122c1b1eea..133e313feca4 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -560,7 +560,7 @@ enum { /* timeouts */ #define LINK_RESTART_DELAY 1000 /* link restart delay, in ms */ #define TIMEOUT_8051_START 5000 /* 8051 start timeout, in ms */ -#define DC8051_COMMAND_TIMEOUT 20000 /* DC8051 command timeout, in ms */ +#define DC8051_COMMAND_TIMEOUT 1000 /* DC8051 command timeout, in ms */ #define FREEZE_STATUS_TIMEOUT 20 /* wait for freeze indicators, in ms */ #define VL_STATUS_CLEAR_TIMEOUT 5000 /* per-VL status clear, in ms */ #define CCE_STATUS_TIMEOUT 10 /* time to clear CCE Status, in ms */ From 0e31a2e195220da576c32647277f849509588391 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 6 Nov 2017 06:38:30 -0800 Subject: [PATCH 268/296] IB/hfi1: Remove wrapper function in mmu_rb Wrapper functions were used to call the same function mmu_notifier_mem_invalidate for 2 callbacks in mmu_notifier. The commit 7def96f0a973 ("IB/hfi1: update to new mmu_notifier semantic") removed the invalidate_page callback. Therefore, the wrapper function is no longer needed. Reviewed-by: Michael J. 
Ruhl Reviewed-by: Alex Estrin Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mmu_rb.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 175002c046ed..e7b3ce123da6 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -67,12 +67,9 @@ struct mmu_rb_handler { static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); -static inline void mmu_notifier_range_start(struct mmu_notifier *, - struct mm_struct *, - unsigned long, unsigned long); -static void mmu_notifier_mem_invalidate(struct mmu_notifier *, - struct mm_struct *, - unsigned long, unsigned long); +static void mmu_notifier_range_start(struct mmu_notifier *, + struct mm_struct *, + unsigned long, unsigned long); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); static void do_remove(struct mmu_rb_handler *handler, @@ -286,17 +283,10 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, handler->ops->remove(handler->ops_arg, node); } -static inline void mmu_notifier_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ - mmu_notifier_mem_invalidate(mn, mm, start, end); -} - -static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) +static void mmu_notifier_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); From d61ea0751aa097182291c7ceed447bc73e020109 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 6 Nov 2017 06:38:38 -0800 Subject: [PATCH 269/296] IB/hfi1: Fix a wrapping test to insure the correct timeout The "2 * UINT_MAX" statement: if ((u64)(ts - cce->timestamp) > 2 * UINT_MAX) { is equivalent to: if ((u64)(ts - cce->timestamp) > UINT_MAX - 1) { This results in a premature timeout of the cong log entry. Fix by using unsigned 64 bit integers, removing casts, and using an algebraic equivalent test to avoid the "2 * UINT_MAX" issue. Also make use of kernel API to get nanoseconds instead of open coding. 
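The wrap is ordinary 32-bit arithmetic: UINT_MAX has type unsigned int, so the constant expression is computed modulo 2^32 before it is widened for the comparison. As a worked example:

	unsigned int x = 2 * UINT_MAX;	/* 32-bit multiply: 0x1FFFFFFFE truncates to 0xFFFFFFFE */
	/* x == UINT_MAX - 1, i.e. half of the intended 2 * UINT_MAX bound */

The replacement test, (ts - cce->timestamp) / 2 > U32_MAX, is evaluated entirely in 64-bit arithmetic and expresses the same bound without overflow.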
Reported-by: Dan Carpenter Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 6 +++--- drivers/infiniband/hw/hfi1/mad.h | 2 +- drivers/infiniband/hw/hfi1/rc.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index e648f589bade..0a867e5b65f7 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -3760,7 +3760,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data; - s64 ts; + u64 ts; int i; if (am || smp_length_check(sizeof(*cong_log), max_len)) { @@ -3778,7 +3778,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, ppd->threshold_cong_event_map, sizeof(cong_log->threshold_cong_event_map)); /* keep timestamp in units of 1.024 usec */ - ts = ktime_to_ns(ktime_get()) / 1024; + ts = ktime_get_ns() / 1024; cong_log->current_time_stamp = cpu_to_be32(ts); for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) { struct opa_hfi1_cong_log_event_internal *cce = @@ -3790,7 +3790,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, * required to wrap the counter are supposed to * be zeroed (CA10-49 IBTA, release 1.2.1, V1). */ - if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX)) + if ((ts - cce->timestamp) / 2 > U32_MAX) continue; memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3); memcpy(cong_log->events[i].remote_qp_number_cn_entry, diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index a4f8ac16332c..c4938f3d97c8 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -239,7 +239,7 @@ struct opa_hfi1_cong_log_event_internal { u8 sl; u8 svc_type; u32 rlid; - s64 timestamp; /* wider than 32 bits to detect 32 bit rollover */ + u64 timestamp; /* wider than 32 bits to detect 32 bit rollover */ }; struct opa_hfi1_cong_log_event { diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 32d7bbe9738c..3f21b050714f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1965,7 +1965,7 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, cc_event->svc_type = svc_type; cc_event->rlid = rlid; /* keep timestamp in units of 1.024 usec */ - cc_event->timestamp = ktime_to_ns(ktime_get()) / 1024; + cc_event->timestamp = ktime_get_ns() / 1024; spin_unlock_irqrestore(&ppd->cc_log_lock, flags); } From e4c397eed9c93fc1cb34f7d8df96afeb320d535d Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 6 Nov 2017 06:38:45 -0800 Subject: [PATCH 270/296] IB/hfi1: Remove unnecessary if check A for loop condition of data_iovs in user_sdma_free_request is unnecessarily repeated before the loop as an if check. Remove the if enveloping the loop. 
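The guard is redundant because a for loop whose bound is zero never executes its body. In sketch form (n and put_iov() are placeholders, not the driver's names):

	if (n) {			/* before: redundant guard */
		for (i = 0; i < n; i++)
			put_iov(i);
	}

	for (i = 0; i < n; i++)		/* after: identical behaviour */
		put_iov(i);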
Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 01b9a9c6c2af..833d0b7277bc 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1428,6 +1428,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) { + int i; + if (!list_empty(&req->txps)) { struct sdma_txreq *t, *p; @@ -1439,22 +1441,20 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) kmem_cache_free(req->pq->txreq_cache, tx); } } - if (req->data_iovs) { - struct sdma_mmu_node *node; - int i; - for (i = 0; i < req->data_iovs; i++) { - node = req->iovs[i].node; - if (!node) - continue; + for (i = 0; i < req->data_iovs; i++) { + struct sdma_mmu_node *node = req->iovs[i].node; - if (unpin) - hfi1_mmu_rb_remove(req->pq->handler, - &node->rb); - else - atomic_dec(&node->refcount); - } + if (!node) + continue; + + if (unpin) + hfi1_mmu_rb_remove(req->pq->handler, + &node->rb); + else + atomic_dec(&node->refcount); } + kfree(req->tids); clear_bit(req->info.comp_idx, req->pq->req_in_use); } From cc9a97ea2c74e8270f3d77d1fd4711c6fc866d7f Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 6 Nov 2017 06:38:52 -0800 Subject: [PATCH 271/296] IB/hfi1: Do not allocate PIO send contexts for VNIC OPA VNIC does not use PIO contexts and instead only uses SDMA engines. Do not allocate PIO contexts for VNIC ports. Reviewed-by: Michael J. Ruhl Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 10 +++---- drivers/infiniband/hw/hfi1/driver.c | 5 ++-- drivers/infiniband/hw/hfi1/init.c | 3 +-- drivers/infiniband/hw/hfi1/pio.c | 17 ------------ drivers/infiniband/hw/hfi1/pio.h | 6 ----- drivers/infiniband/hw/hfi1/vnic_main.c | 37 ++------------------------ 6 files changed, 10 insertions(+), 68 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2523b6291f95..bb3b65a14df0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6816,7 +6816,8 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) rcd = hfi1_rcd_get_by_index(dd, i); /* Ensure all non-user contexts(including vnic) are enabled */ - if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) { + if (!rcd || + (i >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic)) { hfi1_rcd_put(rcd); continue; } @@ -8093,8 +8094,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* Check for non-user contexts, including vnic */ - if ((source < dd->first_dyn_alloc_ctxt) || - (rcd->sc && (rcd->sc->type == SC_KERNEL))) + if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); @@ -8124,8 +8124,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* only pay attention to user urgent interrupts */ - if ((source >= dd->first_dyn_alloc_ctxt) && - (!rcd->sc || (rcd->sc->type == SC_USER))) + if (source >= dd->first_dyn_alloc_ctxt && + !rcd->is_vnic) handle_user_interrupt(rcd); 
hfi1_rcd_put(rcd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 07a32a1c1303..4f65ac671044 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -929,10 +929,9 @@ void set_all_slowpath(struct hfi1_devdata *dd) rcd = hfi1_rcd_get_by_index(dd, i); if (!rcd) continue; - if ((i < dd->first_dyn_alloc_ctxt) || - (rcd->sc && (rcd->sc->type == SC_KERNEL))) { + if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic) rcd->do_interrupt = &handle_receive_interrupt; - } + hfi1_rcd_put(rcd); } } diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f29e7a327f29..8e3b3e7d829a 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1807,8 +1807,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * sizeof(u32)); - if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) || - (rcd->sc && (rcd->sc->type == SC_KERNEL))) + if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) gfp_flags = GFP_KERNEL; else gfp_flags = GFP_USER; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 7108a4b5e94c..c929f5b08b15 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -703,7 +703,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; - int req_type = type; dma_addr_t dma; unsigned long flags; u64 reg; @@ -730,13 +729,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } - /* - * VNIC contexts are dynamically allocated. - * Hence, pick a user context for VNIC. - */ - if (type == SC_VNIC) - type = SC_USER; - spin_lock_irqsave(&dd->sc_lock, flags); ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); if (ret) { @@ -746,15 +738,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } - /* - * VNIC contexts are used by kernel driver. - * Hence, mark them as kernel contexts. - */ - if (req_type == SC_VNIC) { - dd->send_contexts[sw_index].type = SC_KERNEL; - type = SC_KERNEL; - } - sci = &dd->send_contexts[sw_index]; sci->sc = sc; diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 99ca5edb0b43..058b08f459ab 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -54,12 +54,6 @@ #define SC_USER 3 /* must be the last one: it may take all left */ #define SC_MAX 4 /* count of send context types */ -/* - * SC_VNIC types are allocated (dynamically) from the user context pool, - * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL). - */ -#define SC_VNIC SC_MAX - /* invalid send context index */ #define INVALID_SCI 0xff diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 1a17708be46a..5d65582fe4d9 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -67,8 +67,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) unsigned int rcvctrl_ops = 0; int ret; - hfi1_init_ctxt(uctxt->sc); - uctxt->do_interrupt = &handle_receive_interrupt; /* Now allocate the RcvHdr queue and eager buffers. 
*/ @@ -96,8 +94,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); - - uctxt->is_vnic = true; done: return ret; } @@ -122,20 +118,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_CAP_KGET(NODROP_EGR_FULL) | HFI1_CAP_KGET(DMA_RTAIL); uctxt->seq_cnt = 1; - - /* Allocate and enable a PIO send context */ - uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, - uctxt->numa_id); - - ret = uctxt->sc ? 0 : -ENOMEM; - if (ret) - goto bail; - - dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", - uctxt->sc->sw_index, uctxt->sc->hw_context); - ret = sc_enable(uctxt->sc); - if (ret) - goto bail; + uctxt->is_vnic = true; if (dd->num_msix_entries) hfi1_set_vnic_msix_info(uctxt); @@ -144,11 +127,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); *vnic_ctxt = uctxt; - return ret; -bail: - hfi1_free_ctxt(uctxt); - dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); - return ret; + return 0; } static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, @@ -170,18 +149,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); - /* - * VNIC contexts are allocated from user context pool. - * Release them back to user context pool. - * - * Reset context integrity checks to default. - * (writes to CSRs probably belong in chip.c) - */ - write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, - hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); - sc_disable(uctxt->sc); - - dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; uctxt->event_flags = 0; From a276672ed71eb247ef3b728e35a37bacb7749213 Mon Sep 17 00:00:00 2001 From: Grzegorz Morys Date: Mon, 6 Nov 2017 06:38:59 -0800 Subject: [PATCH 272/296] IB/hfi1: Prohibit invalid Init to Armed state transition It is invalid to change the Link state from Init to Armed if the IsSmConfigurationStarted bit is not set in the Attribute Modifier of the Set subnet management method for the PortInfo and PortStateInfo attributes. Set the response MAD status field bits accordingly to react correctly in such situations and avoid changing the Link state. Reviewed-by: Ira Weiny Reviewed-by: Michael J.
Ruhl Signed-off-by: Grzegorz Morys Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 0a867e5b65f7..1277e1c929ce 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1238,8 +1238,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, } static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, - u32 logical_state, u32 phys_state, - int suppress_idle_sma) + u32 logical_state, u32 phys_state) { struct hfi1_devdata *dd = ppd->dd; u32 link_state; @@ -1320,7 +1319,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, break; case IB_PORT_ARMED: ret = set_link_state(ppd, HLS_UP_ARMED); - if ((ret == 0) && (suppress_idle_sma == 0)) + if (!ret) send_idle_sma(dd, SMA_IDLE_ARM); break; case IB_PORT_ACTIVE: @@ -1614,8 +1613,10 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (ls_new == ls_old || (ls_new == IB_PORT_ARMED)) ppd->is_sm_config_started = 1; } else if (ls_new == IB_PORT_ARMED) { - if (ppd->is_sm_config_started == 0) + if (ppd->is_sm_config_started == 0) { invalid = 1; + smp->status |= IB_SMP_INVALID_FIELD; + } } } @@ -1632,9 +1633,11 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, * is down or is being set to down. */ - ret = set_port_states(ppd, smp, ls_new, ps_new, invalid); - if (ret) - return ret; + if (!invalid) { + ret = set_port_states(ppd, smp, ls_new, ps_new); + if (ret) + return ret; + } ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, max_len); @@ -2111,17 +2114,18 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (ls_new == ls_old || (ls_new == IB_PORT_ARMED)) ppd->is_sm_config_started = 1; } else if (ls_new == IB_PORT_ARMED) { - if (ppd->is_sm_config_started == 0) + if (ppd->is_sm_config_started == 0) { invalid = 1; + smp->status |= IB_SMP_INVALID_FIELD; + } } } - ret = set_port_states(ppd, smp, ls_new, ps_new, invalid); - if (ret) - return ret; - - if (invalid) - smp->status |= IB_SMP_INVALID_FIELD; + if (!invalid) { + ret = set_port_states(ppd, smp, ls_new, ps_new); + if (ret) + return ret; + } return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len, max_len); From e8d5aff65092aabf89624d18e048c9bf1ce65a2c Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 6 Nov 2017 06:39:07 -0800 Subject: [PATCH 273/296] IB/hfi1: Send 'reboot' as planned down remote reason On host shutdown, driver sends 'SMA_Disabled' as a reason for link down. This is incorrect. Send 'reboot' as a linkdown reason. 
Signed-off-by: Jan Sokolowski Reviewed-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index bb3b65a14df0..4f057e8ffe50 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9786,9 +9786,9 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) cancel_delayed_work_sync(&ppd->start_link_work); ppd->offline_disabled_reason = - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); - set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, - OPA_LINKDOWN_REASON_SMA_DISABLED); + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_REBOOT); + set_link_down_reason(ppd, OPA_LINKDOWN_REASON_REBOOT, 0, + OPA_LINKDOWN_REASON_REBOOT); set_link_state(ppd, HLS_DN_OFFLINE); /* disable the port */ From 19b57c6c449970763bee089466119e09a17199b1 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 6 Nov 2017 06:39:15 -0800 Subject: [PATCH 274/296] IB/core: Convert OPA AH to IB for Extended LIDs only When deciding to convert an OPA AH to IB we were incorrectly including the IB multicast range. At this layer, all Extended LIDs will be larger than IB_LID_PERMISSIVE. Change the comparison accordingly. Fixes: d541e45500bd ("IB/core: Convert ah_attr from OPA to IB when copying to user") Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_marshall.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index aeb2824efbc7..bb372b4713a4 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -69,8 +69,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device, memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && - (rdma_ah_get_dlid(ah_attr) >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) && (!rdma_ah_conv_opa_to_ib(device, &conv_ah, ah_attr))) src = &conv_ah; From b64581adba467c916590c3922f64ec6c2b59a2c3 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 6 Nov 2017 06:39:22 -0800 Subject: [PATCH 275/296] IB/hfi1: Mask upper 16Bits of Extended LID prior to rvt_cq_entry Pass only the lower 16 bits of an Extended LID to rvt_cq_entry to avoid triggering a WARN_ON_ONCE during conversion there. The upper 16 bits are okay to drop as they are obtained elsewhere.
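The masking is a plain truncation to the low 16 bits. For a hypothetical 32-bit extended LID value:

	u32 lid = 0x00012345;		/* illustrative extended LID */
	u16 slid = lid & U16_MAX;	/* 0x2345: upper 16 bits dropped */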
Fixes: 62ede7779904 ("Add OPA extended LID support") Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/ruc.c | 2 +- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 3f21b050714f..fd01a760259f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2219,7 +2219,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index d450d4d4bda3..1a3ed6e81b31 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -560,7 +560,7 @@ static void ruc_loopback(struct rvt_qp *sqp) wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index fcd81866e332..1ee80a5f258c 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -462,7 +462,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 84c7dbec0c28..5260b27f5631 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -265,8 +265,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } else { wc.pkey_index = 0; } - wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); + wc.slid = (ppd->lid | (rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1))) & U16_MAX; /* Check for loopback when the port lid is not set */ if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI) wc.slid = be16_to_cpu(IB_LID_PERMISSIVE); @@ -1037,7 +1037,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } if (slid_is_permissive) slid = be32_to_cpu(OPA_LID_PERMISSIVE); - wc.slid = slid; + wc.slid = slid & U16_MAX; wc.sl = sl_from_sc; /* From 685894dd9bd03b92803fbc72565789713f067d4c Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 6 Nov 2017 06:39:29 -0800 Subject: [PATCH 276/296] IB/hfi1: Handle initial value of 0 for CCTI setting When the driver is loaded, it sets the default CCTI value to 0. When the FM starts and CCA is disabled, the driver sets the max value to 65535 due to the driver subtracting 1 from 0 and the fact that the CCTI value is a u16. Special case the subtraction to find the index for a 0 value.
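The failure mode is ordinary unsigned wraparound (values below are illustrative): subtracting 1 from a zero counter and storing the result in a u16 yields 0xFFFF:

	u16 total_cct_entry = 0;		/* no CCT entries configured */
	u16 ccti_limit = total_cct_entry - 1;	/* -1 stored as u16: 0xFFFF == 65535 */

With the fix, ccti_limit is forced to 0 whenever total_cct_entry is 0, as the hunk below shows.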
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1277e1c929ce..cf8dba34fe30 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -3701,7 +3701,11 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) *new_cc_state = *old_cc_state; - new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + if (ppd->total_cct_entry) + new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + else + new_cc_state->cct.ccti_limit = 0; + memcpy(new_cc_state->cct.entries, ppd->ccti_entries, ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)); From 9b40183c08e48f1d26d06d39f29808c9c6037561 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 6 Nov 2017 08:07:29 -0800 Subject: [PATCH 277/296] RDMA/bnxt_re: Add memory barriers when processing CQ/EQ entries The code determines if the next ring entry is valid before proceeding further to read the rest of the entry. The CPU can re-order and read the rest of the entry first, possibly reading a stale entry, if DMA of a new entry happens right after reading it. Signed-off-by: Somnath Kotur Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index a82044dacce0..31ea9f43f7d3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -292,6 +292,12 @@ static void bnxt_qplib_service_nq(unsigned long data) if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) break; + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); + type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK; switch (type) { case NQ_BASE_TYPE_CQ_NOTIFICATION: @@ -1113,6 +1119,11 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)]; if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements)) continue; + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) { case CQ_BASE_CQE_TYPE_REQ: case CQ_BASE_CQE_TYPE_TERMINAL: @@ -1896,6 +1907,11 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, /* If the next hwcqe is VALID */ if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons, cq->hwq.max_elements)) { + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); /* If the next hwcqe is a REQ */ if ((peek_hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) == @@ -2440,6 +2456,11 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements)) break; + /* + * The valid test of the entry must be done first before + * reading any further. 
+ */ + dma_rmb(); /* From the device's respective CQE format to qplib_wc*/ switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) { case CQ_BASE_CQE_TYPE_REQ: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index a7b5de3e193c..bb5574adf195 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -359,6 +359,10 @@ static void bnxt_qplib_service_creq(unsigned long data) creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]; if (!CREQ_CMP_VALID(creqe, raw_cons, creq->max_elements)) break; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); type = creqe->type & CREQ_BASE_TYPE_MASK; switch (type) { From 237379fc339750cbe6680f6a3565ab5c24e32229 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 6 Nov 2017 08:07:30 -0800 Subject: [PATCH 278/296] RDMA/bnxt_re: Set QP state in case of response completion errors Moves the driver QP state to error in case of response completion errors. Handles the scenarios which doesn't generate a terminal CQE. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 31ea9f43f7d3..805a6124a48c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2118,6 +2118,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; /* Add qp to flush list of the CQ */ bnxt_qplib_lock_buddy_cq(qp, cq); __bnxt_qplib_add_flush_qp(qp); @@ -2181,6 +2182,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; /* Add qp to flush list of the CQ */ bnxt_qplib_lock_buddy_cq(qp, cq); __bnxt_qplib_add_flush_qp(qp); @@ -2268,6 +2270,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; /* Add qp to flush list of the CQ */ bnxt_qplib_lock_buddy_cq(qp, cq); __bnxt_qplib_add_flush_qp(qp); From c88a7858d721af5e2d059e3b0b751fed0504e814 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 6 Nov 2017 08:07:31 -0800 Subject: [PATCH 279/296] RDMA/bnxt_re: Flush CQ notification Work Queue before destroying QP Destroy_qp shall wait for any outstanding CQ notification to be flushed out before proceeding with QP destroy. Flushing the WQ before destroying the QP. 
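The dma_rmb() hunks of patch 277 above all instantiate the same consumer-side ordering pattern. A condensed sketch, with a hypothetical descriptor layout and helper names standing in for the bnxt_re structures:

#include <linux/types.h>
#include <asm/barrier.h>

struct ring_entry {
        __le16 info10_type;     /* payload written by the device */
        u8 valid;               /* written last by the device via DMA */
};

static void handle_type(u16 type);      /* consumer logic, defined elsewhere */

static bool consume_one(struct ring_entry *e, u8 expected_valid)
{
        if ((e->valid & 1) != expected_valid)
                return false;   /* nothing new produced yet */

        /*
         * Order the valid-bit load before all other loads from the
         * entry; without dma_rmb() the CPU may have speculatively
         * read stale payload fields before the DMA write completed.
         */
        dma_rmb();

        handle_type(le16_to_cpu(e->info10_type));
        return true;
}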
Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_fp.c | 7 +++++++ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1 + 3 files changed, 9 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index adc42d7ff4f8..c29f33af334b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -785,6 +785,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) struct bnxt_re_dev *rdev = qp->rdev; int rc; + bnxt_qplib_flush_cqn_wq(&qp->qplib_qp); bnxt_qplib_del_flush_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 805a6124a48c..c0f813366ad6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2537,3 +2537,10 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) atomic_set(&cq->arm_state, 1); spin_unlock_irqrestore(&cq->hwq.lock, flags); } + +void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp) +{ + flush_workqueue(qp->scq->nq->cqn_wq); + if (qp->scq != qp->rcq) + flush_workqueue(qp->rcq->nq->cqn_wq); +} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 8ead70ca1c1d..c582d4ec8173 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -478,4 +478,5 @@ void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes); +void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp); #endif /* __BNXT_QPLIB_FP_H__ */ From 051276658b86729909fb6e28e1fabab11c109615 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 6 Nov 2017 08:07:32 -0800 Subject: [PATCH 280/296] RDMA/bnxt_re: synchronize poll_cq and req_notify_cq verbs Synchronize poll_cq and req_notify_cq verbs using cq_lock, instead of the lower level qplib->hwq.lock. 
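The race that patch 280 closes is easiest to see in reduced form. A sketch with illustrative names, assuming a cq_is_empty() helper is provided elsewhere:

#include <linux/types.h>
#include <linux/spinlock.h>

struct simple_cq {
        spinlock_t lock;        /* now serializes poll vs. arm */
        bool armed;
};

static bool cq_is_empty(struct simple_cq *cq);  /* provided elsewhere */

static int simple_req_notify(struct simple_cq *cq, bool report_missed)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cq->lock, flags);
        /*
         * The missed-event check and the arm must be atomic with
         * respect to the poll path: a completion that slips in
         * between an unlocked check and the arm would neither be
         * reported here nor raise an event later.
         */
        if (report_missed && !cq_is_empty(cq))
                ret = 1;
        else
                cq->armed = true;
        spin_unlock_irqrestore(&cq->lock, flags);

        return ret;
}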
Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index c29f33af334b..526ab3f10a90 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2996,8 +2996,10 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq, enum ib_cq_notify_flags ib_cqn_flags) { struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); - int type = 0; + int type = 0, rc = 0; + unsigned long flags; + spin_lock_irqsave(&cq->cq_lock, flags); /* Trigger on the very next completion */ if (ib_cqn_flags & IB_CQ_NEXT_COMP) type = DBR_DBR_TYPE_CQ_ARMALL; @@ -3007,12 +3009,15 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq, /* Poll to see if there are missed events */ if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) && - !(bnxt_qplib_is_cq_empty(&cq->qplib_cq))) - return 1; - + !(bnxt_qplib_is_cq_empty(&cq->qplib_cq))) { + rc = 1; + goto exit; + } bnxt_qplib_req_notify_cq(&cq->qplib_cq, type); - return 0; +exit: + spin_unlock_irqrestore(&cq->cq_lock, flags); + return rc; } /* Memory Regions */ From cb9fd89f91337aaca9c96d265930f22b31462e5e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 10 Nov 2017 23:10:31 +0100 Subject: [PATCH 281/296] RDMA/core: avoid uninitialized variable warning in create_udata As Dan pointed out, the rework I did makes it harder for smatch and other static checkers to figure out what is going on with the uninitialized pointers. By open-coding the call in create_udata(), we make it more readable for both humans and tools. Reported-by: Dan Carpenter Fixes: 12f727721eee ("IB/uverbs: clean up INIT_UDATA_BUF_OR_NULL usage") Signed-off-by: Arnd Bergmann Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_std_types.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index b095bce7f238..c3ee5d9b336d 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -227,27 +227,26 @@ static void create_udata(struct uverbs_attr_bundle *ctx, * to use uverbs_attr_bundle instead of ib_udata. * Assume attr == 0 is input and attr == 1 is output. */ - void __user *inbuf; - size_t inbuf_len = 0; - void __user *outbuf; - size_t outbuf_len = 0; const struct uverbs_attr *uhw_in = uverbs_attr_get(ctx, UVERBS_UHW_IN); const struct uverbs_attr *uhw_out = uverbs_attr_get(ctx, UVERBS_UHW_OUT); if (!IS_ERR(uhw_in)) { - inbuf = uhw_in->ptr_attr.ptr; - inbuf_len = uhw_in->ptr_attr.len; + udata->inbuf = uhw_in->ptr_attr.ptr; + udata->inlen = uhw_in->ptr_attr.len; + } else { + udata->inbuf = NULL; + udata->inlen = 0; } if (!IS_ERR(uhw_out)) { - outbuf = uhw_out->ptr_attr.ptr; - outbuf_len = uhw_out->ptr_attr.len; + udata->outbuf = uhw_out->ptr_attr.ptr; + udata->outlen = uhw_out->ptr_attr.len; + } else { + udata->outbuf = NULL; + udata->outlen = 0; } - - ib_uverbs_init_udata_buf_or_null(udata, inbuf, outbuf, inbuf_len, - outbuf_len); } static int uverbs_create_cq_handler(struct ib_device *ib_dev, From 8b10ba783c9d0c69d53e7d78ff7f2cd921f80729 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Mon, 6 Nov 2017 11:48:53 -0800 Subject: [PATCH 282/296] RDMA/vmw_pvrdma: Add shared receive queue support Add the required functions needed to support SRQs. 
Currently, kernel clients are not supported. SRQs will only be available in userspace. Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Reviewed-by: Nitish Bhat Signed-off-by: Bryan Tan Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/Makefile | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 25 ++ .../infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 54 +++ .../infiniband/hw/vmw_pvrdma/pvrdma_main.c | 59 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 55 ++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 319 ++++++++++++++++++ .../infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 3 + .../infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 18 + include/uapi/rdma/vmw_pvrdma-abi.h | 2 + 9 files changed, 523 insertions(+), 14 deletions(-) create mode 100644 drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile index 0194ed19f542..2f52e0a044a0 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/Makefile +++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o -vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o +vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_srq.o pvrdma_verbs.o diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 984aa3484928..63bc2efc34eb 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -162,6 +162,22 @@ struct pvrdma_ah { struct pvrdma_av av; }; +struct pvrdma_srq { + struct ib_srq ibsrq; + int offset; + spinlock_t lock; /* SRQ lock. 
*/ + int wqe_cnt; + int wqe_size; + int max_gs; + struct ib_umem *umem; + struct pvrdma_ring_state *ring; + struct pvrdma_page_dir pdir; + u32 srq_handle; + int npages; + refcount_t refcnt; + wait_queue_head_t wait; +}; + struct pvrdma_qp { struct ib_qp ibqp; u32 qp_handle; @@ -171,6 +187,7 @@ struct pvrdma_qp { struct ib_umem *rumem; struct ib_umem *sumem; struct pvrdma_page_dir pdir; + struct pvrdma_srq *srq; int npages; int npages_send; int npages_recv; @@ -210,6 +227,8 @@ struct pvrdma_dev { struct pvrdma_page_dir cq_pdir; struct pvrdma_cq **cq_tbl; spinlock_t cq_tbl_lock; + struct pvrdma_srq **srq_tbl; + spinlock_t srq_tbl_lock; struct pvrdma_qp **qp_tbl; spinlock_t qp_tbl_lock; struct pvrdma_uar_table uar_table; @@ -221,6 +240,7 @@ struct pvrdma_dev { bool ib_active; atomic_t num_qps; atomic_t num_cqs; + atomic_t num_srqs; atomic_t num_pds; atomic_t num_ahs; @@ -256,6 +276,11 @@ static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq) return container_of(ibcq, struct pvrdma_cq, ibcq); } +static inline struct pvrdma_srq *to_vsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct pvrdma_srq, ibsrq); +} + static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr) { return container_of(ibmr, struct pvrdma_user_mr, ibmr); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index df0a6b525021..6fd5a8f4e2f6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -339,6 +339,10 @@ enum { PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_DESTROY_BIND, + PVRDMA_CMD_CREATE_SRQ, + PVRDMA_CMD_MODIFY_SRQ, + PVRDMA_CMD_QUERY_SRQ, + PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_MAX, }; @@ -361,6 +365,10 @@ enum { PVRDMA_CMD_DESTROY_UC_RESP_NOOP, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, + PVRDMA_CMD_CREATE_SRQ_RESP, + PVRDMA_CMD_MODIFY_SRQ_RESP, + PVRDMA_CMD_QUERY_SRQ_RESP, + PVRDMA_CMD_DESTROY_SRQ_RESP, PVRDMA_CMD_MAX_RESP, }; @@ -495,6 +503,46 @@ struct pvrdma_cmd_destroy_cq { u8 reserved[4]; }; +struct pvrdma_cmd_create_srq { + struct pvrdma_cmd_hdr hdr; + u64 pdir_dma; + u32 pd_handle; + u32 nchunks; + struct pvrdma_srq_attr attrs; + u8 srq_type; + u8 reserved[7]; +}; + +struct pvrdma_cmd_create_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 srqn; + u8 reserved[4]; +}; + +struct pvrdma_cmd_modify_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u32 attr_mask; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_query_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_query_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_destroy_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u8 reserved[4]; +}; + struct pvrdma_cmd_create_qp { struct pvrdma_cmd_hdr hdr; u64 pdir_dma; @@ -594,6 +642,10 @@ union pvrdma_cmd_req { struct pvrdma_cmd_destroy_qp destroy_qp; struct pvrdma_cmd_create_bind create_bind; struct pvrdma_cmd_destroy_bind destroy_bind; + struct pvrdma_cmd_create_srq create_srq; + struct pvrdma_cmd_modify_srq modify_srq; + struct pvrdma_cmd_query_srq query_srq; + struct pvrdma_cmd_destroy_srq destroy_srq; }; union pvrdma_cmd_resp { @@ -608,6 +660,8 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_qp_resp create_qp_resp; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; + struct pvrdma_cmd_create_srq_resp create_srq_resp; + struct pvrdma_cmd_query_srq_resp 
query_srq_resp; }; #endif /* __PVRDMA_DEV_API_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6ce709a67959..1f4e18717a00 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -118,6 +118,7 @@ static int pvrdma_init_device(struct pvrdma_dev *dev) spin_lock_init(&dev->cmd_lock); sema_init(&dev->cmd_sema, 1); atomic_set(&dev->num_qps, 0); + atomic_set(&dev->num_srqs, 0); atomic_set(&dev->num_cqs, 0); atomic_set(&dev->num_pds, 0); atomic_set(&dev->num_ahs, 0); @@ -254,9 +255,32 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) goto err_cq_free; spin_lock_init(&dev->qp_tbl_lock); + /* Check if SRQ is supported by backend */ + if (dev->dsr->caps.max_srq) { + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + + dev->ib_dev.create_srq = pvrdma_create_srq; + dev->ib_dev.modify_srq = pvrdma_modify_srq; + dev->ib_dev.query_srq = pvrdma_query_srq; + dev->ib_dev.destroy_srq = pvrdma_destroy_srq; + dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv; + + dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, + sizeof(struct pvrdma_srq *), + GFP_KERNEL); + if (!dev->srq_tbl) + goto err_qp_free; + } + spin_lock_init(&dev->srq_tbl_lock); + ret = ib_register_device(&dev->ib_dev, NULL); if (ret) - goto err_qp_free; + goto err_srq_free; for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) { ret = device_create_file(&dev->ib_dev.dev, @@ -271,6 +295,8 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) err_class: ib_unregister_device(&dev->ib_dev); +err_srq_free: + kfree(dev->srq_tbl); err_qp_free: kfree(dev->qp_tbl); err_cq_free: @@ -353,6 +379,35 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) } } +static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) +{ + struct pvrdma_srq *srq; + unsigned long flags; + + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + if (dev->srq_tbl) + srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq]; + else + srq = NULL; + if (srq) + refcount_inc(&srq->refcnt); + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + if (srq && srq->ibsrq.event_handler) { + struct ib_srq *ibsrq = &srq->ibsrq; + struct ib_event e; + + e.device = ibsrq->device; + e.element.srq = ibsrq; + e.event = type; /* 1:1 mapping for now. 
*/ + ibsrq->event_handler(&e, ibsrq->srq_context); + } + if (srq) { + if (refcount_dec_and_test(&srq->refcnt)) + wake_up(&srq->wait); + } +} + static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, enum ib_event_type event) { @@ -423,6 +478,7 @@ static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) case PVRDMA_EVENT_SRQ_ERR: case PVRDMA_EVENT_SRQ_LIMIT_REACHED: + pvrdma_srq_event(dev, eqe->info, eqe->type); break; case PVRDMA_EVENT_PORT_ACTIVE: @@ -1059,6 +1115,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) iounmap(dev->regs); kfree(dev->sgid_tbl); kfree(dev->cq_tbl); + kfree(dev->srq_tbl); kfree(dev->qp_tbl); pvrdma_uar_table_cleanup(dev); iounmap(dev->driver_uar.map); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index ed34d5a581fa..10420a18d02f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -198,6 +198,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct pvrdma_create_qp ucmd; unsigned long flags; int ret; + bool is_srq = !!init_attr->srq; if (init_attr->create_flags) { dev_warn(&dev->pdev->dev, @@ -214,6 +215,12 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } + if (is_srq && !dev->dsr->caps.max_srq) { + dev_warn(&dev->pdev->dev, + "SRQs not supported by device\n"); + return ERR_PTR(-EINVAL); + } + if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp)) return ERR_PTR(-ENOMEM); @@ -252,26 +259,36 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } - /* set qp->sq.wqe_cnt, shift, buf_size.. */ - qp->rumem = ib_umem_get(pd->uobject->context, - ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); - if (IS_ERR(qp->rumem)) { - ret = PTR_ERR(qp->rumem); - goto err_qp; + if (!is_srq) { + /* set qp->sq.wqe_cnt, shift, buf_size.. */ + qp->rumem = ib_umem_get(pd->uobject->context, + ucmd.rbuf_addr, + ucmd.rbuf_size, 0, 0); + if (IS_ERR(qp->rumem)) { + ret = PTR_ERR(qp->rumem); + goto err_qp; + } + qp->srq = NULL; + } else { + qp->rumem = NULL; + qp->srq = to_vsrq(init_attr->srq); } qp->sumem = ib_umem_get(pd->uobject->context, ucmd.sbuf_addr, ucmd.sbuf_size, 0, 0); if (IS_ERR(qp->sumem)) { - ib_umem_release(qp->rumem); + if (!is_srq) + ib_umem_release(qp->rumem); ret = PTR_ERR(qp->sumem); goto err_qp; } qp->npages_send = ib_umem_page_count(qp->sumem); - qp->npages_recv = ib_umem_page_count(qp->rumem); + if (!is_srq) + qp->npages_recv = ib_umem_page_count(qp->rumem); + else + qp->npages_recv = 0; qp->npages = qp->npages_send + qp->npages_recv; } else { qp->is_kernel = true; @@ -312,12 +329,14 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, if (!qp->is_kernel) { pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0); - pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem, - qp->npages_send); + if (!is_srq) + pvrdma_page_dir_insert_umem(&qp->pdir, + qp->rumem, + qp->npages_send); } else { /* Ring state is always the first page. */ qp->sq.ring = qp->pdir.pages[0]; - qp->rq.ring = &qp->sq.ring[1]; + qp->rq.ring = is_srq ? 
NULL : &qp->sq.ring[1]; } break; default: @@ -333,6 +352,10 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->pd_handle = to_vpd(pd)->pd_handle; cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle; cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle; + if (is_srq) + cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle; + else + cmd->srq_handle = 0; cmd->max_send_wr = init_attr->cap.max_send_wr; cmd->max_recv_wr = init_attr->cap.max_recv_wr; cmd->max_send_sge = init_attr->cap.max_send_sge; @@ -340,6 +363,8 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->max_inline_data = init_attr->cap.max_inline_data; cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); + cmd->is_srq = is_srq; + cmd->lkey = 0; cmd->access_flags = IB_ACCESS_LOCAL_WRITE; cmd->total_chunks = qp->npages; cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES; @@ -815,6 +840,12 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EINVAL; } + if (qp->srq) { + dev_warn(&dev->pdev->dev, "QP associated with SRQ\n"); + *bad_wr = wr; + return -EINVAL; + } + spin_lock_irqsave(&qp->rq.lock, flags); while (wr) { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c new file mode 100644 index 000000000000..826ccb864596 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2016-2017 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include "pvrdma.h" + +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + /* No support for kernel clients. */ + return -EOPNOTSUPP; +} + +/** + * pvrdma_query_srq - query shared receive queue + * @ibsrq: the shared receive queue to query + * @srq_attr: attributes to query and return to client + * + * @return: 0 for success, otherwise returns an errno. + */ +int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct pvrdma_dev *dev = to_vdev(ibsrq->device); + struct pvrdma_srq *srq = to_vsrq(ibsrq); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_srq *cmd = &req.query_srq; + struct pvrdma_cmd_query_srq_resp *resp = &rsp.query_srq_resp; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_SRQ; + cmd->srq_handle = srq->srq_handle; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_SRQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not query shared receive queue, error: %d\n", + ret); + return -EINVAL; + } + + srq_attr->srq_limit = resp->attrs.srq_limit; + srq_attr->max_wr = resp->attrs.max_wr; + srq_attr->max_sge = resp->attrs.max_sge; + + return 0; +} + +/** + * pvrdma_create_srq - create shared receive queue + * @pd: protection domain + * @init_attr: shared receive queue attributes + * @udata: user data + * + * @return: the ib_srq pointer on success, otherwise returns an errno. + */ +struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct pvrdma_srq *srq = NULL; + struct pvrdma_dev *dev = to_vdev(pd->device); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_srq *cmd = &req.create_srq; + struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp; + struct pvrdma_create_srq ucmd; + unsigned long flags; + int ret; + + if (!(pd->uobject && udata)) { + /* No support for kernel clients. 
*/ + dev_warn(&dev->pdev->dev, + "no shared receive queue support for kernel client\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->srq_type != IB_SRQT_BASIC) { + dev_warn(&dev->pdev->dev, + "shared receive queue type %d not supported\n", + init_attr->srq_type); + return ERR_PTR(-EINVAL); + } + + if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || + init_attr->attr.max_sge > dev->dsr->caps.max_srq_sge) { + dev_warn(&dev->pdev->dev, + "shared receive queue size invalid\n"); + return ERR_PTR(-EINVAL); + } + + if (!atomic_add_unless(&dev->num_srqs, 1, dev->dsr->caps.max_srq)) + return ERR_PTR(-ENOMEM); + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + ret = -ENOMEM; + goto err_srq; + } + + spin_lock_init(&srq->lock); + refcount_set(&srq->refcnt, 1); + init_waitqueue_head(&srq->wait); + + dev_dbg(&dev->pdev->dev, + "create shared receive queue from user space\n"); + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->uobject->context, + ucmd.buf_addr, + ucmd.buf_size, 0, 0); + if (IS_ERR(srq->umem)) { + ret = PTR_ERR(srq->umem); + goto err_srq; + } + + srq->npages = ib_umem_page_count(srq->umem); + + if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, + "overflow pages in shared receive queue\n"); + ret = -EINVAL; + goto err_umem; + } + + ret = pvrdma_page_dir_init(dev, &srq->pdir, srq->npages, false); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + pvrdma_page_dir_insert_umem(&srq->pdir, srq->umem, 0); + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_SRQ; + cmd->srq_type = init_attr->srq_type; + cmd->nchunks = srq->npages; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->attrs.max_wr = init_attr->attr.max_wr; + cmd->attrs.max_sge = init_attr->attr.max_sge; + cmd->attrs.srq_limit = init_attr->attr.srq_limit; + cmd->pdir_dma = srq->pdir.dir_dma; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_SRQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create shared receive queue, error: %d\n", + ret); + goto err_page_dir; + } + + srq->srq_handle = resp->srqn; + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq; + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + /* Copy udata back. */ + if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) { + dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); + pvrdma_destroy_srq(&srq->ibsrq); + return ERR_PTR(-EINVAL); + } + + return &srq->ibsrq; + +err_page_dir: + pvrdma_page_dir_cleanup(dev, &srq->pdir); +err_umem: + ib_umem_release(srq->umem); +err_srq: + kfree(srq); + atomic_dec(&dev->num_srqs); + + return ERR_PTR(ret); +} + +static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + dev->srq_tbl[srq->srq_handle] = NULL; + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + refcount_dec(&srq->refcnt); + wait_event(srq->wait, !refcount_read(&srq->refcnt)); + + /* There is no support for kernel clients, so this is safe. */ + ib_umem_release(srq->umem); + + pvrdma_page_dir_cleanup(dev, &srq->pdir); + + kfree(srq); + + atomic_dec(&dev->num_srqs); +} + +/** + * pvrdma_destroy_srq - destroy shared receive queue + * @srq: the shared receive queue to destroy + * + * @return: 0 for success. 
+ */ +int pvrdma_destroy_srq(struct ib_srq *srq) +{ + struct pvrdma_srq *vsrq = to_vsrq(srq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_srq *cmd = &req.destroy_srq; + struct pvrdma_dev *dev = to_vdev(srq->device); + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_SRQ; + cmd->srq_handle = vsrq->srq_handle; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&dev->pdev->dev, + "destroy shared receive queue failed, error: %d\n", + ret); + + pvrdma_free_srq(dev, vsrq); + + return 0; +} + +/** + * pvrdma_modify_srq - modify shared receive queue attributes + * @ibsrq: the shared receive queue to modify + * @attr: the shared receive queue's new attributes + * @attr_mask: attributes mask + * @udata: user data + * + * @returns 0 on success, otherwise returns an errno. + */ +int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct pvrdma_srq *vsrq = to_vsrq(ibsrq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_modify_srq *cmd = &req.modify_srq; + struct pvrdma_dev *dev = to_vdev(ibsrq->device); + int ret; + + /* Only support SRQ limit. */ + if (!(attr_mask & IB_SRQ_LIMIT)) + return -EINVAL; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_MODIFY_SRQ; + cmd->srq_handle = vsrq->srq_handle; + cmd->attrs.srq_limit = attr->srq_limit; + cmd->attr_mask = attr_mask; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not modify shared receive queue, error: %d\n", + ret); + + return -EINVAL; + } + + return ret; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 48776f5ffb0e..16b96616ef7e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -85,6 +85,9 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_sge = dev->dsr->caps.max_sge; props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, dev->dsr->caps.max_sge_rd); + props->max_srq = dev->dsr->caps.max_srq; + props->max_srq_wr = dev->dsr->caps.max_srq_wr; + props->max_srq_sge = dev->dsr->caps.max_srq_sge; props->max_cq = dev->dsr->caps.max_cq; props->max_cqe = dev->dsr->caps.max_cqe; props->max_mr = dev->dsr->caps.max_mr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 002a9b066e70..b7b25728a7e5 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -324,6 +324,13 @@ enum pvrdma_mw_type { PVRDMA_MW_TYPE_2 = 2, }; +struct pvrdma_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; + u32 reserved; +}; + struct pvrdma_qp_attr { enum pvrdma_qp_state qp_state; enum pvrdma_qp_state cur_qp_state; @@ -420,6 +427,17 @@ int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); int pvrdma_destroy_ah(struct ib_ah *ah); + +struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); +int pvrdma_destroy_srq(struct ib_srq *srq); +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct 
ib_recv_wr **bad_wr); + struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index c6569b0032ec..846c6f4859db 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -158,6 +158,8 @@ struct pvrdma_resize_cq { struct pvrdma_create_srq { __u64 buf_addr; + __u32 buf_size; + __u32 reserved; }; struct pvrdma_create_srq_resp { From e08ce2e82b2fc5cdd07de170e8b9e8327625005c Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Tue, 7 Nov 2017 10:49:09 +0200 Subject: [PATCH 283/296] RDMA/core: Make function rdma_copy_addr return void Function returns zero - make it void. While there make struct net_device const. Signed-off-by: Yuval Shaia Reviewed-by: Parav Pandit Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 27 ++++++++++++++------------- drivers/infiniband/core/cma.c | 8 ++------ include/rdma/ib_addr.h | 5 +++-- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d2f74721b3ba..f4e8185bccd3 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -229,8 +229,9 @@ void rdma_addr_unregister_client(struct rdma_addr_client *client) } EXPORT_SYMBOL(rdma_addr_unregister_client); -int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - const unsigned char *dst_dev_addr) +void rdma_copy_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev, + const unsigned char *dst_dev_addr) { dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); @@ -238,7 +239,6 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); dev_addr->bound_dev_if = dev->ifindex; - return 0; } EXPORT_SYMBOL(rdma_copy_addr); @@ -247,15 +247,14 @@ int rdma_translate_ip(const struct sockaddr *addr, u16 *vlan_id) { struct net_device *dev; - int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); - return ret; + return 0; } switch (addr->sa_family) { @@ -264,9 +263,9 @@ int rdma_translate_ip(const struct sockaddr *addr, ((const struct sockaddr_in *)addr)->sin_addr.s_addr); if (!dev) - return ret; + return -EADDRNOTAVAIL; - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); @@ -279,7 +278,7 @@ int rdma_translate_ip(const struct sockaddr *addr, if (ipv6_chk_addr(dev_addr->net, &((const struct sockaddr_in6 *)addr)->sin6_addr, dev, 1)) { - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); @@ -290,7 +289,7 @@ int rdma_translate_ip(const struct sockaddr *addr, break; #endif } - return ret; + return 0; } EXPORT_SYMBOL(rdma_translate_ip); @@ -336,7 +335,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) { struct neighbour *n; - int ret; + int ret = 0; n = dst_neigh_lookup(dst, daddr); @@ -346,7 +345,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, 
neigh_event_send(n, NULL); ret = -ENODATA; } else { - ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); + rdma_copy_addr(dev_addr, dst->dev, n->ha); } rcu_read_unlock(); @@ -494,7 +493,9 @@ static int addr_resolve_neigh(struct dst_entry *dst, if (!(dst->dev->flags & IFF_NOARP)) return fetch_ha(dst, addr, dst_in, seq); - return rdma_copy_addr(addr, dst->dev, NULL); + rdma_copy_addr(addr, dst->dev, NULL); + + return 0; } static int addr_resolve(struct sockaddr *src_in, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fa79c7076ccd..1fdb473b5df7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1846,9 +1846,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { - ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); - if (ret) - goto err; + rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); } else { if (!cma_protocol_roce(listen_id) && cma_any_addr(cma_src_addr(id_priv))) { @@ -1894,9 +1892,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, goto err; if (net_dev) { - ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); - if (ret) - goto err; + rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); } else { if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b2a10c762304..18c564f60e93 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -125,8 +125,9 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, void rdma_addr_cancel(struct rdma_dev_addr *addr); -int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - const unsigned char *dst_dev_addr); +void rdma_copy_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev, + const unsigned char *dst_dev_addr); int rdma_addr_size(struct sockaddr *addr); From f4e96c1a717176dc52843a41b549aaa9128279c2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Nov 2017 08:45:17 -0600 Subject: [PATCH 284/296] IB/ocrdma_hw: remove unnecessary code in ocrdma_mbx_dealloc_lkey Check on return value and goto label mbx_err are unnecessary. Addresses-Coverity-ID: 1268780 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index f8c14c7baedb..0ba695a88b62 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1947,7 +1947,7 @@ int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey) { - int status = -ENOMEM; + int status; struct ocrdma_dealloc_lkey *cmd; cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_LKEY, sizeof(*cmd)); @@ -1956,9 +1956,7 @@ int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey) cmd->lkey = lkey; cmd->rsvd_frmr = fr_mr ? 
1 : 0; status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; -mbx_err: + kfree(cmd); return status; } From 877add28178a7fa3c68f29c450d050a8e6513f08 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Nov 2017 18:33:26 +0200 Subject: [PATCH 285/296] IB/core: Only maintain real QPs in the security lists When modify QP is called on a shared QP update the security context for the real QP. When security is subsequently enforced the shared QP handles will be checked as well. Without this change shared QP handles get added to the port/pkey lists, which is a bug, because not all shared QP handles will be checked for access. Also the shared QP security context wouldn't get removed from the port/pkey lists causing access to free memory and list corruption when they are destroyed. Cc: stable@vger.kernel.org Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/security.c | 51 ++++++++++++++++-------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 455cba7f9640..23278ed5be45 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -87,16 +87,14 @@ static int enforce_qp_pkey_security(u16 pkey, if (ret) return ret; - if (qp_sec->qp == qp_sec->qp->real_qp) { - list_for_each_entry(shared_qp_sec, - &qp_sec->shared_qp_list, - shared_qp_list) { - ret = security_ib_pkey_access(shared_qp_sec->security, - subnet_prefix, - pkey); - if (ret) - return ret; - } + list_for_each_entry(shared_qp_sec, + &qp_sec->shared_qp_list, + shared_qp_list) { + ret = security_ib_pkey_access(shared_qp_sec->security, + subnet_prefix, + pkey); + if (ret) + return ret; } return 0; } @@ -560,15 +558,22 @@ int ib_security_modify_qp(struct ib_qp *qp, int ret = 0; struct ib_ports_pkeys *tmp_pps; struct ib_ports_pkeys *new_pps; - bool special_qp = (qp->qp_type == IB_QPT_SMI || - qp->qp_type == IB_QPT_GSI || - qp->qp_type >= IB_QPT_RESERVED1); + struct ib_qp *real_qp = qp->real_qp; + bool special_qp = (real_qp->qp_type == IB_QPT_SMI || + real_qp->qp_type == IB_QPT_GSI || + real_qp->qp_type >= IB_QPT_RESERVED1); bool pps_change = ((qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) || (qp_attr_mask & IB_QP_ALT_PATH)); + /* The port/pkey settings are maintained only for the real QP. Open + * handles on the real QP will be in the shared_qp_list. When + * enforcing security on the real QP all the shared QPs will be + * checked as well. 
+ */ + if (pps_change && !special_qp) { - mutex_lock(&qp->qp_sec->mutex); - new_pps = get_new_pps(qp, + mutex_lock(&real_qp->qp_sec->mutex); + new_pps = get_new_pps(real_qp, qp_attr, qp_attr_mask); @@ -586,14 +591,14 @@ int ib_security_modify_qp(struct ib_qp *qp, if (!ret) ret = check_qp_port_pkey_settings(new_pps, - qp->qp_sec); + real_qp->qp_sec); } if (!ret) - ret = qp->device->modify_qp(qp->real_qp, - qp_attr, - qp_attr_mask, - udata); + ret = real_qp->device->modify_qp(real_qp, + qp_attr, + qp_attr_mask, + udata); if (pps_change && !special_qp) { /* Clean up the lists and free the appropriate @@ -602,8 +607,8 @@ int ib_security_modify_qp(struct ib_qp *qp, if (ret) { tmp_pps = new_pps; } else { - tmp_pps = qp->qp_sec->ports_pkeys; - qp->qp_sec->ports_pkeys = new_pps; + tmp_pps = real_qp->qp_sec->ports_pkeys; + real_qp->qp_sec->ports_pkeys = new_pps; } if (tmp_pps) { @@ -611,7 +616,7 @@ int ib_security_modify_qp(struct ib_qp *qp, port_pkey_list_remove(&tmp_pps->alt); } kfree(tmp_pps); - mutex_unlock(&qp->qp_sec->mutex); + mutex_unlock(&real_qp->qp_sec->mutex); } return ret; } From 84511455ac5bd2fec100c5b07b21f725eaa536f6 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 8 Nov 2017 02:48:45 -0500 Subject: [PATCH 286/296] RDMA/bnxt_re: report vlan_id and sl in qp1 recv completion In a real RoCE v2 network it is possible for two sections of the network to have the same IP, and hence the same GID. However, those may be on different VLANs. During connection resolution it is important to report the actual VLAN on which the MAD packet was received, instead of relying on other means to resolve the VLAN ID. ib_find_gid_index should not be used to resolve the VLAN ID from the SGID of the local system where the packet was received. Our device can report the actual VLAN ID in the GSI QP completions, so the driver should move away from resolving the VLAN ID with the help of the SGID at the destination port.
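For reference, the completion metadata parsed by the hunk below mirrors the standard 802.1Q tag control information layout (PCP in bits 15:13, DEI in bit 12, VID in bits 11:0). A generic sketch using the kernel's <linux/if_vlan.h> constants; the driver itself uses its own CQE-specific masks:

#include <linux/types.h>
#include <linux/if_vlan.h>

static void parse_vlan_tci(u16 tci, u16 *vid, u8 *prio)
{
        *vid = tci & VLAN_VID_MASK;                        /* bits 11:0  */
        *prio = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; /* bits 15:13 */
}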
Signed-off-by: Devesh Sharma Reported-by: Parav Pandit Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 35 +++++++++++++++++++++++- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 1 + 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 526ab3f10a90..2032db7db766 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2769,6 +2769,32 @@ static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc, wc->wc_flags |= IB_WC_GRH; } +static bool bnxt_re_is_vlan_pkt(struct bnxt_qplib_cqe *orig_cqe, + u16 *vid, u8 *sl) +{ + bool ret = false; + u32 metadata; + u16 tpid; + + metadata = orig_cqe->raweth_qp1_metadata; + if (orig_cqe->raweth_qp1_flags2 & + CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN) { + tpid = ((metadata & + CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK) >> + CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT); + if (tpid == ETH_P_8021Q) { + *vid = metadata & + CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK; + *sl = (metadata & + CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK) >> + CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT; + ret = true; + } + } + + return ret; +} + static void bnxt_re_process_res_rc_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { @@ -2788,12 +2814,14 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { - u32 tbl_idx; struct bnxt_re_dev *rdev = qp->rdev; struct bnxt_re_qp *qp1_qp = NULL; struct bnxt_qplib_cqe *orig_cqe = NULL; struct bnxt_re_sqp_entries *sqp_entry = NULL; int nw_type; + u32 tbl_idx; + u16 vlan_id; + u8 sl; tbl_idx = cqe->wr_id; @@ -2808,6 +2836,11 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp, wc->ex.imm_data = orig_cqe->immdata; wc->src_qp = orig_cqe->src_qp; memcpy(wc->smac, orig_cqe->smac, ETH_ALEN); + if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) { + wc->vlan_id = vlan_id; + wc->sl = sl; + wc->wc_flags |= IB_WC_WITH_VLAN; + } wc->port_num = 1; wc->vendor_err = orig_cqe->status; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index c0f813366ad6..61764f7aa79b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2254,6 +2254,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, cqe->raweth_qp1_flags = le16_to_cpu(hwcqe->raweth_qp1_flags); cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2); + cqe->raweth_qp1_metadata = le32_to_cpu(hwcqe->raweth_qp1_metadata); rq = &qp->rq; if (wr_id_idx > rq->hwq.max_elements) { From 1c8f1da5d851b92aeb81dbbb9ebd516f6e2588f5 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Thu, 9 Nov 2017 17:47:33 +0530 Subject: [PATCH 287/296] iw_cxgb4: Fix possible circular dependency locking warning Locking sequence of iw_cxgb4 and RoCE drivers in ib_register_device() is slightly different and this leads to possible circular dependency locking warning when both the devices are brought up. 
Here is the locking sequence up to ib_register_device(): iw_cxgb4: rtnl_mutex(net stack) --> uld_mutex --> device_mutex RoCE drivers: device_mutex --> rtnl_mutex Here is the possibility of cross locking: CPU #0 (iw_cxgb4) CPU #1 (RoCE drivers) -> on interface up cxgb4_up() executed with rtnl_mutex held -> hold uld_mutex and try registering ib device -> In ib_register_device() hold device_mutex -> hold device mutex in ib_register_device -> try acquiring rtnl_mutex in ib_enum_roce_netdev() This patch schedules the ib_register_device() functionality of iw_cxgb4 to a workqueue to prevent the possible cross-locking. Also rename the labels in c4iw_register_device(). Signed-off-by: Potnuri Bharat Teja Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/device.c | 28 ++++++++++++-------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 10 ++++++++- drivers/infiniband/hw/cxgb4/provider.c | 26 +++++++++++++++--------- 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 33bfddf3b064..af77d128d242 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -64,14 +64,9 @@ module_param(c4iw_wr_log_size_order, int, 0444); MODULE_PARM_DESC(c4iw_wr_log_size_order, "Number of entries (log2) in the work request timing log."); -struct uld_ctx { - struct list_head entry; - struct cxgb4_lld_info lldi; - struct c4iw_dev *dev; -}; - static LIST_HEAD(uld_ctx_list); static DEFINE_MUTEX(dev_mutex); +struct workqueue_struct *reg_workq; #define DB_FC_RESUME_SIZE 64 #define DB_FC_RESUME_DELAY 1 @@ -912,7 +907,7 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) c4iw_destroy_resource(&rdev->resource); } -static void c4iw_dealloc(struct uld_ctx *ctx) +void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); @@ -1208,8 +1203,6 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) case CXGB4_STATE_UP: pr_info("%s: Up\n", pci_name(ctx->lldi.pdev)); if (!ctx->dev) { - int ret; - ctx->dev = c4iw_alloc(&ctx->lldi); if (IS_ERR(ctx->dev)) { pr_err("%s: initialization failed: %ld\n", @@ -1218,12 +1211,9 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) ctx->dev = NULL; break; } - ret = c4iw_register_device(ctx->dev); - if (ret) { - pr_err("%s: RDMA registration failed: %d\n", - pci_name(ctx->lldi.pdev), ret); - c4iw_dealloc(ctx); - } + + INIT_WORK(&ctx->reg_work, c4iw_register_device); + queue_work(reg_workq, &ctx->reg_work); } break; case CXGB4_STATE_DOWN: @@ -1551,6 +1541,12 @@ static int __init c4iw_init_module(void) if (!c4iw_debugfs_root) pr_warn("could not create debugfs entry, continuing\n"); + reg_workq = create_singlethread_workqueue("Register_iWARP_device"); + if (!reg_workq) { + pr_err("Failed creating workqueue to register iwarp device\n"); + return -ENOMEM; + } + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); return 0; @@ -1567,6 +1563,8 @@ static void __exit c4iw_exit_module(void) kfree(ctx); } mutex_unlock(&dev_mutex); + flush_workqueue(reg_workq); + destroy_workqueue(reg_workq); cxgb4_unregister_uld(CXGB4_ULD_RDMA); c4iw_cm_term(); debugfs_remove_recursive(c4iw_debugfs_root); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 8d525825d50c..470f97a79ebb 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -322,6 +322,13 @@ struct c4iw_dev 
{ wait_queue_head_t wait; }; +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct c4iw_dev *dev; + struct work_struct reg_work; +}; + static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) { return container_of(ibdev, struct c4iw_dev, ibdev); @@ -988,7 +995,7 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev); void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev); void c4iw_destroy_resource(struct c4iw_resource *rscp); int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); -int c4iw_register_device(struct c4iw_dev *dev); +void c4iw_register_device(struct work_struct *work); void c4iw_unregister_device(struct c4iw_dev *dev); int __init c4iw_cm_init(void); void c4iw_cm_term(void); @@ -1014,6 +1021,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); +void c4iw_dealloc(struct uld_ctx *ctx); struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 30206a57dbe7..1b5c6cd2ac4d 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -531,10 +531,12 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } -int c4iw_register_device(struct c4iw_dev *dev) +void c4iw_register_device(struct work_struct *work) { int ret; int i; + struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work); + struct c4iw_dev *dev = ctx->dev; pr_debug("c4iw_dev %p\n", dev); strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX); @@ -609,8 +611,10 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.get_dev_fw_str = get_dev_fw_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); - if (!dev->ibdev.iwcm) - return -ENOMEM; + if (!dev->ibdev.iwcm) { + ret = -ENOMEM; + goto err_dealloc_ctx; + } dev->ibdev.iwcm->connect = c4iw_connect; dev->ibdev.iwcm->accept = c4iw_accept_cr; @@ -625,20 +629,24 @@ int c4iw_register_device(struct c4iw_dev *dev) ret = ib_register_device(&dev->ibdev, NULL); if (ret) - goto bail1; + goto err_kfree_iwcm; for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) { ret = device_create_file(&dev->ibdev.dev, c4iw_class_attributes[i]); if (ret) - goto bail2; + goto err_unregister_device; } - return 0; -bail2: + return; +err_unregister_device: ib_unregister_device(&dev->ibdev); -bail1: +err_kfree_iwcm: kfree(dev->ibdev.iwcm); - return ret; +err_dealloc_ctx: + pr_err("%s - Failed registering iwarp device: %d\n", + pci_name(ctx->lldi.pdev), ret); + c4iw_dealloc(ctx); + return; } void c4iw_unregister_device(struct c4iw_dev *dev) From cbb40fadd31c6bbc59104e58ac95c6ef492d038b Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 9 Nov 2017 07:14:43 -0800 Subject: [PATCH 288/296] iw_cxgb4: only call the cq comp_handler when the cq is armed The ULPs completion handler should only be called if the CQ is armed for notification. 
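The hunks that follow all reduce to the same gating helper. A sketch that assumes iw_cxgb4's internal types and its existing t4_clear_cq_armed() test-and-clear primitive:

#include <linux/spinlock.h>
#include "iw_cxgb4.h"

static void maybe_run_comp_handler(struct c4iw_cq *chp)
{
        unsigned long flag;

        /*
         * Deliver an event only if the consumer armed the CQ; the
         * atomic test-and-clear also guarantees at most one event
         * per arm.
         */
        if (t4_clear_cq_armed(&chp->cq)) {
                spin_lock_irqsave(&chp->comp_handler_lock, flag);
                (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
                spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
        }
}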
Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/ev.c | 8 +++++--- drivers/infiniband/hw/cxgb4/qp.c | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index b8c7cc938bce..a252d5c40ae3 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -109,9 +109,11 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + if (t4_clear_cq_armed(&chp->cq)) { + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + } } void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 1374b41201a9..fefc5fed1778 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -813,10 +813,12 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) t4_swcq_produce(cq); spin_unlock_irqrestore(&schp->lock, flag); - spin_lock_irqsave(&schp->comp_handler_lock, flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, - schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + if (t4_clear_cq_armed(&schp->cq)) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) @@ -842,10 +844,12 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) t4_swcq_produce(cq); spin_unlock_irqrestore(&rchp->lock, flag); - spin_lock_irqsave(&rchp->comp_handler_lock, flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, - rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + if (t4_clear_cq_armed(&rchp->cq)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } } int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, From bc52e9ca74b9a395897bb640c6671b2cbf716032 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 9 Nov 2017 07:21:26 -0800 Subject: [PATCH 289/296] iw_cxgb4: atomically flush the qp __flush_qp() has a race condition where during the flush operation, the qp lock is released allowing another thread to possibly post a WR, which corrupts the queue state, possibly causing crashes. The lock was released to preserve the cq/qp locking hierarchy of cq first, then qp. However releasing the qp lock is not necessary; both RQ and SQ CQ locks can be acquired first, followed by the qp lock, and then the RQ and SQ flushing can be done w/o unlocking. 
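The revised hierarchy is worth a standalone sketch: take both CQ locks (guarding against the aliased send-CQ == recv-CQ case), then the QP lock, and flush without ever dropping the QP lock. Bare spinlocks stand in for the driver structures here:

#include <linux/spinlock.h>

static void flush_under_locks(spinlock_t *rcq_lock, spinlock_t *scq_lock,
                              spinlock_t *qp_lock)
{
        unsigned long flags;

        /* CQ locks first (both of them), then the QP lock. */
        spin_lock_irqsave(rcq_lock, flags);
        if (scq_lock != rcq_lock)
                spin_lock(scq_lock);
        spin_lock(qp_lock);

        /* ... flush RQ and SQ here; no WR can be posted meanwhile ... */

        spin_unlock(qp_lock);
        if (scq_lock != rcq_lock)
                spin_unlock(scq_lock);
        spin_unlock_irqrestore(rcq_lock, flags);
}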
Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/qp.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index fefc5fed1778..5ee7fe433136 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1255,31 +1255,34 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); - /* locking hierarchy: cq lock first, then qp lock. */ + /* locking hierarchy: cqs lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); + if (schp != rchp) + spin_lock(&schp->lock); spin_lock(&qhp->lock); if (qhp->wq.flushed) { spin_unlock(&qhp->lock); + if (schp != rchp) + spin_unlock(&schp->lock); spin_unlock_irqrestore(&rchp->lock, flag); return; } qhp->wq.flushed = 1; + t4_set_wq_in_error(&qhp->wq); c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, flag); - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&schp->lock, flag); - spin_lock(&qhp->lock); if (schp != rchp) c4iw_flush_hw_cq(schp); sq_flushed = c4iw_flush_sq(qhp); + spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&schp->lock, flag); + if (schp != rchp) + spin_unlock(&schp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); if (schp == rchp) { if (t4_clear_cq_armed(&rchp->cq) && @@ -1313,8 +1316,8 @@ static void flush_qp(struct c4iw_qp *qhp) rchp = to_c4iw_cq(qhp->ibqp.recv_cq); schp = to_c4iw_cq(qhp->ibqp.send_cq); - t4_set_wq_in_error(&qhp->wq); if (qhp->ibqp.uobject) { + t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); From 869ddcf8b351ace5bf8860f3cd6265dccb382426 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:13 +0200 Subject: [PATCH 290/296] IB/uverbs: Allow CQ moderation with modify CQ Add uverbs support in modify_cq, currently for CQ moderation only. This gives the ability to change cq_max_count and cq_period. CQ moderation enhances performance by moderating the number of CQEs needed to create an event, instead of the application having to suffer an event per CQE. To achieve CQ moderation the application needs to set cq_max_count and cq_period. cq_max_count - defines the number of CQEs needed to create an event. 
cq_period - defines the timeout (in microseconds) between the last event and a new one that will occur even if cq_max_count was not satisfied. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 42 +++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 1 + include/rdma/ib_verbs.h | 4 +++ include/uapi/rdma/ib_user_verbs.h | 15 +++++++++- 5 files changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ee2739ae4305..deccefb71a6b 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -306,5 +306,6 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(modify_qp); +IB_UVERBS_DECLARE_EX_CMD(modify_cq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8ca36843ef38..3c2673cd4090 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3856,3 +3856,45 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; } + +int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_cq cmd = {}; + struct ib_cq *cq; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), reserved) + + sizeof(cmd.reserved); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + /* sanity checks */ + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask || cmd.reserved) + return -EINVAL; + + if (cmd.attr_mask > IB_CQ_MODERATE) + return -EOPNOTSUPP; + + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + if (!cq) + return -EINVAL; + + ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + + uobj_put_obj_read(cq); + + return ret; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b5febfd84ee5..381fd9c096ae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -128,6 +128,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, + [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0b671982bbb3..8e0d3780ce4e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -311,6 +311,10 @@ struct ib_cq_init_attr { u32 flags; }; +enum ib_cq_attr_mask { + IB_CQ_MODERATE = 1 << 0, +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index d4e0b53bfc75..cfa09d6095d6 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -100,7 +100,8 @@ enum {
IB_USER_VERBS_EX_CMD_MODIFY_WQ, IB_USER_VERBS_EX_CMD_DESTROY_WQ, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, - IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + IB_USER_VERBS_EX_CMD_MODIFY_CQ }; /* @@ -1150,6 +1151,18 @@ struct ib_uverbs_ex_destroy_rwq_ind_table { __u32 ind_tbl_handle; }; +struct ib_uverbs_cq_moderation { + __u16 cq_count; + __u16 cq_period; +}; + +struct ib_uverbs_ex_modify_cq { + __u32 cq_handle; + __u32 attr_mask; + struct ib_uverbs_cq_moderation attr; + __u32 reserved; +}; + #define IB_DEVICE_NAME_MAX 64 #endif /* IB_USER_VERBS_H */ From 34d9a270e74a412f041b528c33b75e3e6bc7a242 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:14 +0200 Subject: [PATCH 291/296] IB/mlx4: Exposing modify CQ callback to uverbs layer Expose mlx4_ib_modify_cq so it can be called through the ib_device verbs list. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 +++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + include/linux/mlx4/cq.h | 3 +++ 3 files changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d8f0b94f1dc4..28c10ffc19e2 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2750,6 +2750,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 719dae354066..e14919c15b06 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -47,6 +47,7 @@ #include #include #include +#include #define MLX4_IB_DRV_NAME "mlx4_ib" diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 09cebe528488..508e8cc5ee86 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -136,6 +136,9 @@ enum { MLX4_CQE_BAD_FCS = 1 << 4, }; +#define MLX4_MAX_CQ_PERIOD (BIT(16) - 1) +#define MLX4_MAX_CQ_COUNT (BIT(16) - 1) + static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd, void __iomem *uar_page, spinlock_t *doorbell_lock) From b0e9df6da25890448ebd134b7f647f16bced9abc Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:15 +0200 Subject: [PATCH 292/296] IB/mlx5: Exposing modify CQ callback to uverbs layer Expose mlx5_ib_modify_cq so it can be called through the ib_device verbs list.
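In general, exposing this verb takes two pieces of wiring in a provider driver: the .modify_cq callback itself and the extended-command mask bit. A minimal sketch (mydrv_modify_cq is a hypothetical driver hook; mlx5 already sets its callback, so the diff below only adds the mask bit):

	/* provide the kernel verb implementation */
	dev->ib_dev.modify_cq = mydrv_modify_cq;

	/* advertise the extended uverbs command so userspace may call it */
	dev->ib_dev.uverbs_ex_cmd_mask |=
		(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);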
Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 +++ drivers/infiniband/hw/mlx5/main.c | 3 ++- include/linux/mlx5/cq.h | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 01b218a3c277..18705cbcdc8c 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1149,6 +1149,9 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; + if (cq_period > MLX5_MAX_CQ_PERIOD) + return -EINVAL; + err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, cq_period, cq_count); if (err) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d2aadb13637b..5a0ed6a499f4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4022,7 +4022,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index cc718e245b1e..6be357b219ec 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -128,6 +128,9 @@ enum { CQE_SIZE_128_PAD = 2, }; +#define MLX5_MAX_CQ_PERIOD (BIT(__mlx5_bit_sz(cqc, cq_period)) - 1) +#define MLX5_MAX_CQ_COUNT (BIT(__mlx5_bit_sz(cqc, cq_max_count)) - 1) + static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en) { return padding_128_en ? CQE_SIZE_128_PAD : From 18bd90729237dc6ddbad01bc9618148224f03590 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:16 +0200 Subject: [PATCH 293/296] IB/uverbs: Add CQ moderation capability to query_device The query_device function can now obtain the maximum values for cq_max_count and cq_period, needed for CQ moderation. cq_max_count is a 16-bit value that determines the number of CQEs to accumulate before generating an event. cq_period is a 16-bit value that determines the timeout in microseconds from the last event generated, upon which a new event will be generated even if cq_max_count was not reached.
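With these limits known, userspace can request moderation through the matching rdma-core API; a sketch, assuming the ibv_modify_cq()/IBV_CQ_ATTR_MODERATE interface that was added to rdma-core alongside this series:

	struct ibv_modify_cq_attr attr = {
		.attr_mask = IBV_CQ_ATTR_MODERATE,
		.moderate  = {
			.cq_count  = 16, /* event per 16 CQEs ... */
			.cq_period = 10, /* ... or 10 usec after the last event */
		},
	};

	if (ibv_modify_cq(cq, &attr))
		perror("ibv_modify_cq");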
Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 9 +++++++++ include/rdma/ib_verbs.h | 6 ++++++ include/uapi/rdma/ib_user_verbs.h | 7 +++++++ 3 files changed, 22 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3c2673cd4090..53143e4b1c50 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3852,6 +3852,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.tm_caps.max_sge = attr.tm_caps.max_sge; resp.tm_caps.flags = attr.tm_caps.flags; resp.response_length += sizeof(resp.tm_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps)) + goto end; + + resp.cq_moderation_caps.max_cq_moderation_count = + attr.cq_caps.max_cq_moderation_count; + resp.cq_moderation_caps.max_cq_moderation_period = + attr.cq_caps.max_cq_moderation_period; + resp.response_length += sizeof(resp.cq_moderation_caps); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e0d3780ce4e..0b484c023fa9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -315,6 +315,11 @@ enum ib_cq_attr_mask { IB_CQ_MODERATE = 1 << 0, }; +struct ib_cq_caps { + u16 max_cq_moderation_count; + u16 max_cq_moderation_period; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -365,6 +370,7 @@ struct ib_device_attr { u32 max_wq_type_rq; u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ struct ib_tm_caps tm_caps; + struct ib_cq_caps cq_caps; }; enum ib_mtu { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index cfa09d6095d6..5186fb12629b 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -125,6 +125,12 @@ struct ib_uverbs_comp_event_desc { __u64 cq_handle; }; +struct ib_uverbs_cq_moderation_caps { + __u16 max_cq_moderation_count; + __u16 max_cq_moderation_period; + __u32 reserved; +}; + /* * All commands from userspace should start with a __u32 command field * followed by __u16 in_words and out_words fields (which give the @@ -263,6 +269,7 @@ struct ib_uverbs_ex_query_device_resp { __u32 max_wq_type_rq; __u32 raw_packet_caps; struct ib_uverbs_tm_caps tm_caps; + struct ib_uverbs_cq_moderation_caps cq_moderation_caps; }; struct ib_uverbs_query_port { From 0fd586de65ae6a249433e1b496f5ed9ce199be73 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:17 +0200 Subject: [PATCH 294/296] IB/mlx4: Add CQ moderation capability to query_device query_device can now obtain the maximum values for cq_max_count and cq_period, needed for CQ moderation.
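A consumer would typically clamp its requested values against these caps before modifying the CQ; a sketch, where wanted_count/wanted_period are hypothetical u16 inputs and attr is the struct ib_device_attr returned by query_device:

	cq_count  = min_t(u16, wanted_count,  attr.cq_caps.max_cq_moderation_count);
	cq_period = min_t(u16, wanted_period, attr.cq_caps.max_cq_moderation_period);

For mlx4 both limits come out to BIT(16) - 1 = 65535, i.e. the full range of the 16-bit fields.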
Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 28c10ffc19e2..8c8a16791a3f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -563,6 +563,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_wq_type_rq = props->max_qp; } + props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT; + props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD; + if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); From 87ab3f524efd171699d21bbc98b1268acb476da5 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:18 +0200 Subject: [PATCH 295/296] IB/mlx5: Add CQ moderation capability to query_device query_device can now obtain the maximum values for cq_max_count and cq_period, needed for CQ moderation. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5a0ed6a499f4..543d0a4c8bf3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -790,6 +790,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->tm_caps.max_sge = MLX5_TM_MAX_SGE; } + if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) { + props->cq_caps.max_cq_moderation_count = + MLX5_MAX_CQ_COUNT; + props->cq_caps.max_cq_moderation_period = + MLX5_MAX_CQ_PERIOD; + } + if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { resp.cqe_comp_caps.max_num = MLX5_CAP_GEN(dev->mdev, cqe_compression) ? From 4190b4e96954e2c3597021d29435c3f8db8d3129 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 13 Nov 2017 10:51:19 +0200 Subject: [PATCH 296/296] RDMA/core: Rename kernel modify_cq to better describe its usage Currently, ib_modify_cq() is used to set CQ moderation parameters. This patch renames ib_modify_cq() to rdma_set_cq_moderation(), because the kernel side of the RDMA API does not need to follow the pattern already exposed to userspace (create_XXX/modify_XXX/query_XXX/destroy_XXX), and the more accurate name better describes the actual usage.
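A kernel ULP then calls the renamed helper directly, tolerating providers that lack a ->modify_cq hook, as in this sketch (the values are arbitrary; the pattern mirrors the ipoib hunk below):

	ret = rdma_set_cq_moderation(recv_cq, 16 /* cq_count */,
				     10 /* cq_period, usec */);
	if (ret && ret != -ENOSYS)	/* -ENOSYS: driver has no ->modify_cq */
		pr_warn("failed to set CQ moderation (%d)\n", ret);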
Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 5 +++-- include/rdma/ib_verbs.h | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 53143e4b1c50..16d55710b116 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3901,7 +3901,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (!cq) return -EINVAL; - ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period); uobj_put_obj_read(cq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d8f1a5d34f4f..3fb8fb6cc824 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1550,12 +1550,12 @@ struct ib_cq *ib_create_cq(struct ib_device *device, } EXPORT_SYMBOL(ib_create_cq); -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { return cq->device->modify_cq ? cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS; } -EXPORT_SYMBOL(ib_modify_cq); +EXPORT_SYMBOL(rdma_set_cq_moderation); int ib_destroy_cq(struct ib_cq *cq) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 8dc1e6225cc8..2706bf26cbac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -99,8 +99,9 @@ static int ipoib_set_coalesce(struct net_device *dev, coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; - ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, - coal->rx_coalesce_usecs); + ret = rdma_set_cq_moderation(priv->recv_cq, + coal->rx_max_coalesced_frames, + coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0b484c023fa9..fd84cda5ed7c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3169,13 +3169,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device, int ib_resize_cq(struct ib_cq *cq, int cqe); /** - * ib_modify_cq - Modifies moderation params of the CQ + * rdma_set_cq_moderation - Modifies moderation params of the CQ * @cq: The CQ to modify. * @cq_count: number of CQEs that will trigger an event * @cq_period: max period of time in usec before triggering an event * */ -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** * ib_destroy_cq - Destroys the specified CQ.