diff --git a/block/Kconfig b/block/Kconfig index 89cd28f8d051..3ab42bbb06d5 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -206,4 +206,9 @@ config BLK_MQ_VIRTIO depends on BLOCK && VIRTIO default y +config BLK_MQ_RDMA + bool + depends on BLOCK && INFINIBAND + default y + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 2b281cf258a0..9396ebc85d24 100644 --- a/block/Makefile +++ b/block/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o +obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c new file mode 100644 index 000000000000..996167f1de18 --- /dev/null +++ b/block/blk-mq-rdma.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 Sagi Grimberg. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include <linux/blk-mq.h> +#include <linux/blk-mq-rdma.h> +#include <rdma/ib_verbs.h> + +/** + * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device + * @set: tagset to provide the mapping for + * @dev: rdma device associated with @set. + * @first_vec: first interrupt vector to use for queues (usually 0) + * + * This function assumes the rdma device @dev has at least as many available + * interrupt vectors as @set has queues. It will then query each vector's affinity mask + * and build a queue mapping that maps each queue to the CPUs that have irq affinity + * for the corresponding vector. + * + * In case either the driver passed a @dev with fewer vectors than + * @set->nr_hw_queues, or @dev does not provide an affinity mask for a + * vector, we fall back to the naive mapping.
+ */ +int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, + struct ib_device *dev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = ib_get_vector_affinity(dev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; + +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2b4d613a3474..d5ca101057b7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1175,6 +1175,11 @@ static void cm_format_req(struct cm_req_msg *req_msg, { struct sa_path_rec *pri_path = param->primary_path; struct sa_path_rec *alt_path = param->alternate_path; + bool pri_ext = false; + + if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA) + pri_ext = opa_is_extended_lid(pri_path->opa.dlid, + pri_path->opa.slid); cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); @@ -1202,18 +1207,24 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_srq(req_msg, param->srq); } + req_msg->primary_local_gid = pri_path->sgid; + req_msg->primary_remote_gid = pri_path->dgid; + if (pri_ext) { + req_msg->primary_local_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid)); + req_msg->primary_remote_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid)); + } if (pri_path->hop_limit <= 1) { - req_msg->primary_local_lid = + req_msg->primary_local_lid = pri_ext ? 0 : htons(ntohl(sa_path_get_slid(pri_path))); - req_msg->primary_remote_lid = + req_msg->primary_remote_lid = pri_ext ? 0 : htons(ntohl(sa_path_get_dlid(pri_path))); } else { /* Work-around until there's a way to obtain remote LID info */ req_msg->primary_local_lid = IB_LID_PERMISSIVE; req_msg->primary_remote_lid = IB_LID_PERMISSIVE; } - req_msg->primary_local_gid = pri_path->sgid; - req_msg->primary_remote_gid = pri_path->dgid; cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); cm_req_set_primary_packet_rate(req_msg, pri_path->rate); req_msg->primary_traffic_class = pri_path->traffic_class; @@ -1225,17 +1236,29 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_path->packet_life_time)); if (alt_path) { + bool alt_ext = false; + + if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA) + alt_ext = opa_is_extended_lid(alt_path->opa.dlid, + alt_path->opa.slid); + + req_msg->alt_local_gid = alt_path->sgid; + req_msg->alt_remote_gid = alt_path->dgid; + if (alt_ext) { + req_msg->alt_local_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid)); + req_msg->alt_remote_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid)); + } if (alt_path->hop_limit <= 1) { - req_msg->alt_local_lid = + req_msg->alt_local_lid = alt_ext ? 0 : htons(ntohl(sa_path_get_slid(alt_path))); - req_msg->alt_remote_lid = + req_msg->alt_remote_lid = alt_ext ? 
0 : htons(ntohl(sa_path_get_dlid(alt_path))); } else { req_msg->alt_local_lid = IB_LID_PERMISSIVE; req_msg->alt_remote_lid = IB_LID_PERMISSIVE; } - req_msg->alt_local_gid = alt_path->sgid; - req_msg->alt_remote_gid = alt_path->dgid; cm_req_set_alt_flow_label(req_msg, alt_path->flow_label); cm_req_set_alt_packet_rate(req_msg, alt_path->rate); @@ -1405,16 +1428,63 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); } +static bool cm_req_has_alt_path(struct cm_req_msg *req_msg) +{ + return ((req_msg->alt_local_lid) || + (ib_is_opa_gid(&req_msg->alt_local_gid))); +} + +static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num, + struct sa_path_rec *path, union ib_gid *gid) +{ + if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num)) + path->rec_type = SA_PATH_REC_TYPE_OPA; + else + path->rec_type = SA_PATH_REC_TYPE_IB; +} + +static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, + struct sa_path_rec *primary_path, + struct sa_path_rec *alt_path) +{ + u32 lid; + + if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { + sa_path_set_dlid(primary_path, + htonl(ntohs(req_msg->primary_local_lid))); + sa_path_set_slid(primary_path, + htonl(ntohs(req_msg->primary_remote_lid))); + } else { + lid = opa_get_lid_from_gid(&req_msg->primary_local_gid); + sa_path_set_dlid(primary_path, cpu_to_be32(lid)); + + lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid); + sa_path_set_slid(primary_path, cpu_to_be32(lid)); + } + + if (!cm_req_has_alt_path(req_msg)) + return; + + if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) { + sa_path_set_dlid(alt_path, + htonl(ntohs(req_msg->alt_local_lid))); + sa_path_set_slid(alt_path, + htonl(ntohs(req_msg->alt_remote_lid))); + } else { + lid = opa_get_lid_from_gid(&req_msg->alt_local_gid); + sa_path_set_dlid(alt_path, cpu_to_be32(lid)); + + lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid); + sa_path_set_slid(alt_path, cpu_to_be32(lid)); + } +} + static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, struct sa_path_rec *alt_path) { primary_path->dgid = req_msg->primary_local_gid; primary_path->sgid = req_msg->primary_remote_gid; - sa_path_set_dlid(primary_path, - htonl(ntohs(req_msg->primary_local_lid))); - sa_path_set_slid(primary_path, - htonl(ntohs(req_msg->primary_remote_lid))); primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); primary_path->hop_limit = req_msg->primary_hop_limit; primary_path->traffic_class = req_msg->primary_traffic_class; @@ -1431,13 +1501,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, primary_path->packet_life_time -= (primary_path->packet_life_time > 0); primary_path->service_id = req_msg->service_id; - if (req_msg->alt_local_lid) { + if (cm_req_has_alt_path(req_msg)) { alt_path->dgid = req_msg->alt_local_gid; alt_path->sgid = req_msg->alt_remote_gid; - sa_path_set_dlid(alt_path, - htonl(ntohs(req_msg->alt_local_lid))); - sa_path_set_slid(alt_path, - htonl(ntohs(req_msg->alt_remote_lid))); alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); alt_path->hop_limit = req_msg->alt_hop_limit; alt_path->traffic_class = req_msg->alt_traffic_class; @@ -1454,6 +1520,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, alt_path->packet_life_time -= (alt_path->packet_life_time > 0); alt_path->service_id = req_msg->service_id; } + cm_format_path_lid_from_req(req_msg, primary_path, alt_path); } static u16 cm_get_bth_pkey(struct 
cm_work *work) @@ -1703,7 +1770,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = cpu_to_be16(wc->slid); + req_msg->primary_local_lid = ib_slid_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } @@ -1713,7 +1780,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = cpu_to_be16(wc->slid); + req_msg->alt_local_lid = ib_slid_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } @@ -1784,9 +1851,12 @@ static int cm_req_handler(struct cm_work *work) dev_net(gid_attr.ndev)); dev_put(gid_attr.ndev); } else { - work->path[0].rec_type = SA_PATH_REC_TYPE_IB; + cm_path_set_rec_type(work->port->cm_dev->ib_device, + work->port->port_num, + &work->path[0], + &req_msg->primary_local_gid); } - if (req_msg->alt_local_lid) + if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); @@ -1811,16 +1881,19 @@ static int cm_req_handler(struct cm_work *work) dev_net(gid_attr.ndev)); dev_put(gid_attr.ndev); } else { - work->path[0].rec_type = SA_PATH_REC_TYPE_IB; + cm_path_set_rec_type(work->port->cm_dev->ib_device, + work->port->port_num, + &work->path[0], + &req_msg->primary_local_gid); } - if (req_msg->alt_local_lid) + if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } - if (req_msg->alt_local_lid) { + if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, cm_id_priv); if (ret) { @@ -2843,6 +2916,11 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, const void *private_data, u8 private_data_len) { + bool alt_ext = false; + + if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA) + alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, + alternate_path->opa.slid); cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); lap_msg->local_comm_id = cm_id_priv->id.local_id; @@ -2856,6 +2934,12 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, htons(ntohl(sa_path_get_dlid(alternate_path))); lap_msg->alt_local_gid = alternate_path->sgid; lap_msg->alt_remote_gid = alternate_path->dgid; + if (alt_ext) { + lap_msg->alt_local_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid)); + lap_msg->alt_remote_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid)); + } cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); lap_msg->alt_hop_limit = alternate_path->hop_limit; @@ -2924,16 +3008,29 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_lap); +static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg, + struct sa_path_rec *path) +{ + u32 lid; + + if (path->rec_type != SA_PATH_REC_TYPE_OPA) { + sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); + sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); + } else { + lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid); + sa_path_set_dlid(path, cpu_to_be32(lid)); + + lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid); + 
sa_path_set_slid(path, cpu_to_be32(lid)); + } +} + static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct sa_path_rec *path, struct cm_lap_msg *lap_msg) { - memset(path, 0, sizeof *path); - path->rec_type = SA_PATH_REC_TYPE_IB; path->dgid = lap_msg->alt_local_gid; path->sgid = lap_msg->alt_remote_gid; - sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); - sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); path->flow_label = cm_lap_get_flow_label(lap_msg); path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); @@ -2947,6 +3044,7 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, path->packet_life_time_selector = IB_SA_EQ; path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); path->packet_life_time -= (path->packet_life_time > 0); + cm_format_path_lid_from_lap(lap_msg, path); } static int cm_lap_handler(struct cm_work *work) @@ -2965,6 +3063,11 @@ static int cm_lap_handler(struct cm_work *work) return -EINVAL; param = &work->cm_event.param.lap_rcvd; + memset(&work->path[0], 0, sizeof(work->path[1])); + cm_path_set_rec_type(work->port->cm_dev->ib_device, + work->port->port_num, + &work->path[0], + &lap_msg->alt_local_gid); param->alternate_path = &work->path[0]; cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 11ae67514e13..6b54280530c9 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -38,6 +38,7 @@ #include #include +#include #include #include "mad_priv.h" diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 0d3cca0a8890..e5cf09c66fe6 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -64,7 +64,7 @@ struct mad_rmpp_recv { __be64 tid; u32 src_qp; - u16 slid; + u32 slid; u8 mgmt_class; u8 class_version; u8 method; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 112099c86a19..f2a7f62c2834 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -618,7 +618,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, if (result) goto out; - ib_copy_qp_attr_to_user(&resp, &qp_attr); + ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 276f0ef835bd..eb85b546e223 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -248,14 +248,15 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, dst->qp_num = src->qp_num; } -static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst, +static void ucma_copy_ud_event(struct ib_device *device, + struct rdma_ucm_ud_param *dst, struct rdma_ud_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; - ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); + ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); dst->qp_num = src->qp_num; dst->qkey = src->qkey; } @@ -335,7 +336,8 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, uevent->resp.event = event->event; uevent->resp.status = event->status; if (cm_id->qp_type == IB_QPT_UD) - 
ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); + ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud, + &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); @@ -1157,7 +1159,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, if (ret) goto out; - ib_copy_qp_attr_to_user(&resp, &qp_attr); + ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 36a6f5c8914c..ff3c67a7aaad 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -229,7 +229,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.lid = ib_slid_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 60535c754db3..670176b670a0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -275,8 +275,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; - resp.lid = attr.lid; - resp.sm_lid = attr.sm_lid; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { + resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); + resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); + } else { + resp.lid = (u16)attr.lid; + resp.sm_lid = (u16)attr.sm_lid; + } resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; resp.sm_sl = attr.sm_sl; @@ -1185,7 +1190,8 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, return ret ? 
ret : in_len; } -static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) +static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, + struct ib_wc *wc) { struct ib_uverbs_wc tmp; @@ -1199,7 +1205,10 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) tmp.src_qp = wc->src_qp; tmp.wc_flags = wc->wc_flags; tmp.pkey_index = wc->pkey_index; - tmp.slid = wc->slid; + if (rdma_cap_opa_ah(ib_dev, wc->port_num)) + tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid); + else + tmp.slid = ib_slid_cpu16(wc->slid); tmp.sl = wc->sl; tmp.dlid_path_bits = wc->dlid_path_bits; tmp.port_num = wc->port_num; @@ -1243,7 +1252,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (!ret) break; - ret = copy_wc_to_user(data_ptr, &wc); + ret = copy_wc_to_user(ib_dev, data_ptr, &wc); if (ret) goto out_put; diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 94fd989c9060..bd0acf376af0 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -33,10 +33,47 @@ #include #include -void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, - struct rdma_ah_attr *src) +#define OPA_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +static int rdma_ah_conv_opa_to_ib(struct ib_device *dev, + struct rdma_ah_attr *ib, + struct rdma_ah_attr *opa) { + struct ib_port_attr port_attr; + int ret = 0; + + /* Do structure copy and the over-write fields */ + *ib = *opa; + + ib->type = RDMA_AH_ATTR_TYPE_IB; + rdma_ah_set_grh(ib, NULL, 0, 0, 1, 0); + + if (ib_query_port(dev, opa->port_num, &port_attr)) { + /* Set to default subnet to indicate error */ + rdma_ah_set_subnet_prefix(ib, OPA_DEFAULT_GID_PREFIX); + ret = -EINVAL; + } else { + rdma_ah_set_subnet_prefix(ib, + cpu_to_be64(port_attr.subnet_prefix)); + } + rdma_ah_set_interface_id(ib, OPA_MAKE_ID(rdma_ah_get_dlid(opa))); + return ret; +} + +void ib_copy_ah_attr_to_user(struct ib_device *device, + struct ib_uverbs_ah_attr *dst, + struct rdma_ah_attr *ah_attr) +{ + struct rdma_ah_attr *src = ah_attr; + struct rdma_ah_attr conv_ah; + memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); + + if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && + (rdma_ah_get_dlid(ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (!rdma_ah_conv_opa_to_ib(device, &conv_ah, ah_attr))) + src = &conv_ah; + dst->dlid = rdma_ah_get_dlid(src); dst->sl = rdma_ah_get_sl(src); dst->src_path_bits = rdma_ah_get_path_bits(src); @@ -57,7 +94,8 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, } EXPORT_SYMBOL(ib_copy_ah_attr_to_user); -void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, +void ib_copy_qp_attr_to_user(struct ib_device *device, + struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src) { dst->qp_state = src->qp_state; @@ -76,8 +114,8 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, dst->max_recv_sge = src->cap.max_recv_sge; dst->max_inline_data = src->cap.max_inline_data; - ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); - ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr); + ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); + ib_copy_ah_attr_to_user(device, &dst->alt_ah_attr, &src->alt_ah_attr); dst->pkey_index = src->pkey_index; dst->alt_pkey_index = src->alt_pkey_index; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 0a3e2dfdf56e..8daa3a5f7e95 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4216,7 +4216,7 @@ static int 
opa_local_smp_check(struct hfi1_ibport *ibp, const struct ib_wc *in_wc) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid = in_wc->slid; + u16 slid = ib_slid_cpu16(in_wc->slid); u16 pkey; if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys)) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index ea24230ea0d4..5a897b0106a9 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -528,7 +528,7 @@ static int set_guid_rec(struct ib_device *ibdev, memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); - guid_info_rec.lid = cpu_to_be16(attr.lid); + guid_info_rec.lid = cpu_to_be16((u16)attr.lid); guid_info_rec.block_num = index; memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 21d31cb1325f..04fb44e7699e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -169,7 +169,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, op_modifier |= 0x4; - in_modifier |= in_wc->slid << 16; + in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; } err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, @@ -625,7 +625,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); } else { tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + tun_mad->hdr.slid_mac_47_32 = ib_slid_be16(wc->slid); } ib_dma_sync_single_for_device(&dev->ib_dev, @@ -826,7 +826,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { forward_trap(to_mdev(ibdev), port_num, in_mad); @@ -860,7 +860,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = pattr.lid; + prev_lid = (u16)pattr.lid; err = mlx4_MAD_IFC(to_mdev(ibdev), (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 95db929bdc34..cd2264ac88ae 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 slid; int err; - slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? 
ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9279631d8da0..483110ec3b80 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3816,6 +3816,14 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } +const struct cpumask *mlx5_ib_get_vector_affinity(struct ib_device *ibdev, + int comp_vector) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_get_vector_affinity(dev->mdev, comp_vector); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3946,6 +3954,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; + dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 9d83a53c0c67..e19ae0b9b439 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1921,7 +1921,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET); - MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, ib_slid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET); MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) @@ -1929,7 +1929,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, op_modifier |= 0x4; - in_modifier |= in_wc->slid << 16; + in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; } err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7df3db71777a..a9caadab22cf 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -205,7 +205,7 @@ int mthca_process_mad(struct ib_device *ibdev, u16 *out_mad_pkey_index) { int err; - u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + u16 slid = in_wc ? 
ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; const struct ib_mad *in_mad = (const struct ib_mad *)in; @@ -256,7 +256,7 @@ int mthca_process_mad(struct ib_device *ibdev, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = pattr.lid; + prev_lid = (u16)pattr.lid; err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0ae2ff8cf81e..0335a3df74d5 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -107,7 +107,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].slid = ib_slid_cpu16(entry->slid); wc->uqueue[head].sl = entry->sl; wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; wc->uqueue[head].port_num = entry->port_num; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ff50a7bd66d8..9e738104c2a1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -366,7 +366,7 @@ struct ipoib_dev_priv { u32 qkey; union ib_gid local_gid; - u16 local_lid; + u32 local_lid; unsigned int admin_mtu; unsigned int mcast_mtu; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index cc1183851af5..1b817e51b84b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -328,8 +328,8 @@ struct srpt_port { u8 port_guid[24]; u8 port_gid[64]; u8 port; - u16 sm_lid; - u16 lid; + u32 sm_lid; + u32 lid; union ib_gid gid; struct work_struct work; struct se_portal_group port_guid_tpg; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e1b7ddfecd01..909123243a85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -587,7 +587,6 @@ struct mlx5e_channel { struct mlx5_core_dev *mdev; struct mlx5e_tstamp *tstamp; int ix; - int cpu; }; struct mlx5e_channels { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1eac5003084f..fb647561c592 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -71,6 +71,11 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; +static int mlx5e_get_node(struct mlx5e_priv *priv, int ix) +{ + return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix); +} + static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { return MLX5_CAP_GEN(mdev, striding_rq) && @@ -397,7 +402,7 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv) static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); + synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); } static inline int mlx5e_get_wqe_mtt_sz(void) @@ -444,16 +449,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int wq_sz = mlx5_wq_ll_get_size(&rq->wq); int mtt_sz = mlx5e_get_wqe_mtt_sz(); int mtt_alloc = mtt_sz + 
MLX5_UMR_ALIGN - 1; + int node = mlx5e_get_node(c->priv, c->ix); int i; rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, - cpu_to_node(c->cpu)); + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, + GFP_KERNEL, node); if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; @@ -561,7 +567,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, int err; int i; - rqp->wq.db_numa_node = cpu_to_node(c->cpu); + rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); @@ -628,7 +634,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, default: /* MLX5_WQ_TYPE_LINKED_LIST */ rq->wqe.frag_info = kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, + mlx5e_get_node(c->priv, c->ix)); if (!rq->wqe.frag_info) { err = -ENOMEM; goto err_rq_wq_destroy; @@ -993,13 +1000,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1047,13 +1054,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1119,13 +1126,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); if (err) goto err_sq_wq_destroy; @@ -1497,8 +1504,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->priv->mdev; int err; - param->wq.buf_numa_node = cpu_to_node(c->cpu); - param->wq.db_numa_node = cpu_to_node(c->cpu); + param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); param->eq_ix = c->ix; err = mlx5e_alloc_cq_common(mdev, param, cq); @@ -1597,11 +1604,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1750,11 +1752,10 @@ static int 
mlx5e_open_channel(struct mlx5e_priv *priv, int ix, { struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; int err; - c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); if (!c) return -ENOMEM; @@ -1762,7 +1763,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mdev = priv->mdev; c->tstamp = &priv->tstamp; c->ix = ix; - c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); @@ -1848,7 +1848,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); + netif_set_xps_queue(c->netdev, + mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -3793,18 +3794,8 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, u32 *indirection_rqt, int len, int num_channels) { - int node = mdev->priv.numa_node; - int node_num_of_cores; int i; - if (node == -1) - node = first_online_node; - - node_num_of_cores = cpumask_weight(cpumask_of_node(node)); - - if (node_num_of_cores) - num_channels = min_t(int, num_channels, node_num_of_cores); - for (i = 0; i < len; i++) indirection_rqt[i] = i % num_channels; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 849417425811..261b3c628767 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -604,7 +604,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, name, pci_name(dev->pdev)); eq->eqn = MLX5_GET(create_eq_out, out, eq_number); - eq->irqn = priv->msix_arr[vecidx].vector; + eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(eq->irqn, handler, 0, @@ -639,7 +639,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, return 0; err_irq: - free_irq(priv->msix_arr[vecidx].vector, eq); + free_irq(eq->irqn, eq); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -680,11 +680,6 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); -u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx) -{ - return dev->priv.msix_arr[MLX5_EQ_VEC_ASYNC].vector; -} - int mlx5_eq_init(struct mlx5_core_dev *dev) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 89bfda419efe..1ce2543e3889 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1585,7 +1585,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); + synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 4b6b03d6297f..8aea0a065e56 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -81,7 +81,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) u64 vector; /* wait for pending handlers to complete */ - synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); + synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); spin_lock_irqsave(&dev->cmd.alloc_lock, flags); vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); if (!vector) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3cec683fd70f..d8e761cd5448 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -313,13 +313,15 @@ static void release_bar(struct pci_dev *pdev) pci_release_regions(pdev); } -static int mlx5_enable_msix(struct mlx5_core_dev *dev) +static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; + struct irq_affinity irqdesc = { + .pre_vectors = MLX5_EQ_VEC_COMP_BASE, + }; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; - int i; nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; @@ -327,17 +329,14 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL); - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->msix_arr || !priv->irq_info) + if (!priv->irq_info) goto err_free_msix; - for (i = 0; i < nvec; i++) - priv->msix_arr[i].entry = i; - - nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr, - MLX5_EQ_VEC_COMP_BASE + 1, nvec); + nvec = pci_alloc_irq_vectors_affinity(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + 1, nvec, + PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, + &irqdesc); if (nvec < 0) return nvec; @@ -347,17 +346,15 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) err_free_msix: kfree(priv->irq_info); - kfree(priv->msix_arr); return -ENOMEM; } -static void mlx5_disable_msix(struct mlx5_core_dev *dev) +static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - pci_disable_msix(dev->pdev); + pci_free_irq_vectors(dev->pdev); kfree(priv->irq_info); - kfree(priv->msix_arr); } struct mlx5_reg_host_endianness { @@ -625,65 +622,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) return (u64)timer_l | (u64)timer_h1 << 32; } -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - struct msix_entry *msix = priv->msix_arr; - int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - struct msix_entry *msix = priv->msix_arr; - int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); -} - -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) -{ 
- int err; - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { - err = mlx5_irq_set_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); - - return err; -} - -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - mlx5_irq_clear_affinity_hint(mdev, i); -} - int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { @@ -773,8 +711,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) } #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, - dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector); + irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, + MLX5_EQ_VEC_COMP_BASE + i)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, @@ -1132,9 +1070,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_poll; } - err = mlx5_enable_msix(dev); + err = mlx5_alloc_irq_vectors(dev); if (err) { - dev_err(&pdev->dev, "enable msix failed\n"); + dev_err(&pdev->dev, "alloc irq vectors failed\n"); goto err_cleanup_once; } @@ -1156,12 +1094,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_eqs; } - err = mlx5_irq_set_affinity_hints(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_affinity_hints; - } - err = mlx5_init_fs(dev); if (err) { dev_err(&pdev->dev, "Failed to init flow steering\n"); @@ -1227,9 +1159,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_cleanup_fs(dev); err_fs: - mlx5_irq_clear_affinity_hints(dev); - -err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1239,7 +1168,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_put_uars_page(dev, priv->uar); err_disable_msix: - mlx5_disable_msix(dev); + mlx5_free_irq_vectors(dev); err_cleanup_once: if (boot) @@ -1302,11 +1231,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_eswitch_detach(dev->priv.eswitch); #endif mlx5_cleanup_fs(dev); - mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_put_uars_page(dev, priv->uar); - mlx5_disable_msix(dev); + mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 6a3d6bef7dd4..ba1d494b016d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -110,7 +110,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); -u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); void mlx5_cq_tasklet_cb(unsigned long data); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index da04df1af231..4e25acc54e09 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -463,14 +464,10 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) ibdev = queue->device->dev; /* - * The admin queue is 
barely used once the controller is live, so don't - * bother to spread it out. + * Spread I/O queue completion vectors according to their queue index. + * Admin queues can always go on completion vector 0. */ - if (idx == 0) - comp_vector = 0; - else - comp_vector = idx % ibdev->num_comp_vectors; - + comp_vector = idx == 0 ? idx : idx - 1; /* +1 for ib_stop_cq */ queue->ib_cq = ib_alloc_cq(ibdev, queue, @@ -611,10 +608,20 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + struct ib_device *ibdev = ctrl->device->dev; unsigned int nr_io_queues; int i, ret; nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + + /* + * We map queues according to the device irq vectors for + * optimal locality, so we don't need more queues than + * completion vectors. + */ + nr_io_queues = min_t(unsigned int, nr_io_queues, + ibdev->num_comp_vectors); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) return ret; @@ -1498,6 +1505,13 @@ static void nvme_rdma_complete_rq(struct request *rq) nvme_complete_rq(rq); } +static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) +{ + struct nvme_rdma_ctrl *ctrl = set->driver_data; + + return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0); +} + static const struct blk_mq_ops nvme_rdma_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, @@ -1507,6 +1521,7 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = { .init_hctx = nvme_rdma_init_hctx, .poll = nvme_rdma_poll, .timeout = nvme_rdma_timeout, + .map_queues = nvme_rdma_map_queues, }; static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h new file mode 100644 index 000000000000..b4ade198007d --- /dev/null +++ b/include/linux/blk-mq-rdma.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_RDMA_H +#define _LINUX_BLK_MQ_RDMA_H + +struct blk_mq_tag_set; +struct ib_device; + +int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, + struct ib_device *dev, int first_vec); + +#endif /* _LINUX_BLK_MQ_RDMA_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 758ef40f9316..099fe311f272 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -545,7 +545,6 @@ struct mlx5_core_sriov { }; struct mlx5_irq_info { - cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; @@ -608,7 +607,6 @@ struct mlx5_port_module_event_stats { struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; - struct msix_entry *msix_arr; struct mlx5_irq_info *irq_info; /* pages stuff */ @@ -1189,4 +1187,10 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; +static inline const struct cpumask * +mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) +{ + return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector); +} + #endif /* MLX5_DRIVER_H */ diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h index 68cef3bd50fb..8ebf84ae9ed1 100644 --- a/include/rdma/ib_marshall.h +++ b/include/rdma/ib_marshall.h @@ -38,10 +38,12 @@ #include #include -void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, +void ib_copy_qp_attr_to_user(struct ib_device *device, + struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src); -void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, +void ib_copy_ah_attr_to_user(struct ib_device *device, + struct ib_uverbs_ah_attr *dst, struct rdma_ah_attr *src); void
ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1082b4c81b2c..5ca3ac1e9113 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -549,8 +549,8 @@ struct ib_port_attr { u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; - u16 lid; - u16 sm_lid; + u32 sm_lid; + u32 lid; u8 lmc; u8 max_vl_num; u8 sm_sl; @@ -951,7 +951,7 @@ struct ib_wc { u32 src_qp; int wc_flags; u16 pkey_index; - u16 slid; + u32 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ @@ -2306,6 +2306,8 @@ struct ib_device { */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); + const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, + int comp_vector); }; struct ib_client { @@ -3717,4 +3719,38 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, else return RDMA_AH_ATTR_TYPE_IB; } + +/* Return slid in 16bit CPU encoding */ +static inline u16 ib_slid_cpu16(u32 slid) +{ + return (u16)slid; +} + +/* Return slid in 16bit BE encoding */ +static inline __be16 ib_slid_be16(u32 slid) +{ + return cpu_to_be16((u16)slid); +} + +/** + * ib_get_vector_affinity - Get the affinity mappings of a given completion + * vector + * @device: the rdma device + * @comp_vector: index of completion vector + * + * Returns NULL if @comp_vector is out of range or the device driver does not + * implement get_vector_affinity, otherwise a corresponding cpu map of the + * completion vector. + */ +static inline const struct cpumask * +ib_get_vector_affinity(struct ib_device *device, int comp_vector) +{ + if (comp_vector < 0 || comp_vector >= device->num_comp_vectors || + !device->get_vector_affinity) + return NULL; + + return device->get_vector_affinity(device, comp_vector); + +} + #endif /* IB_VERBS_H */ diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index eace28f1555d..9b5e642cf550 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -50,7 +50,8 @@ #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) - +#define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ + ? 0 : (x)) /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that @@ -76,4 +77,22 @@ static inline u32 opa_get_lid_from_gid(union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } + +/** + * opa_is_extended_lid: Returns true if dlid or slid are + * extended. + * + * @dlid: The DLID + * @slid: The SLID + */ +static inline bool opa_is_extended_lid(__be32 dlid, __be32 slid) +{ + if ((be32_to_cpu(dlid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) || + (be32_to_cpu(slid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE))) + return true; + else + return false; +} #endif /* OPA_ADDR_H */
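
Usage note (not part of the patch): a minimal sketch of how an RDMA block driver is expected to consume the new helper. The device driver exposes its completion-vector affinity through the new get_vector_affinity callback (as mlx5_ib does above), and the consumer points blk_mq_ops->map_queues at blk_mq_rdma_map_queues(); the real consumer in this series is nvme-rdma's nvme_rdma_map_queues(). The names my_ctrl, my_map_queues, my_mq_ops and the first_vec value of 0 below are hypothetical placeholders, not anything defined by the patch.

#include <linux/blk-mq.h>
#include <linux/blk-mq-rdma.h>
#include <rdma/ib_verbs.h>

/* Hypothetical driver context; only the members used here are shown. */
struct my_ctrl {
	struct ib_device *ibdev;	/* RDMA device backing the queues */
	struct blk_mq_tag_set tag_set;	/* tag_set.driver_data points back here */
};

/*
 * .map_queues callback: build the hctx-to-CPU map from the RDMA device's
 * IRQ affinity. blk_mq_rdma_map_queues() falls back to blk_mq_map_queues()
 * if the device has fewer vectors than hw queues or reports no affinity.
 */
static int my_map_queues(struct blk_mq_tag_set *set)
{
	struct my_ctrl *ctrl = set->driver_data;

	/* I/O queues start at completion vector 0 in this sketch. */
	return blk_mq_rdma_map_queues(set, ctrl->ibdev, 0);
}

static const struct blk_mq_ops my_mq_ops = {
	/* .queue_rq and the other mandatory callbacks are omitted for brevity. */
	.map_queues	= my_map_queues,
};

As nvme-rdma does above, such a driver should also cap its number of hardware queues at ibdev->num_comp_vectors so the affinity-based mapping is used rather than the naive fallback.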