linux_dsm_epyc7002/drivers/infiniband/hw/mlx5/main.c
Linus Torvalds b8ba452683 Round two of 4.6 merge window patches

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull more rdma updates from Doug Ledford:
 "Round two of 4.6 merge window patches.

  This is a monster pull request.  I held off on the hfi1 driver updates
  (the hfi1 driver is intimately tied to the qib driver and the new
  rdmavt software library that was created to help both of them) in my
  first pull request.  The hfi1/qib/rdmavt update is probably 90% of
  this pull request.  The hfi1 driver is being left in staging so that
  it can be fixed up in regard to the API that Al and you didn't
  like.  Intel has agreed to do the work, but in the meantime, this
  clears out 300+ patches in the backlog queue and brings my tree and
  their tree closer to sync.

  This also includes about 10 patches to the core and a few to mlx5 to
  create an infrastructure for configuring SRIOV ports on IB devices.
  That series includes one patch to the net core that we sent to netdev@
  and Dave Miller with each of the three revisions to the series.  We
  didn't get any response to the patch, so we took that as implicit
  approval.

  Finally, this series includes Intel's new iWARP driver for their x722
  cards.  It's not nearly the beast that the hfi1 driver is.  It also has a
  linux-next merge issue, but that has been resolved and it now passes
  just fine.

  Summary:

   - A few minor core fixups needed for the next patch series

   - The IB SRIOV series.  This has bounced around for several versions.
     Of note is the fact that the first patch in this series affects the
     net core.  It was directed to netdev and DaveM for each iteration
     of the series (three versions total).  Dave did not object, but did
     not respond either.  I've taken this as permission to move forward
     with the series.

   - The new Intel X722 iWARP driver

   - A huge set of updates to the Intel hfi1 driver.  Of particular
     interest here is that we have left the driver in staging since it
     still has an API that people object to.  Intel is working on a fix,
     but getting these patches in now helps keep me sane as the upstream
     and Intel's trees were over 300 patches apart"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (362 commits)
  IB/ipoib: Allow mcast packets from other VFs
  IB/mlx5: Implement callbacks for manipulating VFs
  net/mlx5_core: Implement modify HCA vport command
  net/mlx5_core: Add VF param when querying vport counter
  IB/ipoib: Add ndo operations for configuring VFs
  IB/core: Add interfaces to control VF attributes
  IB/core: Support accessing SA in virtualized environment
  IB/core: Add subnet prefix to port info
  IB/mlx5: Fix decision on using MAD_IFC
  net/core: Add support for configuring VF GUIDs
  IB/{core, ulp} Support above 32 possible device capability flags
  IB/core: Replace setting the zero values in ib_uverbs_ex_query_device
  net/mlx5_core: Introduce offload arithmetic hardware capabilities
  net/mlx5_core: Refactor device capability function
  net/mlx5_core: Fix caching ATOMIC endian mode capability
  ib_srpt: fix a WARN_ON() message
  i40iw: Replace the obsolete crypto hash interface with shash
  IB/hfi1: Add SDMA cache eviction algorithm
  IB/hfi1: Switch to using the pin query function
  IB/hfi1: Specify mm when releasing pages
  ...
2016-03-22 15:48:44 -07:00

/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/fs.h>
#include "user.h"
#include "mlx5_ib.h"
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "2.2-1"
#define DRIVER_RELDATE "Feb 2014"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
enum {
MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};
static enum rdma_link_layer
mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
switch (port_type_cap) {
case MLX5_CAP_PORT_TYPE_IB:
return IB_LINK_LAYER_INFINIBAND;
case MLX5_CAP_PORT_TYPE_ETH:
return IB_LINK_LAYER_ETHERNET;
default:
return IB_LINK_LAYER_UNSPECIFIED;
}
}
static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(device);
int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
roce.nb);
if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
return NOTIFY_DONE;
write_lock(&ibdev->roce.netdev_lock);
if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
write_unlock(&ibdev->roce.netdev_lock);
return NOTIFY_DONE;
}
static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
u8 port_num)
{
struct mlx5_ib_dev *ibdev = to_mdev(device);
struct net_device *ndev;
/* Ensure ndev does not disappear before we invoke dev_hold()
*/
read_lock(&ibdev->roce.netdev_lock);
ndev = ibdev->roce.netdev;
if (ndev)
dev_hold(ndev);
read_unlock(&ibdev->roce.netdev_lock);
return ndev;
}
static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct net_device *ndev;
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
memset(props, 0, sizeof(*props));
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
roce_address_table_size);
props->max_mtu = IB_MTU_4096;
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
props->phys_state = 3;
mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
props->qkey_viol_cntr = qkey_viol_cntr;
ndev = mlx5_ib_get_netdev(device, port_num);
if (!ndev)
return 0;
if (netif_running(ndev) && netif_carrier_ok(ndev)) {
props->state = IB_PORT_ACTIVE;
props->phys_state = 5;
}
ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
props->active_width = IB_WIDTH_4X; /* TODO */
props->active_speed = IB_SPEED_QDR; /* TODO */
return 0;
}
static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void *mlx5_addr)
{
#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
source_l3_address);
void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
source_mac_47_32);
if (!gid)
return;
ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
if (is_vlan_dev(attr->ndev)) {
MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
}
switch (attr->gid_type) {
case IB_GID_TYPE_IB:
MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
break;
default:
WARN_ON(true);
}
if (attr->gid_type != IB_GID_TYPE_IB) {
if (ipv6_addr_v4mapped((void *)gid))
MLX5_SET_RA(mlx5_addr, roce_l3_type,
MLX5_ROCE_L3_TYPE_IPV4);
else
MLX5_SET_RA(mlx5_addr, roce_l3_type,
MLX5_ROCE_L3_TYPE_IPV6);
}
if ((attr->gid_type == IB_GID_TYPE_IB) ||
!ipv6_addr_v4mapped((void *)gid))
memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
else
memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
}
static int set_roce_addr(struct ib_device *device, u8 port_num,
unsigned int index,
const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
struct mlx5_ib_dev *dev = to_mdev(device);
u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
if (ll != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
memset(in, 0, sizeof(in));
ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
MLX5_SET(set_roce_address_in, in, roce_address_index, index);
MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
memset(out, 0, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
unsigned int index, const union ib_gid *gid,
const struct ib_gid_attr *attr,
__always_unused void **context)
{
return set_roce_addr(device, port_num, index, gid, attr);
}
static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
unsigned int index, __always_unused void **context)
{
return set_roce_addr(device, port_num, index, NULL, NULL);
}
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int index)
{
struct ib_gid_attr attr;
union ib_gid gid;
if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
return 0;
if (!attr.ndev)
return 0;
dev_put(attr.ndev);
if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
return 0;
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, ib_virt);
}
enum {
MLX5_VPORT_ACCESS_METHOD_MAD,
MLX5_VPORT_ACCESS_METHOD_HCA,
MLX5_VPORT_ACCESS_METHOD_NIC,
};
static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
if (mlx5_use_mad_ifc(to_mdev(ibdev)))
return MLX5_VPORT_ACCESS_METHOD_MAD;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET)
return MLX5_VPORT_ACCESS_METHOD_NIC;
return MLX5_VPORT_ACCESS_METHOD_HCA;
}
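/* Access-method selection above: the MAD interface (MAD_IFC helpers) is
* used when the device does not report the ib_virt capability, the NIC
* vport commands are used when port 1 runs an Ethernet (RoCE) link
* layer, and the HCA vport commands are used otherwise.
*/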
static void get_atomic_caps(struct mlx5_ib_dev *dev,
struct ib_device_attr *props)
{
u8 tmp;
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
u8 atomic_req_8B_endianness_mode =
MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
/* Check if HW supports 8-byte standard atomic operations and is
* capable of responding in host endianness
*/
tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
if (((atomic_operations & tmp) == tmp) &&
(atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
(atomic_req_8B_endianness_mode)) {
props->atomic_cap = IB_ATOMIC_HCA;
} else {
props->atomic_cap = IB_ATOMIC_NONE;
}
}
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
u64 tmp;
int err;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_system_image_guid(ibdev,
sys_image_guid);
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
break;
default:
return -EINVAL;
}
if (!err)
*sys_image_guid = cpu_to_be64(tmp);
return err;
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
u16 *max_pkeys)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
pkey_table_size));
return 0;
default:
return -EINVAL;
}
}
static int mlx5_query_vendor_id(struct ib_device *ibdev,
u32 *vendor_id)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
default:
return -EINVAL;
}
}
static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
__be64 *node_guid)
{
u64 tmp;
int err;
switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_node_guid(dev, node_guid);
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
break;
default:
return -EINVAL;
}
if (!err)
*node_guid = cpu_to_be64(tmp);
return err;
}
struct mlx5_reg_node_desc {
u8 desc[64];
};
static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
struct mlx5_reg_node_desc in;
if (mlx5_use_mad_ifc(dev))
return mlx5_query_mad_ifc_node_desc(dev, node_desc);
memset(&in, 0, sizeof(in));
return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
sizeof(struct mlx5_reg_node_desc),
MLX5_REG_NODE_DESC, 0, 0);
}
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int err = -ENOMEM;
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
memset(props, 0, sizeof(*props));
err = mlx5_query_system_image_guid(ibdev,
&props->sys_image_guid);
if (err)
return err;
err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
if (err)
return err;
err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
if (err)
return err;
props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
(fw_rev_min(dev->mdev) << 16) |
fw_rev_sub(dev->mdev);
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN;
if (MLX5_CAP_GEN(mdev, pkv))
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (MLX5_CAP_GEN(mdev, qkv))
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (MLX5_CAP_GEN(mdev, apm))
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (MLX5_CAP_GEN(mdev, xrc))
props->device_cap_flags |= IB_DEVICE_XRC;
if (MLX5_CAP_GEN(mdev, imaicl)) {
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
/* We support 'Gappy' memory registration too */
props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
}
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
IB_PROT_T10DIF_TYPE_3;
props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
IB_GUARD_T10DIF_CSUM;
}
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
(MLX5_CAP_ETH(dev->mdev, csum_cap)))
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
props->max_mr_size = ~0ull;
props->page_size_cap = ~(min_page_size - 1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
get_atomic_caps(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (MLX5_CAP_GEN(mdev, pg))
props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
props->odp_caps = dev->odp_caps;
#endif
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
if (!mlx5_core_is_pf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
return 0;
}
enum mlx5_ib_width {
MLX5_IB_WIDTH_1X = 1 << 0,
MLX5_IB_WIDTH_2X = 1 << 1,
MLX5_IB_WIDTH_4X = 1 << 2,
MLX5_IB_WIDTH_8X = 1 << 3,
MLX5_IB_WIDTH_12X = 1 << 4
};
static int translate_active_width(struct ib_device *ibdev, u8 active_width,
u8 *ib_width)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
int err = 0;
if (active_width & MLX5_IB_WIDTH_1X) {
*ib_width = IB_WIDTH_1X;
} else if (active_width & MLX5_IB_WIDTH_2X) {
mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
(int)active_width);
err = -EINVAL;
} else if (active_width & MLX5_IB_WIDTH_4X) {
*ib_width = IB_WIDTH_4X;
} else if (active_width & MLX5_IB_WIDTH_8X) {
*ib_width = IB_WIDTH_8X;
} else if (active_width & MLX5_IB_WIDTH_12X) {
*ib_width = IB_WIDTH_12X;
} else {
mlx5_ib_dbg(dev, "Invalid active_width %d\n",
(int)active_width);
err = -EINVAL;
}
return err;
}
static int mlx5_mtu_to_ib_mtu(int mtu)
{
switch (mtu) {
case 256: return 1;
case 512: return 2;
case 1024: return 3;
case 2048: return 4;
case 4096: return 5;
default:
pr_warn("invalid mtu\n");
return -1;
}
}
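/* The values returned above mirror enum ib_mtu from <rdma/ib_verbs.h>
* (IB_MTU_256 through IB_MTU_4096 are 1 through 5), so the result can be
* assigned directly to the MTU fields of struct ib_port_attr, as the HCA
* port query below does.
*/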
enum ib_max_vl_num {
__IB_MAX_VL_0 = 1,
__IB_MAX_VL_0_1 = 2,
__IB_MAX_VL_0_3 = 3,
__IB_MAX_VL_0_7 = 4,
__IB_MAX_VL_0_14 = 5,
};
enum mlx5_vl_hw_cap {
MLX5_VL_HW_0 = 1,
MLX5_VL_HW_0_1 = 2,
MLX5_VL_HW_0_2 = 3,
MLX5_VL_HW_0_3 = 4,
MLX5_VL_HW_0_4 = 5,
MLX5_VL_HW_0_5 = 6,
MLX5_VL_HW_0_6 = 7,
MLX5_VL_HW_0_7 = 8,
MLX5_VL_HW_0_14 = 15
};
static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
u8 *max_vl_num)
{
switch (vl_hw_cap) {
case MLX5_VL_HW_0:
*max_vl_num = __IB_MAX_VL_0;
break;
case MLX5_VL_HW_0_1:
*max_vl_num = __IB_MAX_VL_0_1;
break;
case MLX5_VL_HW_0_3:
*max_vl_num = __IB_MAX_VL_0_3;
break;
case MLX5_VL_HW_0_7:
*max_vl_num = __IB_MAX_VL_0_7;
break;
case MLX5_VL_HW_0_14:
*max_vl_num = __IB_MAX_VL_0_14;
break;
default:
return -EINVAL;
}
return 0;
}
static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *rep;
int max_mtu;
int oper_mtu;
int err;
u8 ib_link_width_oper;
u8 vl_hw_cap;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep) {
err = -ENOMEM;
goto out;
}
memset(props, 0, sizeof(*props));
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
goto out;
props->lid = rep->lid;
props->lmc = rep->lmc;
props->sm_lid = rep->sm_lid;
props->sm_sl = rep->sm_sl;
props->state = rep->vport_state;
props->phys_state = rep->port_physical_state;
props->port_cap_flags = rep->cap_mask1;
props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
props->bad_pkey_cntr = rep->pkey_violation_counter;
props->qkey_viol_cntr = rep->qkey_violation_counter;
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
props->grh_required = rep->grh_required;
err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
if (err)
goto out;
err = translate_active_width(ibdev, ib_link_width_oper,
&props->active_width);
if (err)
goto out;
err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
port);
if (err)
goto out;
mlx5_query_port_max_mtu(mdev, &max_mtu, port);
props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);
props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);
err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
if (err)
goto out;
err = translate_max_vl_num(ibdev, vl_hw_cap,
&props->max_vl_num);
out:
kfree(rep);
return err;
}
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_port(ibdev, port, props);
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_port(ibdev, port, props);
case MLX5_VPORT_ACCESS_METHOD_NIC:
return mlx5_query_port_roce(ibdev, port, props);
default:
return -EINVAL;
}
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);
default:
return -EINVAL;
}
}
static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
pkey);
default:
return -EINVAL;
}
}
static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_reg_node_desc in;
struct mlx5_reg_node_desc out;
int err;
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
/*
* If possible, pass node desc to FW, so it can generate
* a 144 trap. If cmd fails, just ignore.
*/
memcpy(&in, props->node_desc, 64);
err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
if (err)
return err;
memcpy(ibdev->node_desc, props->node_desc, 64);
return err;
}
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct ib_port_attr attr;
u32 tmp;
int err;
mutex_lock(&dev->cap_mask_mutex);
err = mlx5_ib_query_port(ibdev, port, &attr);
if (err)
goto out;
tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
err = mlx5_set_port_caps(dev->mdev, port, tmp);
out:
mutex_unlock(&dev->cap_mask_mutex);
return err;
}
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
int gross_uuars;
int num_uars;
int ver;
int uuarn;
int err;
int i;
size_t reqlen;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
return ERR_PTR(-EINVAL);
reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
else if (reqlen >= min_req_v2)
ver = 2;
else
return ERR_PTR(-EINVAL);
err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
if (err)
return ERR_PTR(err);
if (req.flags)
return ERR_PTR(-EINVAL);
if (req.total_num_uuars > MLX5_MAX_UUARS)
return ERR_PTR(-ENOMEM);
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
return ERR_PTR(-EOPNOTSUPP);
if (reqlen > sizeof(req) &&
!ib_is_udata_cleared(udata, sizeof(req),
reqlen - sizeof(req)))
return ERR_PTR(-EOPNOTSUPP);
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
return ERR_PTR(-EINVAL);
num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
resp.cache_line_size = L1_CACHE_BYTES;
resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
resp.cqe_version = min_t(__u8,
(__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
req.max_cqe_version);
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
uuari = &context->uuari;
mutex_init(&uuari->lock);
uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
if (!uars) {
err = -ENOMEM;
goto out_ctx;
}
uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
sizeof(*uuari->bitmap),
GFP_KERNEL);
if (!uuari->bitmap) {
err = -ENOMEM;
goto out_uar_ctx;
}
/*
* clear all fast path uuars
*/
for (i = 0; i < gross_uuars; i++) {
uuarn = i & 3;
if (uuarn == 2 || uuarn == 3)
set_bit(i, uuari->bitmap);
}
uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
if (!uuari->count) {
err = -ENOMEM;
goto out_bitmap;
}
for (i = 0; i < num_uars; i++) {
err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
if (err)
goto out_count;
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_core_alloc_transport_domain(dev->mdev,
&context->tdn);
if (err)
goto out_uars;
}
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
if (field_avail(typeof(resp), cqe_version, udata->outlen))
resp.response_length += sizeof(resp.cqe_version);
if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
resp.comp_mask |=
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
resp.hca_core_clock_offset =
offsetof(struct mlx5_init_seg, internal_timer_h) %
PAGE_SIZE;
resp.response_length += sizeof(resp.hca_core_clock_offset) +
sizeof(resp.reserved2) +
sizeof(resp.reserved3);
}
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_td;
uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
context->cqe_version = resp.cqe_version;
return &context->ibucontext;
out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
out_count:
kfree(uuari->count);
out_bitmap:
kfree(uuari->bitmap);
out_uar_ctx:
kfree(uars);
out_ctx:
kfree(context);
return ERR_PTR(err);
}
static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_uuar_info *uuari = &context->uuari;
int i;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
}
kfree(uuari->count);
kfree(uuari->bitmap);
kfree(uuari->uars);
kfree(context);
return 0;
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
}
static int get_command(unsigned long offset)
{
return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}
static int get_arg(unsigned long offset)
{
return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}
static int get_index(unsigned long offset)
{
return get_arg(offset);
}
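/* Illustrative sketch, not part of the driver: how a hypothetical
* helper could compose the vm_pgoff value that get_command() and
* get_index() above decode -- the command lives in the bits above
* MLX5_IB_MMAP_CMD_SHIFT, the per-command argument in the bits below it,
* and userspace passes this value times the page size as the mmap()
* offset.  Kept under #if 0 as documentation only.
*/
#if 0
static unsigned long mlx5_ib_mmap_pgoff(int command, int index)
{
	return ((unsigned long)command << MLX5_IB_MMAP_CMD_SHIFT) | index;
}
#endif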
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_uuar_info *uuari = &context->uuari;
unsigned long command;
unsigned long idx;
phys_addr_t pfn;
command = get_command(vma->vm_pgoff);
switch (command) {
case MLX5_IB_MMAP_REGULAR_PAGE:
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
idx = get_index(vma->vm_pgoff);
if (idx >= uuari->num_uars)
return -EINVAL;
pfn = uar_index2pfn(dev, uuari->uars[idx].index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
(unsigned long long)pfn);
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
vma->vm_start,
(unsigned long long)pfn << PAGE_SHIFT);
break;
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
case MLX5_IB_MMAP_CORE_CLOCK:
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
if (vma->vm_flags & (VM_WRITE | VM_EXEC))
return -EPERM;
/* Don't expose to user-space information it shouldn't have */
if (PAGE_SIZE > 4096)
return -EOPNOTSUPP;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pfn = (dev->mdev->iseg_base +
offsetof(struct mlx5_init_seg, internal_timer_h)) >>
PAGE_SHIFT;
if (io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
vma->vm_start,
(unsigned long long)pfn << PAGE_SHIFT);
break;
default:
return -EINVAL;
}
return 0;
}
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx5_ib_alloc_pd_resp resp;
struct mlx5_ib_pd *pd;
int err;
pd = kmalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
if (context) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
kfree(pd);
return ERR_PTR(-EFAULT);
}
}
return &pd->ibpd;
}
static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
kfree(mpd);
return 0;
}
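/* outer_header_zero() below uses an overlapping-memcmp idiom: comparing
* a buffer against itself shifted by one byte succeeds only when every
* byte equals its neighbour, so together with the explicit check that
* byte 0 is zero it reports whether the whole region is zero without an
* open-coded loop.
*/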
static bool outer_header_zero(u32 *match_criteria)
{
int size = MLX5_ST_SZ_BYTES(fte_match_param);
char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
outer_headers);
return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
outer_headers_c + 1,
size - 1);
}
static int parse_flow_attr(u32 *match_c, u32 *match_v,
union ib_flow_spec *ib_spec)
{
void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
outer_headers);
switch (ib_spec->type) {
case IB_FLOW_SPEC_ETH:
if (ib_spec->size != sizeof(ib_spec->eth))
return -EINVAL;
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
dmac_47_16),
ib_spec->eth.mask.dst_mac);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
dmac_47_16),
ib_spec->eth.val.dst_mac);
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
vlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
vlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
first_vid, ntohs(ib_spec->eth.val.vlan_tag));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
first_cfi,
ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
first_cfi,
ntohs(ib_spec->eth.val.vlan_tag) >> 12);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
first_prio,
ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
first_prio,
ntohs(ib_spec->eth.val.vlan_tag) >> 13);
}
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
ethertype, ntohs(ib_spec->eth.mask.ether_type));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
ethertype, ntohs(ib_spec->eth.val.ether_type));
break;
case IB_FLOW_SPEC_IPV4:
if (ib_spec->size != sizeof(ib_spec->ipv4))
return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
ethertype, 0xffff);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
ethertype, ETH_P_IP);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.src_ip,
sizeof(ib_spec->ipv4.mask.src_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.src_ip,
sizeof(ib_spec->ipv4.val.src_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.dst_ip,
sizeof(ib_spec->ipv4.mask.dst_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.dst_ip,
sizeof(ib_spec->ipv4.val.dst_ip));
break;
case IB_FLOW_SPEC_TCP:
if (ib_spec->size != sizeof(ib_spec->tcp_udp))
return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
0xff);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
IPPROTO_TCP);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
case IB_FLOW_SPEC_UDP:
if (ib_spec->size != sizeof(ib_spec->tcp_udp))
return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
0xff);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
IPPROTO_UDP);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
default:
return -EINVAL;
}
return 0;
}
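/* Illustrative sketch, not part of the driver: the shape of one
* ib_flow_spec entry that parse_flow_attr() above consumes, here a TCP
* destination-port match.  Field names follow the union ib_flow_spec
* accesses already used above; the port number is arbitrary and the
* block is kept under #if 0 as documentation only.
*/
#if 0
static void example_tcp_dport_spec(union ib_flow_spec *spec)
{
	spec->tcp_udp.type = IB_FLOW_SPEC_TCP;
	spec->tcp_udp.size = sizeof(spec->tcp_udp);
	spec->tcp_udp.val.dst_port = htons(443);
	spec->tcp_udp.mask.dst_port = htons(0xffff);
}
#endif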
/* If a flow could catch both multicast and unicast packets,
* it won't fall into the multicast flow steering table and this rule
* could steal other multicast packets.
*/
static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
{
struct ib_flow_spec_eth *eth_spec;
if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
ib_attr->size < sizeof(struct ib_flow_attr) +
sizeof(struct ib_flow_spec_eth) ||
ib_attr->num_of_specs < 1)
return false;
eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
if (eth_spec->type != IB_FLOW_SPEC_ETH ||
eth_spec->size != sizeof(*eth_spec))
return false;
return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
is_multicast_ether_addr(eth_spec->val.dst_mac);
}
static bool is_valid_attr(struct ib_flow_attr *flow_attr)
{
union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
bool has_ipv4_spec = false;
bool eth_type_ipv4 = true;
unsigned int spec_index;
/* Validate that ethertype is correct */
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
if (ib_spec->type == IB_FLOW_SPEC_ETH &&
ib_spec->eth.mask.ether_type) {
if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
eth_type_ipv4 = false;
} else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
has_ipv4_spec = true;
}
ib_spec = (void *)ib_spec + ib_spec->size;
}
return !has_ipv4_spec || eth_type_ipv4;
}
static void put_flow_table(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *prio, bool ft_added)
{
prio->refcount -= !!ft_added;
if (!prio->refcount) {
mlx5_destroy_flow_table(prio->flow_table);
prio->flow_table = NULL;
}
}
static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
{
struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
struct mlx5_ib_flow_handler *handler = container_of(flow_id,
struct mlx5_ib_flow_handler,
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
mutex_lock(&dev->flow_db.lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rule(iter->rule);
list_del(&iter->list);
kfree(iter);
}
mlx5_del_flow_rule(handler->rule);
put_flow_table(dev, &dev->flow_db.prios[handler->prio], true);
mutex_unlock(&dev->flow_db.lock);
kfree(handler);
return 0;
}
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
priority *= 2;
if (!dont_trap)
priority++;
return priority;
}
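/* Worked example for the mapping above: IB priority 0 becomes core
* priority 0 when IB_FLOW_ATTR_FLAGS_DONT_TRAP is set and 1 otherwise,
* IB priority 1 becomes 2 or 3, and so on -- each IB priority owns a
* pair of core priorities, with the don't-trap variant on the even slot.
*/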
#define MLX5_FS_MAX_TYPES 10
#define MLX5_FS_MAX_ENTRIES 32000UL
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr)
{
bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
int num_entries;
int num_groups;
int priority;
int err = 0;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_is_multicast_only(flow_attr) &&
!dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
prio = &dev->flow_db.prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_LEFTOVERS);
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
}
if (!ns)
return ERR_PTR(-ENOTSUPP);
ft = prio->flow_table;
if (!ft) {
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
num_entries,
num_groups);
if (!IS_ERR(ft)) {
prio->refcount = 0;
prio->flow_table = ft;
} else {
err = PTR_ERR(ft);
}
}
return err ? ERR_PTR(err) : prio;
}
static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
void *ib_flow = flow_attr + 1;
u8 match_criteria_enable = 0;
unsigned int spec_index;
u32 *match_c;
u32 *match_v;
u32 action;
int err = 0;
if (!is_valid_attr(flow_attr))
return ERR_PTR(-EINVAL);
match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !match_c || !match_v) {
err = -ENOMEM;
goto free;
}
INIT_LIST_HEAD(&handler->list);
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(match_c, match_v, ib_flow);
if (err < 0)
goto free;
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
/* Outer header support only */
match_criteria_enable = (!outer_header_zero(match_c)) << 0;
action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
match_c, match_v,
action,
MLX5_FS_DEFAULT_FLOW_TAG,
dst);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
goto free;
}
handler->prio = ft_prio - dev->flow_db.prios;
ft_prio->flow_table = ft;
free:
if (err)
kfree(handler);
kfree(match_c);
kfree(match_v);
return err ? ERR_PTR(err) : handler;
}
static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
struct mlx5_ib_flow_handler *handler_dst = NULL;
struct mlx5_ib_flow_handler *handler = NULL;
handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
if (!IS_ERR(handler)) {
handler_dst = create_flow_rule(dev, ft_prio,
flow_attr, dst);
if (IS_ERR(handler_dst)) {
mlx5_del_flow_rule(handler->rule);
kfree(handler);
handler = handler_dst;
} else {
list_add(&handler_dst->list, &handler->list);
}
}
return handler;
}
enum {
LEFTOVERS_MC,
LEFTOVERS_UC,
};
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
struct mlx5_ib_flow_handler *handler_ucast = NULL;
struct mlx5_ib_flow_handler *handler = NULL;
static struct {
struct ib_flow_attr flow_attr;
struct ib_flow_spec_eth eth_flow;
} leftovers_specs[] = {
[LEFTOVERS_MC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {0x1} }
}
},
[LEFTOVERS_UC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {} }
}
}
};
handler = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_MC].flow_attr,
dst);
if (!IS_ERR(handler) &&
flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
handler_ucast = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_UC].flow_attr,
dst);
if (IS_ERR(handler_ucast)) {
kfree(handler);
handler = handler_ucast;
} else {
list_add(&handler_ucast->list, &handler->list);
}
}
return handler;
}
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_flow_handler *handler = NULL;
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio;
int err;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOSPC);
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
(flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
return ERR_PTR(-EINVAL);
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
mutex_lock(&dev->flow_db.lock);
ft_prio = get_flow_table(dev, flow_attr);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
handler = create_dont_trap_rule(dev, ft_prio,
flow_attr, dst);
} else {
handler = create_flow_rule(dev, ft_prio, flow_attr,
dst);
}
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
handler = create_leftovers_rule(dev, ft_prio, flow_attr,
dst);
} else {
err = -EINVAL;
goto destroy_ft;
}
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
handler = NULL;
goto destroy_ft;
}
ft_prio->refcount++;
mutex_unlock(&dev->flow_db.lock);
kfree(dst);
return &handler->ibflow;
destroy_ft:
put_flow_table(dev, ft_prio, false);
unlock:
mutex_unlock(&dev->flow_db.lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
}
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
return err;
}
static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
return err;
}
static int init_node_data(struct mlx5_ib_dev *dev)
{
int err;
err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
if (err)
return err;
dev->mdev->rev_id = dev->mdev->pdev->revision;
return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}
static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
}
static ssize_t show_reg_pages(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev),
fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->mdev->rev_id);
}
static ssize_t show_board(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
dev->mdev->board_id);
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
static struct device_attribute *mlx5_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
&dev_attr_fw_pages,
&dev_attr_reg_pages,
};
static void pkey_change_handler(struct work_struct *work)
{
struct mlx5_ib_port_resources *ports =
container_of(work, struct mlx5_ib_port_resources,
pkey_change_work);
mutex_lock(&ports->devr->mutex);
mlx5_ib_gsi_pkey_change(ports->gsi);
mutex_unlock(&ports->devr->mutex);
}
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
struct ib_event ibev;
u8 port = 0;
switch (event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
case MLX5_DEV_EVENT_PORT_UP:
ibev.event = IB_EVENT_PORT_ACTIVE;
port = (u8)param;
break;
case MLX5_DEV_EVENT_PORT_DOWN:
ibev.event = IB_EVENT_PORT_ERR;
port = (u8)param;
break;
case MLX5_DEV_EVENT_PORT_INITIALIZED:
/* not used by ULPs */
return;
case MLX5_DEV_EVENT_LID_CHANGE:
ibev.event = IB_EVENT_LID_CHANGE;
port = (u8)param;
break;
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
port = (u8)param;
schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
ibev.event = IB_EVENT_GID_CHANGE;
port = (u8)param;
break;
case MLX5_DEV_EVENT_CLIENT_REREG:
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
}
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
if (port < 1 || port > ibdev->num_ports) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
return;
}
if (ibdev->ib_active)
ib_dispatch_event(&ibev);
}
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
mlx5_query_ext_port_caps(dev, port);
}
static int get_port_caps(struct mlx5_ib_dev *dev)
{
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
int err = -ENOMEM;
int port;
struct ib_udata uhw = {.inlen = 0, .outlen = 0};
pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
goto out;
dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
if (!dprops)
goto out;
err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
goto out;
}
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
port, err);
break;
}
dev->mdev->port_caps[port - 1].pkey_table_len =
dprops->max_pkeys;
dev->mdev->port_caps[port - 1].gid_table_len =
pprops->gid_tbl_len;
mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
dprops->max_pkeys, pprops->gid_tbl_len);
}
out:
kfree(pprops);
kfree(dprops);
return err;
}
static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
int err;
err = mlx5_mr_cache_cleanup(dev);
if (err)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
mlx5_ib_destroy_qp(dev->umrc.qp);
ib_free_cq(dev->umrc.cq);
ib_dealloc_pd(dev->umrc.pd);
}
enum {
MAX_UMR_WR = 128,
};
static int create_umr_res(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr *init_attr = NULL;
struct ib_qp_attr *attr = NULL;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
int ret;
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
if (!attr || !init_attr) {
ret = -ENOMEM;
goto error_0;
}
pd = ib_alloc_pd(&dev->ib_dev);
if (IS_ERR(pd)) {
mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
ret = PTR_ERR(pd);
goto error_0;
}
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
goto error_2;
}
init_attr->send_cq = cq;
init_attr->recv_cq = cq;
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr->cap.max_send_wr = MAX_UMR_WR;
init_attr->cap.max_send_sge = 1;
init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
init_attr->port_num = 1;
qp = mlx5_ib_create_qp(pd, init_attr, NULL);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
goto error_3;
}
qp->device = &dev->ib_dev;
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = MLX5_IB_QPT_REG_UMR;
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
IB_QP_PORT, NULL);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
goto error_4;
}
memset(attr, 0, sizeof(*attr));
attr->qp_state = IB_QPS_RTR;
attr->path_mtu = IB_MTU_256;
ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
goto error_4;
}
memset(attr, 0, sizeof(*attr));
attr->qp_state = IB_QPS_RTS;
ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
goto error_4;
}
dev->umrc.qp = qp;
dev->umrc.cq = cq;
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
ret = mlx5_mr_cache_init(dev);
if (ret) {
mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
goto error_4;
}
kfree(attr);
kfree(init_attr);
return 0;
error_4:
mlx5_ib_destroy_qp(qp);
error_3:
ib_free_cq(cq);
error_2:
ib_dealloc_pd(pd);
error_0:
kfree(attr);
kfree(init_attr);
return ret;
}
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
	struct ib_srq_init_attr attr;
	struct mlx5_ib_dev *dev;
	struct ib_cq_init_attr cq_attr = {.cqe = 1};
	int port;
	int ret = 0;

	dev = container_of(devr, struct mlx5_ib_dev, devr);
	mutex_init(&devr->mutex);

	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->p0)) {
		ret = PTR_ERR(devr->p0);
		goto error0;
	}
	devr->p0->device = &dev->ib_dev;
	devr->p0->uobject = NULL;
	atomic_set(&devr->p0->usecnt, 0);

	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
	if (IS_ERR(devr->c0)) {
		ret = PTR_ERR(devr->c0);
		goto error1;
	}
	devr->c0->device = &dev->ib_dev;
	devr->c0->uobject = NULL;
	devr->c0->comp_handler = NULL;
	devr->c0->event_handler = NULL;
	devr->c0->cq_context = NULL;
	atomic_set(&devr->c0->usecnt, 0);

	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x0)) {
		ret = PTR_ERR(devr->x0);
		goto error2;
	}
	devr->x0->device = &dev->ib_dev;
	devr->x0->inode = NULL;
	atomic_set(&devr->x0->usecnt, 0);
	mutex_init(&devr->x0->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x1)) {
		ret = PTR_ERR(devr->x1);
		goto error3;
	}
	devr->x1->device = &dev->ib_dev;
	devr->x1->inode = NULL;
	atomic_set(&devr->x1->usecnt, 0);
	mutex_init(&devr->x1->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_XRC;
	attr.ext.xrc.cq = devr->c0;
	attr.ext.xrc.xrcd = devr->x0;

	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s0)) {
		ret = PTR_ERR(devr->s0);
		goto error4;
	}
	devr->s0->device = &dev->ib_dev;
	devr->s0->pd = devr->p0;
	devr->s0->uobject = NULL;
	devr->s0->event_handler = NULL;
	devr->s0->srq_context = NULL;
	devr->s0->srq_type = IB_SRQT_XRC;
	devr->s0->ext.xrc.xrcd = devr->x0;
	devr->s0->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s0->usecnt, 0);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_BASIC;
	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s1)) {
		ret = PTR_ERR(devr->s1);
		goto error5;
	}
	devr->s1->device = &dev->ib_dev;
	devr->s1->pd = devr->p0;
	devr->s1->uobject = NULL;
	devr->s1->event_handler = NULL;
	devr->s1->srq_context = NULL;
	devr->s1->srq_type = IB_SRQT_BASIC;
	devr->s1->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s1->usecnt, 0);

	for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
		INIT_WORK(&devr->ports[port].pkey_change_work,
			  pkey_change_handler);
		devr->ports[port].devr = devr;
	}

	return 0;

error5:
	mlx5_ib_destroy_srq(devr->s0);
error4:
	mlx5_ib_dealloc_xrcd(devr->x1);
error3:
	mlx5_ib_dealloc_xrcd(devr->x0);
error2:
	mlx5_ib_destroy_cq(devr->c0);
error1:
	mlx5_ib_dealloc_pd(devr->p0);
error0:
	return ret;
}
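
/* Tear down everything set up by create_dev_resources(). */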
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	struct mlx5_ib_dev *dev =
		container_of(devr, struct mlx5_ib_dev, devr);
	int port;

	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);

	/* Make sure no change P_Key work items are still executing */
	for (port = 0; port < dev->num_ports; ++port)
		cancel_work_sync(&devr->ports[port].pkey_change_work);
}
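
/*
 * Derive the RDMA core port capability flags for port 1 from the link
 * layer and the device's RoCE caps: InfiniBand ports report
 * RDMA_CORE_PORT_IBA_IB, while Ethernet ports report RoCE v1 and/or
 * RoCE v2 (UDP encap) according to the roce_version bits, and only
 * when both the IPv4 and IPv6 L3 types are supported.
 */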
static u32 get_core_cap_flags(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
	u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
	u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
	u32 ret = 0;

	if (ll == IB_LINK_LAYER_INFINIBAND)
		return RDMA_CORE_PORT_IBA_IB;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
		return 0;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
		return 0;

	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE;

	if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	return ret;
}
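
/*
 * get_port_immutable() callback: report the P_Key and GID table sizes,
 * the core capability flags and the maximum MAD size for @port_num.
 */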
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = mlx5_ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = get_core_cap_flags(ibdev);
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}
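
/*
 * RoCE bring-up: register a netdev notifier (mlx5_netdev_event),
 * presumably so the driver can track the Ethernet device backing the
 * RoCE port, then enable RoCE on the NIC vport.
 */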
static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
{
	int err;

	dev->roce.nb.notifier_call = mlx5_netdev_event;
	err = register_netdevice_notifier(&dev->roce.nb);
	if (err)
		return err;

	err = mlx5_nic_vport_enable_roce(dev->mdev);
	if (err)
		goto err_unregister_netdevice_notifier;

	return 0;

err_unregister_netdevice_notifier:
	unregister_netdevice_notifier(&dev->roce.nb);
	return err;
}
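
/* Undo mlx5_enable_roce() in reverse order. */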
static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
{
	mlx5_nic_vport_disable_roce(dev->mdev);
	unregister_netdevice_notifier(&dev->roce.nb);
}
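
/*
 * mlx5_core .add() callback: allocate the ib_device, fill in the verbs
 * dispatch table and the uverbs command masks, set up RoCE, the shared
 * device resources and ODP, register with the IB core, and finally
 * create the UMR resources and the sysfs class attributes.  Bails out
 * early for an Ethernet link layer without RoCE support.  Returns the
 * new mlx5_ib_dev as the interface context, or NULL on failure.
 */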
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
	struct mlx5_ib_dev *dev;
	enum rdma_link_layer ll;
	int port_type_cap;
	int err;
	int i;

	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);

	if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
		return NULL;

	printk_once(KERN_INFO "%s", mlx5_version);

	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev)
		return NULL;

	dev->mdev = mdev;

	rwlock_init(&dev->roce.netdev_lock);
	err = get_port_caps(dev);
	if (err)
		goto err_dealloc;

	if (mlx5_use_mad_ifc(dev))
		get_ext_port_caps(dev);

	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);

	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
	dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
	dev->ib_dev.phys_port_cnt = dev->num_ports;
	dev->ib_dev.num_comp_vectors =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.dma_device = &mdev->pdev->dev;

	dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_REREG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
	dev->ib_dev.uverbs_ex_cmd_mask =
		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);

	dev->ib_dev.query_device = mlx5_ib_query_device;
	dev->ib_dev.query_port = mlx5_ib_query_port;
	dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
	if (ll == IB_LINK_LAYER_ETHERNET)
		dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
	dev->ib_dev.query_gid = mlx5_ib_query_gid;
	dev->ib_dev.add_gid = mlx5_ib_add_gid;
	dev->ib_dev.del_gid = mlx5_ib_del_gid;
	dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
	dev->ib_dev.modify_device = mlx5_ib_modify_device;
	dev->ib_dev.modify_port = mlx5_ib_modify_port;
	dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
	dev->ib_dev.mmap = mlx5_ib_mmap;
	dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
	dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
	dev->ib_dev.create_ah = mlx5_ib_create_ah;
	dev->ib_dev.query_ah = mlx5_ib_query_ah;
	dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
	dev->ib_dev.create_srq = mlx5_ib_create_srq;
	dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
	dev->ib_dev.query_srq = mlx5_ib_query_srq;
	dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
	dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
	dev->ib_dev.create_qp = mlx5_ib_create_qp;
	dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
	dev->ib_dev.query_qp = mlx5_ib_query_qp;
	dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
	dev->ib_dev.post_send = mlx5_ib_post_send;
	dev->ib_dev.post_recv = mlx5_ib_post_recv;
	dev->ib_dev.create_cq = mlx5_ib_create_cq;
	dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
	dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
	dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
	dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
	dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
	dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
	dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
	dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
	dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
	dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
	dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
	dev->ib_dev.process_mad = mlx5_ib_process_mad;
	dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
	dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
	dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
	dev->ib_dev.get_port_immutable = mlx5_port_immutable;

	if (mlx5_core_is_pf(mdev)) {
		dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
		dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
		dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
		dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
	}

	mlx5_ib_internal_fill_odp_caps(dev);

	if (MLX5_CAP_GEN(mdev, imaicl)) {
		dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
		dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
	}

	if (MLX5_CAP_GEN(mdev, xrc)) {
		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		dev->ib_dev.create_flow = mlx5_ib_create_flow;
		dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
		dev->ib_dev.uverbs_ex_cmd_mask |=
			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
	}

	err = init_node_data(dev);
	if (err)
		goto err_dealloc;

	mutex_init(&dev->flow_db.lock);
	mutex_init(&dev->cap_mask_mutex);

	if (ll == IB_LINK_LAYER_ETHERNET) {
		err = mlx5_enable_roce(dev);
		if (err)
			goto err_dealloc;
	}

	err = create_dev_resources(&dev->devr);
	if (err)
		goto err_disable_roce;

	err = mlx5_ib_odp_init_one(dev);
	if (err)
		goto err_rsrc;

	err = ib_register_device(&dev->ib_dev, NULL);
	if (err)
		goto err_odp;

	err = create_umr_res(dev);
	if (err)
		goto err_dev;

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		err = device_create_file(&dev->ib_dev.dev,
					 mlx5_class_attributes[i]);
		if (err)
			goto err_umrc;
	}

	dev->ib_active = true;

	return dev;

err_umrc:
	destroy_umrc_res(dev);

err_dev:
	ib_unregister_device(&dev->ib_dev);

err_odp:
	mlx5_ib_odp_remove_one(dev);

err_rsrc:
	destroy_dev_resources(&dev->devr);

err_disable_roce:
	if (ll == IB_LINK_LAYER_ETHERNET)
		mlx5_disable_roce(dev);

err_dealloc:
	ib_dealloc_device((struct ib_device *)dev);

	return NULL;
}
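
/*
 * mlx5_core .remove() callback: tear down everything mlx5_ib_add()
 * created, unregistering from the IB core first.
 */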
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);

	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	mlx5_ib_odp_remove_one(dev);
	destroy_dev_resources(&dev->devr);
	if (ll == IB_LINK_LAYER_ETHERNET)
		mlx5_disable_roce(dev);
	ib_dealloc_device(&dev->ib_dev);
}
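
/* Glue for mlx5_core: device add/remove and async event dispatch for the IB protocol. */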
static struct mlx5_interface mlx5_ib_interface = {
	.add		= mlx5_ib_add,
	.remove		= mlx5_ib_remove,
	.event		= mlx5_ib_event,
	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
};
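
/*
 * Module init: bring up the ODP (on-demand paging) machinery first,
 * then register with mlx5_core, which is expected to invoke
 * mlx5_ib_add() for each mlx5 device carrying the IB protocol.
 */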
static int __init mlx5_ib_init(void)
{
	int err;

	if (deprecated_prof_sel != 2)
		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

	err = mlx5_ib_odp_init();
	if (err)
		return err;

	err = mlx5_register_interface(&mlx5_ib_interface);
	if (err)
		goto clean_odp;

	return err;

clean_odp:
	mlx5_ib_odp_cleanup();
	return err;
}

static void __exit mlx5_ib_cleanup(void)
{
	mlx5_unregister_interface(&mlx5_ib_interface);
	mlx5_ib_odp_cleanup();
}

module_init(mlx5_ib_init);
module_exit(mlx5_ib_cleanup);