IB/mlx5: Make netdev notifications multiport capable

When multiple RoCE ports are supported registration for events on
multiple netdevs is required. Refactor the event registration and
handling to support multiple ports.

Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Daniel Jurgens 2018-01-04 17:25:35 +02:00 committed by Jason Gunthorpe
parent 508562d6f7
commit 7fd8aefb7c
4 changed files with 60 additions and 37 deletions

View File

@ -113,24 +113,30 @@ static int get_port_state(struct ib_device *ibdev,
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
roce.nb);
u8 port_num = roce->native_port_num;
struct mlx5_core_dev *mdev;
struct mlx5_ib_dev *ibdev;
ibdev = roce->dev;
mdev = ibdev->mdev;
switch (event) {
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&ibdev->roce.netdev_lock);
if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
write_unlock(&ibdev->roce.netdev_lock);
write_lock(&roce->netdev_lock);
if (ndev->dev.parent == &mdev->pdev->dev)
roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
write_unlock(&roce->netdev_lock);
break;
case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev);
struct net_device *upper = NULL;
if (lag_ndev) {
@ -138,27 +144,28 @@ static int mlx5_netdev_event(struct notifier_block *this,
dev_put(lag_ndev);
}
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
if ((upper == ndev || (!upper && ndev == roce->netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
enum ib_port_state port_state;
if (get_port_state(&ibdev->ib_dev, 1, &port_state))
return NOTIFY_DONE;
if (get_port_state(&ibdev->ib_dev, port_num,
&port_state))
goto done;
if (ibdev->roce.last_port_state == port_state)
return NOTIFY_DONE;
if (roce->last_port_state == port_state)
goto done;
ibdev->roce.last_port_state = port_state;
roce->last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
if (port_state == IB_PORT_DOWN)
ibev.event = IB_EVENT_PORT_ERR;
else if (port_state == IB_PORT_ACTIVE)
ibev.event = IB_EVENT_PORT_ACTIVE;
else
return NOTIFY_DONE;
goto done;
ibev.element.port_num = 1;
ibev.element.port_num = port_num;
ib_dispatch_event(&ibev);
}
break;
@ -167,7 +174,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
default:
break;
}
done:
return NOTIFY_DONE;
}
@ -183,11 +190,11 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
/* Ensure ndev does not disappear before we invoke dev_hold()
*/
read_lock(&ibdev->roce.netdev_lock);
ndev = ibdev->roce.netdev;
read_lock(&ibdev->roce[port_num - 1].netdev_lock);
ndev = ibdev->roce[port_num - 1].netdev;
if (ndev)
dev_hold(ndev);
read_unlock(&ibdev->roce.netdev_lock);
read_unlock(&ibdev->roce[port_num - 1].netdev_lock);
return ndev;
}
@ -3579,33 +3586,33 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
}
}
static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
dev->roce.nb.notifier_call = mlx5_netdev_event;
err = register_netdevice_notifier(&dev->roce.nb);
dev->roce[port_num].nb.notifier_call = mlx5_netdev_event;
err = register_netdevice_notifier(&dev->roce[port_num].nb);
if (err) {
dev->roce.nb.notifier_call = NULL;
dev->roce[port_num].nb.notifier_call = NULL;
return err;
}
return 0;
}
static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
{
if (dev->roce.nb.notifier_call) {
unregister_netdevice_notifier(&dev->roce.nb);
dev->roce.nb.notifier_call = NULL;
if (dev->roce[port_num].nb.notifier_call) {
unregister_netdevice_notifier(&dev->roce[port_num].nb);
dev->roce[port_num].nb.notifier_call = NULL;
}
}
static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
err = mlx5_add_netdev_notifier(dev);
err = mlx5_add_netdev_notifier(dev, port_num);
if (err)
return err;
@ -3626,7 +3633,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
err_unregister_netdevice_notifier:
mlx5_remove_netdev_notifier(dev);
mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
@ -4066,7 +4073,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (!dev->port)
return -ENOMEM;
rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_free_port;
@ -4246,12 +4252,21 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
u8 port_num = 0;
int err;
int i;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
for (i = 0; i < dev->num_ports; i++) {
rwlock_init(&dev->roce[i].netdev_lock);
dev->roce[i].dev = dev;
dev->roce[i].native_port_num = i + 1;
dev->roce[i].last_port_state = IB_PORT_DOWN;
}
dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
@ -4264,10 +4279,9 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev);
err = mlx5_enable_eth(dev, port_num);
if (err)
return err;
dev->roce.last_port_state = IB_PORT_DOWN;
}
return 0;
@ -4278,13 +4292,14 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
u8 port_num = 0;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
mlx5_remove_netdev_notifier(dev);
mlx5_remove_netdev_notifier(dev, port_num);
}
}

View File

@ -667,6 +667,8 @@ struct mlx5_roce {
struct notifier_block nb;
atomic_t next_port;
enum ib_port_state last_port_state;
struct mlx5_ib_dev *dev;
u8 native_port_num;
};
struct mlx5_ib_dbg_param {
@ -757,7 +759,7 @@ struct mlx5_ib_profile {
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
struct mlx5_roce roce;
struct mlx5_roce roce[MLX5_MAX_PORTS];
int num_ports;
/* serialize update of capability mask
*/

View File

@ -2962,8 +2962,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
(ibqp->qp_type == IB_QPT_XRC_INI) ||
(ibqp->qp_type == IB_QPT_XRC_TGT)) {
if (mlx5_lag_is_active(dev->mdev)) {
u8 p = mlx5_core_native_port_num(dev->mdev);
tx_affinity = (unsigned int)atomic_add_return(1,
&dev->roce.next_port) %
&dev->roce[p].next_port) %
MLX5_MAX_PORTS + 1;
context->flags |= cpu_to_be32(tx_affinity << 24);
}

View File

@ -1234,6 +1234,11 @@ static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
return !!(dev->priv.rl_table.max_size);
}
static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev)
{
return 1;
}
enum {
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
};