Merge remote-tracking branch 'mlx5-next/mlx5-next' into HEAD

Take mlx5-next so we can take a dependent two-patch series next.

Signed-off-by: Doug Ledford <dledford@redhat.com>
2024-12-23 14:32:56 +07:00 · 2019-06-18 22:44:36 -04:00 · 2019-06-18 22:44:36 -04:00 · 12dbc04db0
commit 12dbc04db0
parent 7608bf40cf 82b11f0719
30 changed files with 865 additions and 501 deletions
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@ -60,7 +60,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	if (!__mlx5_ib_add(ibdev, profile))
 		return -EINVAL;

-	rep->rep_if[REP_IB].priv = ibdev;
+	rep->rep_data[REP_IB].priv = ibdev;

 	return 0;
 }
@ -70,13 +70,13 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 {
 	struct mlx5_ib_dev *dev;

-	if (!rep->rep_if[REP_IB].priv ||
+	if (!rep->rep_data[REP_IB].priv ||
 	    rep->vport != MLX5_VPORT_UPLINK)
 		return;

 	dev = mlx5_ib_rep_to_dev(rep);
 	__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
-	rep->rep_if[REP_IB].priv = NULL;
+	rep->rep_data[REP_IB].priv = NULL;
 }

 static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
@ -84,16 +84,17 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
 	return mlx5_ib_rep_to_dev(rep);
 }

+static const struct mlx5_eswitch_rep_ops rep_ops = {
+	.load = mlx5_ib_vport_rep_load,
+	.unload = mlx5_ib_vport_rep_unload,
+	.get_proto_dev = mlx5_ib_vport_get_proto_dev,
+};
+
 void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	struct mlx5_eswitch_rep_if rep_if = {};

-	rep_if.load = mlx5_ib_vport_rep_load;
-	rep_if.unload = mlx5_ib_vport_rep_unload;
-	rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
-
-	mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB);
+	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
 }

 void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@ -72,6 +72,6 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
 static inline
 struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
 {
-	return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+	return rep->rep_data[REP_IB].priv;
 }
 #endif /* __MLX5_IB_REP_H__ */
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@ -920,6 +920,7 @@ struct mlx5_ib_lb_state {
 };

 struct mlx5_ib_pf_eq {
+	struct notifier_block irq_nb;
 	struct mlx5_ib_dev *dev;
 	struct mlx5_eq *core;
 	struct work_struct work;
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@ -1485,9 +1485,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
 	mlx5_eq_update_ci(eq->core, cc, 1);
 }

-static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type,
+			     void *data)
 {
-	struct mlx5_ib_pf_eq *eq = eq_ptr;
+	struct mlx5_ib_pf_eq *eq =
+		container_of(nb, struct mlx5_ib_pf_eq, irq_nb);
 	unsigned long flags;

 	if (spin_trylock_irqsave(&eq->lock, flags)) {
@ -1550,20 +1552,26 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 		goto err_mempool;
 	}

+	eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
 	param = (struct mlx5_eq_param) {
-		.index = MLX5_EQ_PFAULT_IDX,
+		.irq_index = 0,
 		.mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
 		.nent = MLX5_IB_NUM_PF_EQE,
-		.context = eq,
-		.handler = mlx5_ib_eq_pf_int
 	};
-	eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+	eq->core = mlx5_eq_create_generic(dev->mdev, &param);
 	if (IS_ERR(eq->core)) {
 		err = PTR_ERR(eq->core);
 		goto err_wq;
 	}
+	err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb);
+	if (err) {
+		mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err);
+		goto err_eq;
+	}

 	return 0;
+err_eq:
+	mlx5_eq_destroy_generic(dev->mdev, eq->core);
 err_wq:
 	destroy_workqueue(eq->wq);
 err_mempool:
@ -1576,6 +1584,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 {
 	int err;

+	mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
 	err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
 	cancel_work_sync(&eq->work);
 	destroy_workqueue(eq->wq);
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 #
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
-		transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o

--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
 	case MLX5_CMD_OP_DEALLOC_MEMIC:
 	case MLX5_CMD_OP_PAGE_FAULT_RESUME:
-	case MLX5_CMD_OP_QUERY_HOST_PARAMS:
+	case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
 		return MLX5_CMD_STAT_OK;

 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@ -628,7 +628,7 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
 	MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
-	MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS);
+	MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS);
 	default: return "unknown command opcode";
 	}
 }
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev)

 	mlx5_peer_pf_cleanup(dev);
 }
-
-static int mlx5_query_host_params_context(struct mlx5_core_dev *dev,
-					  u32 *out, int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {};
-
-	MLX5_SET(query_host_params_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_HOST_PARAMS);
-
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
-{
-	u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {};
-	int err;
-
-	err = mlx5_query_host_params_context(dev, out, sizeof(out));
-	if (err)
-		return err;
-
-	*num_vf = MLX5_GET(query_host_params_out, out,
-			   host_params_context.host_num_of_vfs);
-	mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf);
-
-	return 0;
-}
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@ -16,7 +16,6 @@ enum {
 bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
 int mlx5_ec_init(struct mlx5_core_dev *dev);
 void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
-int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf);

 #else  /* CONFIG_MLX5_ESWITCH */

@ -24,9 +23,6 @@ static inline bool
 mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
 static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
-static inline int
-mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
-{ return -EOPNOTSUPP; }

 #endif /* CONFIG_MLX5_ESWITCH */

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@ -1752,7 +1752,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	}

 	rpriv->netdev = netdev;
-	rep->rep_if[REP_ETH].priv = rpriv;
+	rep->rep_data[REP_ETH].priv = rpriv;
 	INIT_LIST_HEAD(&rpriv->vport_sqs_list);

 	if (rep->vport == MLX5_VPORT_UPLINK) {
@ -1826,16 +1826,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
 	return rpriv->netdev;
 }

+static const struct mlx5_eswitch_rep_ops rep_ops = {
+	.load = mlx5e_vport_rep_load,
+	.unload = mlx5e_vport_rep_unload,
+	.get_proto_dev = mlx5e_vport_rep_get_proto_dev
+};
+
 void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	struct mlx5_eswitch_rep_if rep_if = {};

-	rep_if.load = mlx5e_vport_rep_load;
-	rep_if.unload = mlx5e_vport_rep_unload;
-	rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
-
-	mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH);
+	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH);
 }

 void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@ -91,7 +91,7 @@ struct mlx5e_rep_priv {
 static inline
 struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
 {
-	return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv;
+	return rep->rep_data[REP_ETH].priv;
 }

 struct mlx5e_neigh {
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@ -61,17 +61,21 @@ enum {
 	MLX5_EQ_DOORBEL_OFFSET	= 0x40,
 };

-struct mlx5_irq_info {
-	cpumask_var_t mask;
-	char name[MLX5_MAX_IRQ_NAME];
-	void *context; /* dev_id provided to request_irq */
+/* The budget must not exceed MLX5_NUM_SPARE_EQE, to guarantee that we update
+ * the ci before we have polled all the entries in the EQ. MLX5_NUM_SPARE_EQE
+ * is used to set the EQ size; the budget must be smaller than the EQ size.
+ */
+enum {
+	MLX5_EQ_POLLING_BUDGET	= 128,
 };

+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
 struct mlx5_eq_table {
 	struct list_head        comp_eqs_list;
-	struct mlx5_eq          pages_eq;
-	struct mlx5_eq	        cmd_eq;
-	struct mlx5_eq          async_eq;
+	struct mlx5_eq_async    pages_eq;
+	struct mlx5_eq_async    cmd_eq;
+	struct mlx5_eq_async    async_eq;

 	struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];

@ -79,11 +83,8 @@ struct mlx5_eq_table {
 	struct mlx5_nb          cq_err_nb;

 	struct mutex            lock; /* sync async eqs creations */
-	int			num_comp_vectors;
-	struct mlx5_irq_info	*irq_info;
-#ifdef CONFIG_RFS_ACCEL
-	struct cpu_rmap         *rmap;
-#endif
+	int			num_comp_eqs;
+	struct mlx5_irq_table	*irq_table;
 };

 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)	    | \
@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
 	return cq;
 }

-static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+			    __always_unused unsigned long action,
+			    __always_unused void *data)
 {
-	struct mlx5_eq_comp *eq_comp = eq_ptr;
-	struct mlx5_eq *eq = eq_ptr;
+	struct mlx5_eq_comp *eq_comp =
+		container_of(nb, struct mlx5_eq_comp, irq_nb);
+	struct mlx5_eq *eq = &eq_comp->core;
 	struct mlx5_eqe *eqe;
-	int set_ci = 0;
+	int num_eqes = 0;
 	u32 cqn = -1;

-	while ((eqe = next_eqe_sw(eq))) {
+	eqe = next_eqe_sw(eq);
+	if (!eqe)
+		goto out;
+
+	do {
 		struct mlx5_core_cq *cq;
+
 		/* Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
 		 */
@ -151,26 +160,16 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
 		}

 		++eq->cons_index;
-		++set_ci;

-		/* The HCA will think the queue has overflowed if we
-		 * don't tell it we've been processing events.  We
-		 * create our EQs with MLX5_NUM_SPARE_EQE extra
-		 * entries, so we must update our consumer index at
-		 * least that often.
-		 */
-		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-			eq_update_ci(eq, 0);
-			set_ci = 0;
-		}
-	}
+	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));

+out:
 	eq_update_ci(eq, 1);

 	if (cqn != -1)
 		tasklet_schedule(&eq_comp->tasklet_ctx.task);

-	return IRQ_HANDLED;
+	return 0;
 }

 /* Some architectures don't latch interrupts when they are disabled, so using
@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)

 	disable_irq(eq->core.irqn);
 	count_eqe = eq->core.cons_index;
-	mlx5_eq_comp_int(eq->core.irqn, eq);
+	mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
 	count_eqe = eq->core.cons_index - count_eqe;
 	enable_irq(eq->core.irqn);

 	return count_eqe;
 }

-static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
+static int mlx5_eq_async_int(struct notifier_block *nb,
+			     unsigned long action, void *data)
 {
-	struct mlx5_eq *eq = eq_ptr;
+	struct mlx5_eq_async *eq_async =
+		container_of(nb, struct mlx5_eq_async, irq_nb);
+	struct mlx5_eq *eq = &eq_async->core;
 	struct mlx5_eq_table *eqt;
 	struct mlx5_core_dev *dev;
 	struct mlx5_eqe *eqe;
-	int set_ci = 0;
+	int num_eqes = 0;

 	dev = eq->dev;
 	eqt = dev->priv.eq_table;

-	while ((eqe = next_eqe_sw(eq))) {
+	eqe = next_eqe_sw(eq);
+	if (!eqe)
+		goto out;
+
+	do {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
 		atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);

 		++eq->cons_index;
-		++set_ci;

-		/* The HCA will think the queue has overflowed if we
-		 * don't tell it we've been processing events.  We
-		 * create our EQs with MLX5_NUM_SPARE_EQE extra
-		 * entries, so we must update our consumer index at
-		 * least that often.
-		 */
-		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-			eq_update_ci(eq, 0);
-			set_ci = 0;
-		}
-	}
+	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));

+out:
 	eq_update_ci(eq, 1);

-	return IRQ_HANDLED;
+	return 0;
 }

 static void init_eq_buf(struct mlx5_eq *eq)
@ -248,23 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq)
 }

 static int
-create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	      struct mlx5_eq_param *param)
 {
-	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	struct mlx5_cq_table *cq_table = &eq->cq_table;
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	struct mlx5_priv *priv = &dev->priv;
-	u8 vecidx = param->index;
+	u8 vecidx = param->irq_index;
 	__be64 *pas;
 	void *eqc;
 	int inlen;
 	u32 *in;
 	int err;

-	if (eq_table->irq_info[vecidx].context)
-		return -EEXIST;
-
 	/* Init CQ table */
 	memset(cq_table, 0, sizeof(*cq_table));
 	spin_lock_init(&cq_table->lock);
@ -307,34 +299,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
 	if (err)
 		goto err_in;

-	snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
-		 name, pci_name(dev->pdev));
-	eq_table->irq_info[vecidx].context = param->context;
-
 	eq->vecidx = vecidx;
 	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
 	eq->irqn = pci_irq_vector(dev->pdev, vecidx);
 	eq->dev = dev;
 	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
-	err = request_irq(eq->irqn, param->handler, 0,
-			  eq_table->irq_info[vecidx].name, param->context);
-	if (err)
-		goto err_eq;

 	err = mlx5_debug_eq_add(dev, eq);
 	if (err)
-		goto err_irq;
-
-	/* EQs are created in ARMED state
-	 */
-	eq_update_ci(eq, 1);
+		goto err_eq;

 	kvfree(in);
 	return 0;

-err_irq:
-	free_irq(eq->irqn, eq);
-
 err_eq:
 	mlx5_cmd_destroy_eq(dev, eq->eqn);

@ -346,19 +323,49 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
 	return err;
 }

-static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev: Device which owns the eq
+ * @eq: EQ to enable
+ * @nb: notifier call block
+ * mlx5_eq_enable - must be called after EQ is created in device.
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		   struct notifier_block *nb)
 {
 	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-	struct mlx5_irq_info *irq_info;
 	int err;

-	irq_info = &eq_table->irq_info[eq->vecidx];
+	err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+	if (!err)
+		eq_update_ci(eq, 1);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
+
+/**
+ * mlx5_eq_disable - Disable EQ for receiving EQEs
+ * @dev: Device which owns the eq
+ * @eq: EQ to disable
+ * @nb: notifier call block
+ * mlx5_eq_disable - must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		     struct notifier_block *nb)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+	mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
+
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+	int err;

 	mlx5_debug_eq_remove(dev, eq);

-	free_irq(eq->irqn, irq_info->context);
-	irq_info->context = NULL;
-
 	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
 	if (err)
 		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
@ -423,6 +430,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
 	for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
 		ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);

+	eq_table->irq_table = dev->priv.irq_table;
 	return 0;

 kvfree_eq_table:
@ -439,19 +447,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)

 /* Async EQs */

-static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+static int create_async_eq(struct mlx5_core_dev *dev,
 			   struct mlx5_eq *eq, struct mlx5_eq_param *param)
 {
 	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	int err;

 	mutex_lock(&eq_table->lock);
-	if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
-		err = -ENOSPC;
+	/* Async EQs must share irq index 0 */
+	if (param->irq_index != 0) {
+		err = -EINVAL;
 		goto unlock;
 	}

-	err = create_map_eq(dev, eq, name, param);
+	err = create_map_eq(dev, eq, param);
 unlock:
 	mutex_unlock(&eq_table->lock);
 	return err;
@ -480,7 +489,7 @@ static int cq_err_event_notifier(struct notifier_block *nb,
 	/* type == MLX5_EVENT_TYPE_CQ_ERROR */

 	eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
-	eq  = &eqt->async_eq;
+	eq  = &eqt->async_eq.core;
 	eqe = data;

 	cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
@ -533,8 +542,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);

-	if (mlx5_core_is_ecpf_esw_manager(dev))
-		async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE);
+	if (mlx5_eswitch_is_funcs_handler(dev))
+		async_event_mask |=
+			(1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);

 	return async_event_mask;
 }
@ -548,55 +558,73 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 	MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
 	mlx5_eq_notifier_register(dev, &table->cq_err_nb);

+	table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
 	param = (struct mlx5_eq_param) {
-		.index = MLX5_EQ_CMD_IDX,
+		.irq_index = 0,
 		.mask = 1ull << MLX5_EVENT_TYPE_CMD,
 		.nent = MLX5_NUM_CMD_EQE,
-		.context = &table->cmd_eq,
-		.handler = mlx5_eq_async_int,
 	};
-	err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
+	err = create_async_eq(dev, &table->cmd_eq.core, &param);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
 		goto err0;
 	}
-
-	mlx5_cmd_use_events(dev);
-
-	param = (struct mlx5_eq_param) {
-		.index = MLX5_EQ_ASYNC_IDX,
-		.mask = gather_async_events_mask(dev),
-		.nent = MLX5_NUM_ASYNC_EQE,
-		.context = &table->async_eq,
-		.handler = mlx5_eq_async_int,
-	};
-	err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
+	err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
 	if (err) {
-		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+		mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
 		goto err1;
 	}
+	mlx5_cmd_use_events(dev);

+	table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
 	param = (struct mlx5_eq_param) {
-		.index = MLX5_EQ_PAGEREQ_IDX,
+		.irq_index = 0,
+		.mask = gather_async_events_mask(dev),
+		.nent = MLX5_NUM_ASYNC_EQE,
+	};
+	err = create_async_eq(dev, &table->async_eq.core, &param);
+	if (err) {
+		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+		goto err2;
+	}
+	err = mlx5_eq_enable(dev, &table->async_eq.core,
+			     &table->async_eq.irq_nb);
+	if (err) {
+		mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
+		goto err3;
+	}
+
+	table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
+	param = (struct mlx5_eq_param) {
+		.irq_index = 0,
 		.mask =  1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
 		.nent = /* TODO: sriov max_vf + */ 1,
-		.context = &table->pages_eq,
-		.handler = mlx5_eq_async_int,
 	};
-	err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
+	err = create_async_eq(dev, &table->pages_eq.core, &param);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
-		goto err2;
+		goto err4;
+	}
+	err = mlx5_eq_enable(dev, &table->pages_eq.core,
+			     &table->pages_eq.irq_nb);
+	if (err) {
+		mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
+		goto err5;
 	}

 	return err;

+err5:
+	destroy_async_eq(dev, &table->pages_eq.core);
+err4:
+	mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+err3:
+	destroy_async_eq(dev, &table->async_eq.core);
 err2:
-	destroy_async_eq(dev, &table->async_eq);
-
-err1:
 	mlx5_cmd_use_polling(dev);
-	destroy_async_eq(dev, &table->cmd_eq);
+	mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+err1:
+	destroy_async_eq(dev, &table->cmd_eq.core);
 err0:
 	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 	return err;
@ -607,19 +635,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 	int err;

-	err = destroy_async_eq(dev, &table->pages_eq);
+	mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
+	err = destroy_async_eq(dev, &table->pages_eq.core);
 	if (err)
 		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
 			      err);

-	err = destroy_async_eq(dev, &table->async_eq);
+	mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+	err = destroy_async_eq(dev, &table->async_eq.core);
 	if (err)
 		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
 			      err);

 	mlx5_cmd_use_polling(dev);

-	err = destroy_async_eq(dev, &table->cmd_eq);
+	mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+	err = destroy_async_eq(dev, &table->cmd_eq.core);
 	if (err)
 		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
 			      err);
@ -629,24 +660,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)

 struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
 {
-	return &dev->priv.eq_table->async_eq;
+	return &dev->priv.eq_table->async_eq.core;
 }

 void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
 {
-	synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+	synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
 }

 void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
 {
-	synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+	synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
 }

 /* Generic EQ API for mlx5_core consumers
 * Needed For RDMA ODP EQ for now
 */
 struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
 		       struct mlx5_eq_param *param)
 {
 	struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
@ -655,7 +686,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
 	if (!eq)
 		return ERR_PTR(-ENOMEM);

-	err = create_async_eq(dev, name, eq, param);
+	err = create_async_eq(dev, eq, param);
 	if (err) {
 		kvfree(eq);
 		eq = ERR_PTR(err);
@ -713,84 +744,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
 }
 EXPORT_SYMBOL(mlx5_eq_update_ci);

-/* Completion EQs */
-
-static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-	struct mlx5_priv *priv  = &mdev->priv;
-	int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
-	int irq = pci_irq_vector(mdev->pdev, vecidx);
-	struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
-	if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
-		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
-		return -ENOMEM;
-	}
-
-	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
-			irq_info->mask);
-
-	if (IS_ENABLED(CONFIG_SMP) &&
-	    irq_set_affinity_hint(irq, irq_info->mask))
-		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
-	return 0;
-}
-
-static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-	int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
-	struct mlx5_priv *priv  = &mdev->priv;
-	int irq = pci_irq_vector(mdev->pdev, vecidx);
-	struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
-	irq_set_affinity_hint(irq, NULL);
-	free_cpumask_var(irq_info->mask);
-}
-
-static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
-{
-	int err;
-	int i;
-
-	for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
-		err = set_comp_irq_affinity_hint(mdev, i);
-		if (err)
-			goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	for (i--; i >= 0; i--)
-		clear_comp_irq_affinity_hint(mdev, i);
-
-	return err;
-}
-
-static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
-{
-	int i;
-
-	for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
-		clear_comp_irq_affinity_hint(mdev, i);
-}
-
 static void destroy_comp_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 	struct mlx5_eq_comp *eq, *n;

-	clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
-	if (table->rmap) {
-		free_irq_cpu_rmap(table->rmap);
-		table->rmap = NULL;
-	}
-#endif
 	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
 		list_del(&eq->list);
+		mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
 		if (destroy_unmap_eq(dev, &eq->core))
 			mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
 				       eq->core.eqn);
@ -802,23 +763,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
 static int create_comp_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
-	char name[MLX5_MAX_IRQ_NAME];
 	struct mlx5_eq_comp *eq;
-	int ncomp_vec;
+	int ncomp_eqs;
 	int nent;
 	int err;
 	int i;

 	INIT_LIST_HEAD(&table->comp_eqs_list);
-	ncomp_vec = table->num_comp_vectors;
+	ncomp_eqs = table->num_comp_eqs;
 	nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
-	table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
-	if (!table->rmap)
-		return -ENOMEM;
-#endif
-	for (i = 0; i < ncomp_vec; i++) {
-		int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+	for (i = 0; i < ncomp_eqs; i++) {
+		int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
 		struct mlx5_eq_param param = {};

 		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
@ -833,33 +788,29 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 		tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
 			     (unsigned long)&eq->tasklet_ctx);

-#ifdef CONFIG_RFS_ACCEL
-		irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
-#endif
-		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+		eq->irq_nb.notifier_call = mlx5_eq_comp_int;
 		param = (struct mlx5_eq_param) {
-			.index = vecidx,
+			.irq_index = vecidx,
 			.mask = 0,
 			.nent = nent,
-			.context = &eq->core,
-			.handler = mlx5_eq_comp_int
 		};
-		err = create_map_eq(dev, &eq->core, name, &param);
+		err = create_map_eq(dev, &eq->core, &param);
 		if (err) {
 			kfree(eq);
 			goto clean;
 		}
+		err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
+		if (err) {
+			destroy_unmap_eq(dev, &eq->core);
+			kfree(eq);
+			goto clean;
+		}
+
 		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
 		/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
 		list_add_tail(&eq->list, &table->comp_eqs_list);
 	}

-	err = set_comp_irq_affinity_hints(dev);
-	if (err) {
-		mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
-		goto clean;
-	}
-
 	return 0;

 clean:
@ -890,22 +841,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn);

 unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
 {
-	return dev->priv.eq_table->num_comp_vectors;
+	return dev->priv.eq_table->num_comp_eqs;
 }
 EXPORT_SYMBOL(mlx5_comp_vectors_count);

 struct cpumask *
 mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
 {
-	/* TODO: consider irq_get_affinity_mask(irq) */
-	return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+	int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+
+	return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
+					  vecidx);
 }
 EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);

 #ifdef CONFIG_RFS_ACCEL
 struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
 {
-	return dev->priv.eq_table->rmap;
+	return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
 }
 #endif

@ -926,82 +879,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
-	int i, max_eqs;
-
-	clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
-	if (table->rmap) {
-		free_irq_cpu_rmap(table->rmap);
-		table->rmap = NULL;
-	}
-#endif

 	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
-	max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
-	for (i = max_eqs - 1; i >= 0; i--) {
-		if (!table->irq_info[i].context)
-			continue;
-		free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
-		table->irq_info[i].context = NULL;
-	}
+	mlx5_irq_table_destroy(dev);
 	mutex_unlock(&table->lock);
-	pci_free_irq_vectors(dev->pdev);
-}
-
-static int alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
-	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_eq_table *table = priv->eq_table;
-	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
-		      MLX5_CAP_GEN(dev, max_num_eqs) :
-		      1 << MLX5_CAP_GEN(dev, log_max_eq);
-	int nvec;
-	int err;
-
-	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-	       MLX5_EQ_VEC_COMP_BASE;
-	nvec = min_t(int, nvec, num_eqs);
-	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
-		return -ENOMEM;
-
-	table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
-	if (!table->irq_info)
-		return -ENOMEM;
-
-	nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
-				     nvec, PCI_IRQ_MSIX);
-	if (nvec < 0) {
-		err = nvec;
-		goto err_free_irq_info;
-	}
-
-	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
-	return 0;
-
-err_free_irq_info:
-	kfree(table->irq_info);
-	return err;
-}
-
-static void free_irq_vectors(struct mlx5_core_dev *dev)
-{
-	struct mlx5_priv *priv = &dev->priv;
-
-	pci_free_irq_vectors(dev->pdev);
-	kfree(priv->eq_table->irq_info);
 }

 int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 {
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	int err;

-	err = alloc_irq_vectors(dev);
-	if (err) {
-		mlx5_core_err(dev, "alloc irq vectors failed\n");
-		return err;
-	}
+	eq_table->num_comp_eqs =
+		mlx5_irq_get_num_comp(eq_table->irq_table);

 	err = create_async_eqs(dev);
 	if (err) {
@ -1019,7 +909,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 err_comp_eqs:
 	destroy_async_eqs(dev);
 err_async_eqs:
-	free_irq_vectors(dev);
 	return err;
 }

@ -1027,7 +916,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
 {
 	destroy_comp_eqs(dev);
 	destroy_async_eqs(dev);
-	free_irq_vectors(dev);
 }

 int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@ -1686,13 +1686,23 @@ static int eswitch_vport_event(struct notifier_block *nb,
 	return NOTIFY_OK;
 }

+/* Run the QUERY_ESW_FUNCTIONS command on @dev. The raw reply is written to
+ * @out, which is @outlen bytes long. Returns the mlx5_cmd_exec() result.
+ */
+int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen)
+{
+	u32 cmd[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
+	int err;
+
+	MLX5_SET(query_esw_functions_in, cmd, opcode,
+		 MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
+	err = mlx5_cmd_exec(dev, cmd, sizeof(cmd), out, outlen);
+
+	return err;
+}
+
 /* Public E-Switch API */
 #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))

 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
-	int vf_nvports = 0, total_nvports = 0;
 	struct mlx5_vport *vport;
+	int total_nvports = 0;
 	int err;
 	int i, enabled_events;

@ -1711,15 +1721,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);

 	if (mode == SRIOV_OFFLOADS) {
-		if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
-			err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports);
-			if (err)
-				return err;
+		if (mlx5_core_is_ecpf_esw_manager(esw->dev))
 			total_nvports = esw->total_vports;
-		} else {
-			vf_nvports = nvfs;
+		else
 			total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
-		}
 	}

 	esw->mode = mode;
@ -1733,7 +1738,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	} else {
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-		err = esw_offloads_init(esw, vf_nvports, total_nvports);
+		err = esw_offloads_init(esw, nvfs, total_nvports);
 	}

 	if (err)
@ -2452,6 +2457,17 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);

+/* Report the encap mode stored in the e-switch offloads state of @dev, or
+ * DEVLINK_ESWITCH_ENCAP_MODE_NONE when ESW_ALLOWED() rejects the e-switch.
+ */
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+	if (!ESW_ALLOWED(esw))
+		return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+
+	return esw->offloads.encap;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+
 bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
 {
 	if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@ -173,9 +173,10 @@ struct mlx5_esw_offload {
 	struct mutex peer_mutex;
 	DECLARE_HASHTABLE(encap_tbl, 8);
 	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+	const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
 	u8 inline_mode;
 	u64 num_flows;
-	u8 encap;
+	enum devlink_eswitch_encap_mode encap;
 };

 /* E-Switch MC FDB table hash node */
@ -190,7 +191,7 @@ struct mlx5_host_work {
 	struct mlx5_eswitch	*esw;
 };

-struct mlx5_host_info {
+struct mlx5_esw_functions {
 	struct mlx5_nb		nb;
 	u16			num_vfs;
 };
@ -219,7 +220,7 @@ struct mlx5_eswitch {
 	int                     mode;
 	int                     nvports;
 	u16                     manager_vport;
-	struct mlx5_host_info	host_info;
+	struct mlx5_esw_functions esw_funcs;
 };

 void esw_offloads_cleanup(struct mlx5_eswitch *esw);
@ -356,9 +357,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 					 struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode encap,
 					struct netlink_ext_ack *extack);
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode *encap);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);

 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
@ -386,6 +389,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
 bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
 			       struct mlx5_core_dev *dev1);

+int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen);
+
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)

 #define esw_info(__dev, format, ...)			\
@ -404,6 +409,18 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
 		MLX5_VPORT_ECPF : MLX5_VPORT_PF;
 }

+static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev)
+{
+	/* Ideally the esw_functions_changed capability alone would tell
+	 * whether this device processes the functions changed event, be it
+	 * an ECPF or a PF. Some ECPF based devices might not set the
+	 * capability, so an ECPF e-switch manager is accepted as well.
+	 */
+	if (MLX5_CAP_ESW(dev, esw_functions_changed))
+		return true;
+
+	return mlx5_core_is_ecpf_esw_manager(dev);
+}
+
 static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
 {
 	/* Uplink always locate at the last element of the array.*/
@ -498,6 +515,12 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
 static inline int  mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
 static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
 static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
+/* NOTE(review): these sit next to the other no-op eswitch stubs, presumably
+ * the branch built without e-switch support - confirm against the full header.
+ * Stub: the device is never reported as the functions changed handler.
+ */
+static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+/* Stub: querying the e-switch functions always fails with -EOPNOTSUPP. */
+static inline int
+mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen)
+{
+	return -EOPNOTSUPP;
+}

 #define FDB_MAX_CHAIN 1
 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@ -41,7 +41,6 @@
 #include "en.h"
 #include "fs_core.h"
 #include "lib/devcom.h"
-#include "ecpf.h"
 #include "lib/eq.h"

 /* There are two match-all miss flows, one for unicast dst mac and
@ -333,7 +332,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 	esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
 	for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
 		rep = &esw->offloads.vport_reps[vf_vport];
-		if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED)
+		if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
 			continue;

 		err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
@ -1277,7 +1276,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 		ether_addr_copy(rep->hw_id, hw_id);

 		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
-			atomic_set(&rep->rep_if[rep_type].state,
+			atomic_set(&rep->rep_data[rep_type].state,
 				   REP_UNREGISTERED);
 	}

@ -1287,9 +1286,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
 				      struct mlx5_eswitch_rep *rep, u8 rep_type)
 {
-	if (atomic_cmpxchg(&rep->rep_if[rep_type].state,
+	if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
 			   REP_LOADED, REP_REGISTERED) == REP_LOADED)
-		rep->rep_if[rep_type].unload(rep);
+		esw->offloads.rep_ops[rep_type]->unload(rep);
 }

 static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
@ -1350,11 +1349,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
 {
 	int err = 0;

-	if (atomic_cmpxchg(&rep->rep_if[rep_type].state,
+	if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
 			   REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
-		err = rep->rep_if[rep_type].load(esw->dev, rep);
+		err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
 		if (err)
-			atomic_set(&rep->rep_if[rep_type].state,
+			atomic_set(&rep->rep_data[rep_type].state,
 				   REP_REGISTERED);
 	}

@ -1437,34 +1436,13 @@ static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
 	return err;
 }

-static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
-				 u8 rep_type)
-{
-	int err;
-
-	/* Special vports must be loaded first. */
-	err = __load_reps_special_vport(esw, rep_type);
-	if (err)
-		return err;
-
-	err = __load_reps_vf_vport(esw, nvports, rep_type);
-	if (err)
-		goto err_vfs;
-
-	return 0;
-
-err_vfs:
-	__unload_reps_special_vport(esw, rep_type);
-	return err;
-}
-
-static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_special_vport(struct mlx5_eswitch *esw)
 {
 	u8 rep_type = 0;
 	int err;

 	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
-		err = __load_reps_all_vport(esw, nvports, rep_type);
+		err = __load_reps_special_vport(esw, rep_type);
 		if (err)
 			goto err_reps;
 	}
@ -1473,7 +1451,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)

 err_reps:
 	while (rep_type-- > 0)
-		__unload_reps_all_vport(esw, nvports, rep_type);
+		__unload_reps_special_vport(esw, rep_type);
 	return err;
 }

@ -1782,57 +1760,102 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
 		esw_prio_tag_acls_cleanup(esw);
 }

-static void esw_host_params_event_handler(struct work_struct *work)
+static void esw_functions_changed_event_handler(struct work_struct *work)
 {
+	u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
 	struct mlx5_host_work *host_work;
 	struct mlx5_eswitch *esw;
-	int err, num_vf = 0;
+	u16 num_vfs = 0;
+	int err;

 	host_work = container_of(work, struct mlx5_host_work, work);
 	esw = host_work->esw;

-	err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf);
-	if (err || num_vf == esw->host_info.num_vfs)
+	err = mlx5_esw_query_functions(esw->dev, out, sizeof(out));
+	num_vfs = MLX5_GET(query_esw_functions_out, out,
+			   host_params_context.host_num_of_vfs);
+	if (err || num_vfs == esw->esw_funcs.num_vfs)
 		goto out;

 	/* Number of VFs can only change from "0 to x" or "x to 0". */
-	if (esw->host_info.num_vfs > 0) {
-		esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs);
+	if (esw->esw_funcs.num_vfs > 0) {
+		esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs);
 	} else {
-		err = esw_offloads_load_vf_reps(esw, num_vf);
+		err = esw_offloads_load_vf_reps(esw, num_vfs);

 		if (err)
 			goto out;
 	}

-	esw->host_info.num_vfs = num_vf;
+	esw->esw_funcs.num_vfs = num_vfs;

 out:
 	kfree(host_work);
 }

-static int esw_host_params_event(struct notifier_block *nb,
-				 unsigned long type, void *data)
+/* Work handler used when the functions changed event is emulated: load the
+ * VF representors for the VF count already stored in esw_funcs, if any, and
+ * free the work item. A load failure is only logged.
+ */
+static void esw_emulate_event_handler(struct work_struct *work)
+{
+	struct mlx5_host_work *host_work;
+	struct mlx5_eswitch *esw;
+	int err;
+
+	host_work = container_of(work, struct mlx5_host_work, work);
+	esw = host_work->esw;
+
+	if (!esw->esw_funcs.num_vfs)
+		goto out;
+
+	err = esw_offloads_load_vf_reps(esw, esw->esw_funcs.num_vfs);
+	if (err)
+		esw_warn(esw->dev, "Load vf reps err=%d\n", err);
+
+out:
+	kfree(host_work);
+}
+
+static int esw_functions_changed_event(struct notifier_block *nb,
+				       unsigned long type, void *data)
+{
+	struct mlx5_esw_functions *esw_funcs;
 	struct mlx5_host_work *host_work;
-	struct mlx5_host_info *host_info;
 	struct mlx5_eswitch *esw;

 	host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
 	if (!host_work)
 		return NOTIFY_DONE;

-	host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb);
-	esw = container_of(host_info, struct mlx5_eswitch, host_info);
+	esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb);
+	esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs);

 	host_work->esw = esw;

-	INIT_WORK(&host_work->work, esw_host_params_event_handler);
+	if (mlx5_eswitch_is_funcs_handler(esw->dev))
+		INIT_WORK(&host_work->work,
+			  esw_functions_changed_event_handler);
+	else
+		INIT_WORK(&host_work->work, esw_emulate_event_handler);
 	queue_work(esw->work_queue, &host_work->work);

 	return NOTIFY_OK;
 }

+/* Prepare the functions changed bookkeeping of @esw.
+ *
+ * A device that is not the functions handler just records @vf_nvports as its
+ * VF count. A handler device starts from zero VFs and registers the
+ * ESW_FUNCTIONS_CHANGED notifier on the EQ.
+ */
+static void esw_functions_changed_event_init(struct mlx5_eswitch *esw,
+					     u16 vf_nvports)
+{
+	if (!mlx5_eswitch_is_funcs_handler(esw->dev)) {
+		esw->esw_funcs.num_vfs = vf_nvports;
+		return;
+	}
+
+	esw->esw_funcs.num_vfs = 0;
+	MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event,
+		     ESW_FUNCTIONS_CHANGED);
+	mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
+}
+
+/* Undo esw_functions_changed_event_init(): on a functions handler device,
+ * unregister the notifier and flush the e-switch work queue. Nothing is done
+ * on other devices.
+ */
+static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw)
+{
+	if (mlx5_eswitch_is_funcs_handler(esw->dev)) {
+		mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
+		flush_workqueue(esw->work_queue);
+	}
+}
+
 int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
 		      int total_nvports)
 {
@ -1842,21 +1865,30 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
 	if (err)
 		return err;

-	err = esw_offloads_load_all_reps(esw, vf_nvports);
+	/* Only load special vports reps. VF reps will be loaded in
+	 * context of functions_changed event handler through real
+	 * or emulated event.
+	 */
+	err = esw_offloads_load_special_vport(esw);
 	if (err)
 		goto err_reps;

 	esw_offloads_devcom_init(esw);

-	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
-		MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event,
-			     HOST_PARAMS_CHANGE);
-		mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb);
-		esw->host_info.num_vfs = vf_nvports;
-	}
+	esw_functions_changed_event_init(esw, vf_nvports);

 	mlx5_rdma_enable_roce(esw->dev);

+	/* Call esw_functions_changed event to load VF reps:
+	 * 1. HW does not support the event then emulate it
+	 * Or
+	 * 2. The event was already notified when num_vfs changed
+	 * and eswitch was in legacy mode
+	 */
+	esw_functions_changed_event(&esw->esw_funcs.nb.nb,
+				    MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED,
+				    NULL);
+
 	return 0;

 err_reps:
@ -1885,19 +1917,10 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,

 void esw_offloads_cleanup(struct mlx5_eswitch *esw)
 {
-	u16 num_vfs;
-
-	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
-		mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb);
-		flush_workqueue(esw->work_queue);
-		num_vfs = esw->host_info.num_vfs;
-	} else {
-		num_vfs = esw->dev->priv.sriov.num_vfs;
-	}
-
+	esw_functions_changed_event_cleanup(esw);
 	mlx5_rdma_disable_roce(esw->dev);
 	esw_offloads_devcom_cleanup(esw);
-	esw_offloads_unload_all_reps(esw, num_vfs);
+	esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs);
 	esw_offloads_steering_cleanup(esw);
 }

@ -2137,7 +2160,8 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 	return 0;
 }

-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode encap,
 					struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
@ -2186,7 +2210,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
 	return err;
 }

-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+					enum devlink_eswitch_encap_mode *encap)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
@ -2201,21 +2226,17 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
 }

 void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
-				      struct mlx5_eswitch_rep_if *__rep_if,
+				      const struct mlx5_eswitch_rep_ops *ops,
 				      u8 rep_type)
 {
-	struct mlx5_eswitch_rep_if *rep_if;
+	struct mlx5_eswitch_rep_data *rep_data;
 	struct mlx5_eswitch_rep *rep;
 	int i;

+	esw->offloads.rep_ops[rep_type] = ops;
 	mlx5_esw_for_all_reps(esw, i, rep) {
-		rep_if = &rep->rep_if[rep_type];
-		rep_if->load   = __rep_if->load;
-		rep_if->unload = __rep_if->unload;
-		rep_if->get_proto_dev = __rep_if->get_proto_dev;
-		rep_if->priv = __rep_if->priv;
-
-		atomic_set(&rep_if->state, REP_REGISTERED);
+		rep_data = &rep->rep_data[rep_type];
+		atomic_set(&rep_data->state, REP_REGISTERED);
 	}
 }
 EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
@ -2230,7 +2251,7 @@ void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
 		__unload_reps_all_vport(esw, max_vf, rep_type);

 	mlx5_esw_for_all_reps(esw, i, rep)
-		atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED);
+		atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
 }
 EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);

@ -2239,7 +2260,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 	struct mlx5_eswitch_rep *rep;

 	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
-	return rep->rep_if[rep_type].priv;
+	return rep->rep_data[rep_type].priv;
 }

 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
@ -2250,9 +2271,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,

 	rep = mlx5_eswitch_get_rep(esw, vport);

-	if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED &&
-	    rep->rep_if[rep_type].get_proto_dev)
-		return rep->rep_if[rep_type].get_proto_dev(rep);
+	if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+	    esw->offloads.rep_ops[rep_type]->get_proto_dev)
+		return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep);
 	return NULL;
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_STALL_EVENT";
 	case MLX5_EVENT_TYPE_CMD:
 		return "MLX5_EVENT_TYPE_CMD";
-	case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE:
-		return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE";
+	case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
+		return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
 	case MLX5_EVENT_TYPE_PAGE_REQUEST:
 		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
 	case MLX5_EVENT_TYPE_PAGE_FAULT:
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 {
 	int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
 	int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+	int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION);
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
 	struct mlx5_core_dev *dev = ns->dev;
@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 		 en_decap);
 	MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
 		 en_encap);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table,
+		 term);

 	switch (ft->op_mod) {
 	case FS_FT_OP_MOD_NORMAL:
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@ -7,7 +7,6 @@
 #include <linux/mlx5/eq.h>
 #include <linux/mlx5/cq.h>

-#define MLX5_MAX_IRQ_NAME   (32)
 #define MLX5_EQE_SIZE       (sizeof(struct mlx5_eqe))

 struct mlx5_eq_tasklet {
@ -36,8 +35,14 @@ struct mlx5_eq {
 	struct mlx5_rsc_debug   *dbg;
 };

+/* An mlx5_eq together with a notifier block.
+ * NOTE(review): presumably the wrapper for asynchronous EQs, with irq_nb being
+ * the block handed to mlx5_irq_attach_nb() - confirm against eq.c.
+ */
+struct mlx5_eq_async {
+	struct mlx5_eq          core;
+	struct notifier_block   irq_nb;
+};
+
 struct mlx5_eq_comp {
-	struct mlx5_eq          core; /* Must be first */
+	struct mlx5_eq          core;
+	struct notifier_block   irq_nb;
 	struct mlx5_eq_tasklet  tasklet_ctx;
 	struct list_head        list;
 };
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@ -169,18 +169,28 @@ static struct mlx5_profile profile[] = {

 #define FW_INIT_TIMEOUT_MILI		2000
 #define FW_INIT_WAIT_MS			2
-#define FW_PRE_INIT_TIMEOUT_MILI	10000
+#define FW_PRE_INIT_TIMEOUT_MILI	120000
+#define FW_INIT_WARN_MESSAGE_INTERVAL	20000

-static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
+			u32 warn_time_mili)
 {
+	unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
 	unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
 	int err = 0;

+	BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
+
 	while (fw_initializing(dev)) {
 		if (time_after(jiffies, end)) {
 			err = -EBUSY;
 			break;
 		}
+		if (warn_time_mili && time_after(jiffies, warn)) {
+			mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
+				       jiffies_to_msecs(end - warn) / 1000);
+			warn = jiffies + msecs_to_jiffies(warn_time_mili);
+		}
 		msleep(FW_INIT_WAIT_MS);
 	}

@ -794,10 +804,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 		goto err_devcom;
 	}

+	err = mlx5_irq_table_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "failed to initialize irq table\n");
+		goto err_devcom;
+	}
+
 	err = mlx5_eq_table_init(dev);
 	if (err) {
 		mlx5_core_err(dev, "failed to initialize eq\n");
-		goto err_devcom;
+		goto err_irq_cleanup;
 	}

 	err = mlx5_events_init(dev);
@ -834,32 +850,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 		goto err_rl_cleanup;
 	}

-	err = mlx5_eswitch_init(dev);
-	if (err) {
-		mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
-		goto err_mpfs_cleanup;
-	}
-
 	err = mlx5_sriov_init(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to init sriov %d\n", err);
-		goto err_eswitch_cleanup;
+		goto err_mpfs_cleanup;
+	}
+
+	err = mlx5_eswitch_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+		goto err_sriov_cleanup;
 	}

 	err = mlx5_fpga_init(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
-		goto err_sriov_cleanup;
+		goto err_eswitch_cleanup;
 	}

 	dev->tracer = mlx5_fw_tracer_create(dev);

 	return 0;

-err_sriov_cleanup:
-	mlx5_sriov_cleanup(dev);
 err_eswitch_cleanup:
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_sriov_cleanup:
+	mlx5_sriov_cleanup(dev);
 err_mpfs_cleanup:
 	mlx5_mpfs_cleanup(dev);
 err_rl_cleanup:
@ -873,6 +889,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 	mlx5_events_cleanup(dev);
 err_eq_cleanup:
 	mlx5_eq_table_cleanup(dev);
+err_irq_cleanup:
+	mlx5_irq_table_cleanup(dev);
 err_devcom:
 	mlx5_devcom_unregister_device(dev->priv.devcom);

@ -883,8 +901,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
 	mlx5_fw_tracer_destroy(dev->tracer);
 	mlx5_fpga_cleanup(dev);
-	mlx5_sriov_cleanup(dev);
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
+	mlx5_sriov_cleanup(dev);
 	mlx5_mpfs_cleanup(dev);
 	mlx5_cleanup_rl_table(dev);
 	mlx5_vxlan_destroy(dev->vxlan);
@ -895,6 +913,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_cq_debugfs_cleanup(dev);
 	mlx5_events_cleanup(dev);
 	mlx5_eq_table_cleanup(dev);
+	mlx5_irq_table_cleanup(dev);
 	mlx5_devcom_unregister_device(dev->priv.devcom);
 }

@ -911,7 +930,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)

 	/* wait for firmware to accept initialization segments configurations
 	 */
-	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
 	if (err) {
 		mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
 			      FW_PRE_INIT_TIMEOUT_MILI);
@ -924,7 +943,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
 		return err;
 	}

-	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
 	if (err) {
 		mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
 			      FW_INIT_TIMEOUT_MILI);
@ -1028,6 +1047,12 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 	mlx5_events_start(dev);
 	mlx5_pagealloc_start(dev);

+	err = mlx5_irq_table_create(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to alloc IRQs\n");
+		goto err_irq_table;
+	}
+
 	err = mlx5_eq_table_create(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to create EQs\n");
@ -1099,6 +1124,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 err_fw_tracer:
 	mlx5_eq_table_destroy(dev);
 err_eq_table:
+	mlx5_irq_table_destroy(dev);
+err_irq_table:
 	mlx5_pagealloc_stop(dev);
 	mlx5_events_stop(dev);
 	mlx5_put_uars_page(dev, dev->priv.uar);
@ -1115,6 +1142,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
 	mlx5_fpga_device_stop(dev);
 	mlx5_fw_tracer_cleanup(dev->tracer);
 	mlx5_eq_table_destroy(dev);
+	mlx5_irq_table_destroy(dev);
 	mlx5_pagealloc_stop(dev);
 	mlx5_events_stop(dev);
 	mlx5_put_uars_page(dev, dev->priv.uar);
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@ -153,6 +153,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
 void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove(struct mlx5_core_dev *dev);

+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb);
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+
 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
 void mlx5_events_start(struct mlx5_core_dev *dev);
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+
+#define MLX5_MAX_IRQ_NAME (32)
+
+/* State kept for a single IRQ vector. */
+struct mlx5_irq {
+	/* Notifier chain run by mlx5_irq_int_handler(); its address is also
+	 * the dev_id cookie passed to request_irq() and free_irq().
+	 */
+	struct atomic_notifier_head nh;
+	/* Affinity hint mask, allocated in set_comp_irq_affinity_hint(). */
+	cpumask_var_t mask;
+	/* IRQ name passed to request_irq(), formatted in request_irqs(). */
+	char name[MLX5_MAX_IRQ_NAME];
+};
+
+/* Per-device IRQ bookkeeping, reachable through dev->priv.irq_table. */
+struct mlx5_irq_table {
+	/* Array of per-vector state, allocated in mlx5_irq_table_create(). */
+	struct mlx5_irq *irq;
+	/* Vector count returned by pci_alloc_irq_vectors(). */
+	int nvec;
+#ifdef CONFIG_RFS_ACCEL
+	/* CPU reverse map filled by irq_set_rmap() with the completion vectors. */
+	struct cpu_rmap *rmap;
+#endif
+};
+
+/* Allocate a zeroed IRQ table and attach it to @dev.
+ * Returns 0 on success or -ENOMEM.
+ */
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *tbl = kvzalloc(sizeof(*tbl), GFP_KERNEL);
+
+	if (!tbl)
+		return -ENOMEM;
+
+	dev->priv.irq_table = tbl;
+	return 0;
+}
+
+/* Free the IRQ table allocated by mlx5_irq_table_init(). */
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *tbl = dev->priv.irq_table;
+
+	kvfree(tbl);
+}
+
+/* Number of vectors in @table located at or above MLX5_IRQ_VEC_COMP_BASE. */
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
+{
+	int nvec = table->nvec;
+
+	return nvec - MLX5_IRQ_VEC_COMP_BASE;
+}
+
+/* Return the per-vector state of vector @vecidx of @dev (no bounds check). */
+static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
+{
+	return dev->priv.irq_table->irq + vecidx;
+}
+
+/* Register @nb on the notifier chain of vector @vecidx.
+ * Returns the atomic_notifier_chain_register() result.
+ */
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb)
+{
+	struct atomic_notifier_head *head = &irq_table->irq[vecidx].nh;
+
+	return atomic_notifier_chain_register(head, nb);
+}
+
+/* Remove @nb from the notifier chain of vector @vecidx.
+ * Returns the atomic_notifier_chain_unregister() result.
+ */
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+		       struct notifier_block *nb)
+{
+	struct atomic_notifier_head *head = &irq_table->irq[vecidx].nh;
+
+	return atomic_notifier_chain_unregister(head, nb);
+}
+
+/* Interrupt handler shared by all vectors: @nh is the notifier head that was
+ * registered as dev_id, and every block attached to it is called.
+ */
+static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+{
+	struct atomic_notifier_head *head = nh;
+
+	atomic_notifier_call_chain(head, 0, NULL);
+	return IRQ_HANDLED;
+}
+
+/* Write the base name of vector @vecidx into @name (MLX5_MAX_IRQ_NAME bytes):
+ * "mlx5_async" for vector 0, "mlx5_comp<N>" for the others, where N is the
+ * offset from MLX5_IRQ_VEC_COMP_BASE.
+ */
+static void irq_set_name(char *name, int vecidx)
+{
+	if (vecidx != 0)
+		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+			 vecidx - MLX5_IRQ_VEC_COMP_BASE);
+	else
+		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
+}
+
+/* Request the first @nvec IRQ vectors of @dev.
+ *
+ * Each vector gets an initialized notifier head and a name of the form
+ * "<base name>@pci:<pci name>", and is hooked to mlx5_irq_int_handler() with
+ * the notifier head as dev_id.
+ *
+ * Returns 0 on success. On failure the vectors requested so far are freed
+ * and the request_irq() error is returned.
+ */
+static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+{
+	char name[MLX5_MAX_IRQ_NAME];
+	int err;
+	int i;
+
+	for (i = 0; i < nvec; i++) {
+		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+		int irqn = pci_irq_vector(dev->pdev, i);
+
+		irq_set_name(name, i);
+		ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+		snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+			 "%s@pci:%s", name, pci_name(dev->pdev));
+		err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
+				  &irq->nh);
+		if (err) {
+			mlx5_core_err(dev, "Failed to request irq\n");
+			goto err_request_irq;
+		}
+	}
+	return 0;
+
+err_request_irq:
+	/* Vector i is the one whose request failed, so it must not be freed:
+	 * unwind only the vectors below it.
+	 */
+	while (i--) {
+		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+		int irqn = pci_irq_vector(dev->pdev, i);
+
+		free_irq(irqn, &irq->nh);
+	}
+	return err;
+}
+
+/* Free the cpu_rmap of @dev's IRQ table. Empty without CONFIG_RFS_ACCEL. */
+static void irq_clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+	free_irq_cpu_rmap(irq_table->rmap);
+#endif
+}
+
+/* Build the cpu_rmap for the completion vectors of @mdev.
+ *
+ * Without CONFIG_RFS_ACCEL this only returns 0. Otherwise an rmap sized by
+ * mlx5_irq_get_num_comp() is allocated and every vector from
+ * MLX5_IRQ_VEC_COMP_BASE up to nvec is added to it.
+ *
+ * Returns 0 on success, -ENOMEM when the rmap cannot be allocated, or the
+ * irq_cpu_rmap_add() error, in which case the rmap is freed again.
+ */
+static int irq_set_rmap(struct mlx5_core_dev *mdev)
+{
+	int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+	struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
+	int num_affinity_vec;
+	int vecidx;
+
+	num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
+	irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
+	if (!irq_table->rmap) {
+		err = -ENOMEM;
+		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+		goto err_out;
+	}
+
+	/* Only vectors at or above the completion base go into the rmap. */
+	vecidx = MLX5_IRQ_VEC_COMP_BASE;
+	for (; vecidx < irq_table->nvec; vecidx++) {
+		err = irq_cpu_rmap_add(irq_table->rmap,
+				       pci_irq_vector(mdev->pdev, vecidx));
+		if (err) {
+			mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+				      err);
+			goto err_irq_cpu_rmap_add;
+		}
+	}
+	return 0;
+
+err_irq_cpu_rmap_add:
+	irq_clear_rmap(mdev);
+err_out:
+#endif
+	return err;
+}
+
+/* Completion IRQ vectors */
+
+/* Set up the affinity hint of completion vector @i.
+ *
+ * The vector's cpumask is allocated, the CPU picked by cpumask_local_spread()
+ * for the device NUMA node is set in it, and on SMP builds the mask is given
+ * to irq_set_affinity_hint(). A failure to set the hint is only warned about.
+ *
+ * Returns 0, or -ENOMEM when the cpumask cannot be allocated.
+ */
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+	struct mlx5_irq *irq = mlx5_irq_get(mdev, vecidx);
+	int irqn = pci_irq_vector(mdev->pdev, vecidx);
+	unsigned int cpu;
+
+	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+		return -ENOMEM;
+	}
+
+	cpu = cpumask_local_spread(i, mdev->priv.numa_node);
+	cpumask_set_cpu(cpu, irq->mask);
+
+	if (!IS_ENABLED(CONFIG_SMP))
+		return 0;
+
+	if (irq_set_affinity_hint(irqn, irq->mask))
+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
+			       irqn);
+
+	return 0;
+}
+
+/* Drop the affinity hint of completion vector @i and free its cpumask. */
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+	struct mlx5_irq *irq = mlx5_irq_get(mdev, vecidx);
+	int irqn = pci_irq_vector(mdev->pdev, vecidx);
+
+	irq_set_affinity_hint(irqn, NULL);
+	free_cpumask_var(irq->mask);
+}
+
+/* Set the affinity hint of every completion vector of @mdev.
+ * On failure the hints set so far are cleared and the error is returned.
+ */
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int num_comp = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+	int err = 0;
+	int i;
+
+	for (i = 0; i < num_comp; i++) {
+		err = set_comp_irq_affinity_hint(mdev, i);
+		if (err)
+			break;
+	}
+	if (!err)
+		return 0;
+
+	/* Vector i failed; roll back the ones set up before it. */
+	while (--i >= 0)
+		clear_comp_irq_affinity_hint(mdev, i);
+
+	return err;
+}
+
+/* Clear the affinity hint of every completion vector of @mdev. */
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int num_comp = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+	int i = 0;
+
+	while (i < num_comp)
+		clear_comp_irq_affinity_hint(mdev, i++);
+}
+
+/* Return the cpumask stored for vector @vecidx of @irq_table. */
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+{
+	struct mlx5_irq *irq = &irq_table->irq[vecidx];
+
+	return irq->mask;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+/* Return the cpu_rmap of @irq_table. Only built with CONFIG_RFS_ACCEL. */
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
+{
+	return irq_table->rmap;
+}
+#endif
+
+/* Free every IRQ of @dev's table, vectors 0 to nvec - 1. */
+static void unrequest_irqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *table = dev->priv.irq_table;
+	int vecidx;
+
+	for (vecidx = 0; vecidx < table->nvec; vecidx++) {
+		struct mlx5_irq *irq = mlx5_irq_get(dev, vecidx);
+
+		free_irq(pci_irq_vector(dev->pdev, vecidx), &irq->nh);
+	}
+}
+
+/* Allocate and request the MSI-X vectors of @dev.
+ *
+ * The wanted vector count is num_ports * online CPUs plus
+ * MLX5_IRQ_VEC_COMP_BASE, capped by the number of EQs the device reports.
+ * After the vectors are allocated, the rmap is built, the IRQs are requested
+ * and the completion vectors get their affinity hints.
+ *
+ * Returns 0 on success or a negative errno; on failure every step already
+ * done is undone in reverse order.
+ */
+int mlx5_irq_table_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_irq_table *table = priv->irq_table;
+	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+		      MLX5_CAP_GEN(dev, max_num_eqs) :
+		      1 << MLX5_CAP_GEN(dev, log_max_eq);
+	int nvec;
+	int err;
+
+	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+	       MLX5_IRQ_VEC_COMP_BASE;
+	nvec = min_t(int, nvec, num_eqs);
+	/* At least one vector above the completion base is required. */
+	if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+		return -ENOMEM;
+
+	/* Sized for the wanted count; fewer vectors may be granted below. */
+	table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
+	if (!table->irq)
+		return -ENOMEM;
+
+	nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+				     nvec, PCI_IRQ_MSIX);
+	if (nvec < 0) {
+		err = nvec;
+		goto err_free_irq;
+	}
+
+	/* From here on nvec is the count actually granted. */
+	table->nvec = nvec;
+
+	err = irq_set_rmap(dev);
+	if (err)
+		goto err_set_rmap;
+
+	err = request_irqs(dev, nvec);
+	if (err)
+		goto err_request_irqs;
+
+	err = set_comp_irq_affinity_hints(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+		goto err_set_affinity;
+	}
+
+	return 0;
+
+err_set_affinity:
+	unrequest_irqs(dev);
+err_request_irqs:
+	irq_clear_rmap(dev);
+err_set_rmap:
+	pci_free_irq_vectors(dev->pdev);
+err_free_irq:
+	kfree(table->irq);
+	return err;
+}
+
+/* Tear down what mlx5_irq_table_create() set up. */
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *table = dev->priv.irq_table;
+
+	/* The rmap and the affinity hints have to be cleared before
+	 * free_irq() is called. Hence the asymmetry with creation, where
+	 * set_rmap runs after the vectors are allocated but before
+	 * request_irq.
+	 */
+	irq_clear_rmap(dev);
+	clear_comp_irqs_affinity_hints(dev);
+	unrequest_irqs(dev);
+	pci_free_irq_vectors(dev->pdev);
+	kfree(table->irq);
+}
+
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@ -106,10 +106,10 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)

 	return 0;

-destroy_flow_table:
-	mlx5_destroy_flow_table(ft);
 destroy_flow_group:
 	mlx5_destroy_flow_group(fg);
+destroy_flow_table:
+	mlx5_destroy_flow_table(ft);
 free:
 	kvfree(spec);
 	kvfree(flow_group_in);
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@ -208,6 +208,27 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
 	mlx5_device_disable_sriov(dev);
 }

+/* Determine the maximum number of VFs to record for @dev.
+ *
+ * When @dev is the ECPF eswitch manager, the eswitch functions are queried
+ * and host_params_context.host_total_vfs is returned if the query succeeded
+ * and the value is non-zero. In every other case the result of
+ * pci_sriov_get_totalvfs() for the device's PCI function is returned.
+ */
+static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+{
+	u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
+	u16 host_total_vfs;
+	int err;
+
+	if (mlx5_core_is_ecpf_esw_manager(dev)) {
+		err = mlx5_esw_query_functions(dev, out, sizeof(out));
+		/* Extracted unconditionally; only trusted below when err is 0 */
+		host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+					  host_params_context.host_total_vfs);
+
+		/* Old FW doesn't support getting total_vfs from esw func
+		 * but supports getting it from pci_sriov.
+		 */
+		if (!err && host_total_vfs)
+			return host_total_vfs;
+	}
+
+	return pci_sriov_get_totalvfs(dev->pdev);
+}
+
 int mlx5_sriov_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@ -218,6 +239,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
 		return 0;

 	total_vfs = pci_sriov_get_totalvfs(pdev);
+	sriov->max_vfs = mlx5_get_max_vfs(dev);
 	sriov->num_vfs = pci_num_vf(pdev);
 	sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
 	if (!sriov->vfs_ctx)
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@ -342,7 +342,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_PAGE_FAULT	   = 0xc,
 	MLX5_EVENT_TYPE_NIC_VPORT_CHANGE   = 0xd,

-	MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe,
+	MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe,

 	MLX5_EVENT_TYPE_DCT_DRAINED        = 0x1c,

--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@ -107,6 +107,7 @@ enum {
 	MLX5_REG_FPGA_CAP	 = 0x4022,
 	MLX5_REG_FPGA_CTRL	 = 0x4023,
 	MLX5_REG_FPGA_ACCESS_REG = 0x4024,
+	MLX5_REG_CORE_DUMP	 = 0x402e,
 	MLX5_REG_PCAP		 = 0x5001,
 	MLX5_REG_PMTU		 = 0x5003,
 	MLX5_REG_PTYS		 = 0x5004,
@ -469,6 +470,7 @@ struct mlx5_core_sriov {
 	struct mlx5_vf_context	*vfs_ctx;
 	int			num_vfs;
 	int			enabled_vfs;
+	u16			max_vfs;
 };

 struct mlx5_fc_stats {
@ -490,6 +492,7 @@ struct mlx5_eswitch;
 struct mlx5_lag;
 struct mlx5_devcom;
 struct mlx5_eq_table;
+struct mlx5_irq_table;

 struct mlx5_rate_limit {
 	u32			rate;
@ -519,6 +522,8 @@ struct mlx5_core_roce {
 };

 struct mlx5_priv {
+	/* IRQ table valid only for real pci devices PF or VF */
+	struct mlx5_irq_table   *irq_table;
 	struct mlx5_eq_table	*eq_table;

 	/* pages stuff */
@ -1102,13 +1107,9 @@ static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev)
 	return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists);
 }

-#define MLX5_HOST_PF_MAX_VFS	(127u)
 static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
 {
-	if (mlx5_core_is_ecpf_esw_manager(dev))
-		return MLX5_HOST_PF_MAX_VFS;
-	else
-		return pci_sriov_get_totalvfs(dev->pdev);
+	return dev->priv.sriov.max_vfs;
 }

 static inline int mlx5_get_gid_table_len(u16 param)
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@ -4,17 +4,7 @@
 #ifndef MLX5_CORE_EQ_H
 #define MLX5_CORE_EQ_H

-enum {
-	MLX5_EQ_PAGEREQ_IDX        = 0,
-	MLX5_EQ_CMD_IDX            = 1,
-	MLX5_EQ_ASYNC_IDX          = 2,
-	/* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */
-	MLX5_EQ_PFAULT_IDX         = 3,
-	MLX5_EQ_MAX_ASYNC_EQS,
-	/* completion eqs vector indices start here */
-	MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS,
-};
-
+#define MLX5_IRQ_VEC_COMP_BASE 1
 #define MLX5_NUM_CMD_EQE   (32)
 #define MLX5_NUM_ASYNC_EQE (0x1000)
 #define MLX5_NUM_SPARE_EQE (0x80)
@ -23,18 +13,19 @@ struct mlx5_eq;
 struct mlx5_core_dev;

 struct mlx5_eq_param {
-	u8             index;
+	u8             irq_index;
 	int            nent;
 	u64            mask;
-	void          *context;
-	irq_handler_t  handler;
 };

 struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
-		       struct mlx5_eq_param *param);
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param);
 int
 mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		   struct notifier_block *nb);
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		     struct notifier_block *nb);

 struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc);
 void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm);
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@ -7,6 +7,7 @@
 #define _MLX5_ESWITCH_

 #include <linux/mlx5/driver.h>
+#include <net/devlink.h>

 #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)

@ -29,17 +30,19 @@ enum {
 };

 struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
-	int		       (*load)(struct mlx5_core_dev *dev,
-				       struct mlx5_eswitch_rep *rep);
-	void		       (*unload)(struct mlx5_eswitch_rep *rep);
-	void		       *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
-	void			*priv;
-	atomic_t		state;
+/* Callbacks supplied for one representor type; a pointer to this table is
+ * what mlx5_eswitch_register_vport_reps() takes together with the rep type.
+ */
+struct mlx5_eswitch_rep_ops {
+	int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep);
+	void (*unload)(struct mlx5_eswitch_rep *rep);
+	void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+};
+
+/* Per-rep-type data kept in struct mlx5_eswitch_rep (one entry for each of
+ * NUM_REP_TYPES): an opaque private pointer and an atomic state word.
+ */
+struct mlx5_eswitch_rep_data {
+	void *priv;
+	atomic_t state;
 };

 struct mlx5_eswitch_rep {
-	struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+	struct mlx5_eswitch_rep_data rep_data[NUM_REP_TYPES];
 	u16		       vport;
 	u8		       hw_id[ETH_ALEN];
 	u16		       vlan;
@ -47,7 +50,7 @@ struct mlx5_eswitch_rep {
 };

 void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
-				      struct mlx5_eswitch_rep_if *rep_if,
+				      const struct mlx5_eswitch_rep_ops *ops,
 				      u8 rep_type);
 void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type);
 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
@ -60,4 +63,15 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
 				    int vport, u32 sqn);
+
+/* Query the eswitch encap mode for @dev. The real implementation is only
+ * declared when CONFIG_MLX5_ESWITCH is set; otherwise an inline stub
+ * always reports DEVLINK_ESWITCH_ENCAP_MODE_NONE.
+ */
+#ifdef CONFIG_MLX5_ESWITCH
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
+#else  /* CONFIG_MLX5_ESWITCH */
+static inline enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+	return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
 #endif
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@ -47,6 +47,7 @@ enum {
 enum {
 	MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0),
 	MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1),
+	MLX5_FLOW_TABLE_TERMINATION = BIT(2),
 };

 #define LEFTOVERS_RULE_NUM	 2
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@ -155,7 +155,7 @@ enum {
 	MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY     = 0x725,
 	MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY       = 0x726,
 	MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS        = 0x727,
-	MLX5_CMD_OP_QUERY_HOST_PARAMS             = 0x740,
+	MLX5_CMD_OP_QUERY_ESW_FUNCTIONS           = 0x740,
 	MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
 	MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
 	MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT       = 0x752,
@ -382,7 +382,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8	   reformat_and_modify_action[0x1];
 	u8         reserved_at_15[0x2];
 	u8	   table_miss_action_domain[0x1];
-	u8         reserved_at_18[0x8];
+	u8         termination_table[0x1];
+	u8         reserved_at_19[0x7];
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
 	u8         log_max_modify_header_context[0x8];
@ -664,7 +665,9 @@ struct mlx5_ifc_e_switch_cap_bits {
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert_if_not_exist[0x1];
 	u8         vport_cvlan_insert_overwrite[0x1];
-	u8         reserved_at_5[0x16];
+	u8         reserved_at_5[0x14];
+	u8         esw_functions_changed[0x1];
+	u8         reserved_at_1a[0x1];
 	u8         ecpf_vport_exists[0x1];
 	u8         counter_eswitch_affinity[0x1];
 	u8         merged_eswitch[0x1];
@ -715,7 +718,9 @@ struct mlx5_ifc_qos_cap_bits {
 };

 struct mlx5_ifc_debug_cap_bits {
-	u8         reserved_at_0[0x20];
+	u8         core_dump_general[0x1];
+	u8         core_dump_qp[0x1];
+	u8         reserved_at_2[0x1e];

 	u8         reserved_at_20[0x2];
 	u8         stall_detect[0x1];
@ -2531,6 +2536,7 @@ union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
 	struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
 	struct mlx5_ifc_qos_cap_bits qos_cap;
+	struct mlx5_ifc_debug_cap_bits debug_cap;
 	struct mlx5_ifc_fpga_cap_bits fpga_cap;
 	u8         reserved_at_0[0x8000];
 };
@ -7236,7 +7242,8 @@ struct mlx5_ifc_create_flow_table_out_bits {
 struct mlx5_ifc_flow_table_context_bits {
 	u8         reformat_en[0x1];
 	u8         decap_en[0x1];
-	u8         reserved_at_2[0x2];
+	u8         reserved_at_2[0x1];
+	u8         termination_table[0x1];
 	u8         table_miss_action[0x4];
 	u8         level[0x8];
 	u8         reserved_at_10[0x8];
@ -8546,6 +8553,18 @@ struct mlx5_ifc_qcam_reg_bits {
 	u8         reserved_at_1c0[0x80];
 };

+/* Bit layout of the core dump register. Array sizes are field widths in
+ * bits, and each reserved_at_N name carries the field's starting bit
+ * offset N. Presumably accessed through MLX5_REG_CORE_DUMP - TODO confirm
+ * against the register access code.
+ */
+struct mlx5_ifc_core_dump_reg_bits {
+	u8         reserved_at_0[0x18];
+	u8         core_dump_type[0x8];
+
+	u8         reserved_at_20[0x30];
+	u8         vhca_id[0x10];
+
+	u8         reserved_at_60[0x8];
+	u8         qpn[0x18];
+	u8         reserved_at_80[0x180];
+};
+
 struct mlx5_ifc_pcap_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
@ -9692,7 +9711,7 @@ struct mlx5_ifc_host_params_context_bits {
 	u8         reserved_at_8[0x8];
 	u8         host_num_of_vfs[0x10];

-	u8         reserved_at_20[0x10];
+	u8         host_total_vfs[0x10];
 	u8         host_pci_bus[0x10];

 	u8         reserved_at_40[0x10];
@ -9704,7 +9723,7 @@ struct mlx5_ifc_host_params_context_bits {
 	u8         reserved_at_80[0x180];
 };

-struct mlx5_ifc_query_host_params_in_bits {
+struct mlx5_ifc_query_esw_functions_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];

@ -9714,7 +9733,7 @@ struct mlx5_ifc_query_host_params_in_bits {
 	u8         reserved_at_40[0x40];
 };

-struct mlx5_ifc_query_host_params_out_bits {
+struct mlx5_ifc_query_esw_functions_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];

--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@ -530,8 +530,10 @@ struct devlink_ops {
 	int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
 	int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode,
 				       struct netlink_ext_ack *extack);
-	int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
-	int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode,
+	int (*eswitch_encap_mode_get)(struct devlink *devlink,
+				      enum devlink_eswitch_encap_mode *p_encap_mode);
+	int (*eswitch_encap_mode_set)(struct devlink *devlink,
+				      enum devlink_eswitch_encap_mode encap_mode,
 				      struct netlink_ext_ack *extack);
 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
 			struct netlink_ext_ack *extack);
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@ -1552,7 +1552,8 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
 				   u32 seq, int flags)
 {
 	const struct devlink_ops *ops = devlink->ops;
-	u8 inline_mode, encap_mode;
+	enum devlink_eswitch_encap_mode encap_mode;
+	u8 inline_mode;
 	void *hdr;
 	int err = 0;
 	u16 mode;
@ -1628,7 +1629,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
 {
 	struct devlink *devlink = info->user_ptr[0];
 	const struct devlink_ops *ops = devlink->ops;
-	u8 inline_mode, encap_mode;
+	enum devlink_eswitch_encap_mode encap_mode;
+	u8 inline_mode;
 	int err = 0;
 	u16 mode;