IB/mlx5: Add steering SW ICM device memory type

This patch adds support for allocating, deallocating and registering a new
device memory type, STEERING_SW_ICM.  This memory can be allocated and
used by a privileged user for direct rule insertion and management of the
device's steering tables.

The type is provided by the user via the dedicated attribute in the
alloc_dm ioctl command.

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Ariel Levkovich 2019-05-05 17:07:13 +03:00 committed by Jason Gunthorpe
parent 4056b12efd
commit 25c13324d0
6 changed files with 292 additions and 9 deletions

View File

@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
return -ENOMEM;
}
int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length)
int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
{
struct mlx5_core_dev *dev = dm->dev;
u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
@ -186,6 +186,131 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length)
return err;
}
int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
u16 uid, phys_addr_t *addr, u32 *obj_id)
{
struct mlx5_core_dev *dev = dm->dev;
u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
unsigned long *block_map;
u64 icm_start_addr;
u32 log_icm_size;
u32 max_blocks;
u64 block_idx;
void *sw_icm;
int ret;
MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
switch (type) {
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
steering_sw_icm_start_address);
log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
block_map = dm->steering_sw_icm_alloc_blocks;
break;
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
header_modify_sw_icm_start_address);
log_icm_size = MLX5_CAP_DEV_MEM(dev,
log_header_modify_sw_icm_size);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
default:
return -EINVAL;
}
max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
spin_lock(&dm->lock);
block_idx = bitmap_find_next_zero_area(block_map,
max_blocks,
0,
num_blocks, 0);
if (block_idx < max_blocks)
bitmap_set(block_map,
block_idx, num_blocks);
spin_unlock(&dm->lock);
if (block_idx >= max_blocks)
return -ENOMEM;
sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
icm_start_addr);
MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (ret) {
spin_lock(&dm->lock);
bitmap_clear(block_map,
block_idx, num_blocks);
spin_unlock(&dm->lock);
return ret;
}
*addr = icm_start_addr;
*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
return 0;
}
int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
u16 uid, phys_addr_t addr, u32 obj_id)
{
struct mlx5_core_dev *dev = dm->dev;
u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
unsigned long *block_map;
u64 start_idx;
int err;
switch (type) {
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
start_idx =
(addr - MLX5_CAP64_DEV_MEM(
dev, steering_sw_icm_start_address)) >>
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
block_map = dm->steering_sw_icm_alloc_blocks;
break;
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
start_idx =
(addr -
MLX5_CAP64_DEV_MEM(
dev, header_modify_sw_icm_start_address)) >>
MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
block_map = dm->header_modify_sw_icm_alloc_blocks;
break;
default:
return -EINVAL;
}
MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
spin_lock(&dm->lock);
bitmap_clear(block_map,
start_idx, num_blocks);
spin_unlock(&dm->lock);
return 0;
}
int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
{
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};

View File

@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length);
int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
@ -65,4 +65,8 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
u16 uid);
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
u16 uid, phys_addr_t *addr, u32 *obj_id);
int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
u16 uid, phys_addr_t addr, u32 obj_id);
#endif /* MLX5_IB_CMD_H */

View File

@ -2264,6 +2264,28 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return 0;
}
static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
u32 type)
{
switch (type) {
case MLX5_IB_UAPI_DM_TYPE_MEMIC:
if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
return -EOPNOTSUPP;
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
if (!capable(CAP_SYS_RAWIO) ||
!capable(CAP_NET_RAW))
return -EPERM;
if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
return -EOPNOTSUPP;
break;
}
return 0;
}
static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
struct mlx5_ib_dm *dm,
struct ib_dm_alloc_attr *attr,
@ -2309,6 +2331,40 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
return err;
}
static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
struct mlx5_ib_dm *dm,
struct ib_dm_alloc_attr *attr,
struct uverbs_attr_bundle *attrs,
int type)
{
struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
u64 act_size;
int err;
/* Allocation size must a multiple of the basic block size
* and a power of 2.
*/
act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
act_size = roundup_pow_of_two(act_size);
dm->size = act_size;
err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
to_mucontext(ctx)->devx_uid, &dm->dev_addr,
&dm->icm_dm.obj_id);
if (err)
return err;
err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
&dm->dev_addr, sizeof(dm->dev_addr));
if (err)
mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
to_mucontext(ctx)->devx_uid,
dm->dev_addr, dm->icm_dm.obj_id);
return err;
}
struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_dm_alloc_attr *attr,
@ -2327,6 +2383,10 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
type, attr->length, attr->alignment);
err = check_dm_type_support(to_mdev(ibdev), type);
if (err)
return ERR_PTR(err);
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
return ERR_PTR(-ENOMEM);
@ -2339,6 +2399,10 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
attr,
attrs);
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
break;
default:
err = -EOPNOTSUPP;
}
@ -2355,6 +2419,8 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
struct mlx5_ib_dm *dm = to_mdm(ibdm);
u32 page_idx;
@ -2371,11 +2437,16 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
MLX5_CAP64_DEV_MEM(dm_db->dev,
memic_bar_start_addr)) >>
PAGE_SHIFT;
bitmap_clear(rdma_udata_to_drv_context(&attrs->driver_udata,
struct mlx5_ib_ucontext,
ibucontext)
->dm_pages,
page_idx, DIV_ROUND_UP(dm->size, PAGE_SIZE));
bitmap_clear(ctx->dm_pages, page_idx,
DIV_ROUND_UP(dm->size, PAGE_SIZE));
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
ctx->devx_uid, dm->dev_addr,
dm->icm_dm.obj_id);
if (ret)
return ret;
break;
default:
return -EOPNOTSUPP;
@ -5902,6 +5973,8 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
mlx5_ib_cleanup_multiport_master(dev);
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
srcu_barrier(&dev->mr_srcu);
@ -5909,11 +5982,29 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
}
WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
!bitmap_empty(
dev->dm.steering_sw_icm_alloc_blocks,
BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
kfree(dev->dm.steering_sw_icm_alloc_blocks);
WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
!bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
BIT(MLX5_CAP_DEV_MEM(
mdev, log_header_modify_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
}
static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
u64 header_modify_icm_blocks = 0;
u64 steering_icm_blocks = 0;
int err;
int i;
@ -5959,16 +6050,51 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
steering_icm_blocks =
BIT(MLX5_CAP_DEV_MEM(mdev,
log_steering_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
dev->dm.steering_sw_icm_alloc_blocks =
kcalloc(BITS_TO_LONGS(steering_icm_blocks),
sizeof(unsigned long), GFP_KERNEL);
if (!dev->dm.steering_sw_icm_alloc_blocks)
goto err_mp;
}
if (MLX5_CAP64_DEV_MEM(mdev,
header_modify_sw_icm_start_address)) {
header_modify_icm_blocks = BIT(
MLX5_CAP_DEV_MEM(
mdev, log_header_modify_sw_icm_size) -
MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
dev->dm.header_modify_sw_icm_alloc_blocks =
kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
sizeof(unsigned long), GFP_KERNEL);
if (!dev->dm.header_modify_sw_icm_alloc_blocks)
goto err_dm;
}
}
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
err = init_srcu_struct(&dev->mr_srcu);
if (err)
goto err_mp;
goto err_dm;
}
return 0;
err_dm:
kfree(dev->dm.steering_sw_icm_alloc_blocks);
kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
@ -6151,7 +6277,9 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
}
if (MLX5_CAP_DEV_MEM(mdev, memic))
if (MLX5_CAP_DEV_MEM(mdev, memic) ||
MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
if (mlx5_accel_ipsec_device_caps(dev->mdev) &

View File

@ -118,6 +118,10 @@ enum {
MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
};
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \
(MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@ -557,6 +561,12 @@ struct mlx5_ib_dm {
phys_addr_t dev_addr;
u32 type;
size_t size;
union {
struct {
u32 obj_id;
} icm_dm;
/* other dm types specific params should be added here */
};
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@ -567,6 +577,11 @@ struct mlx5_ib_dm {
IB_ACCESS_REMOTE_ATOMIC |\
IB_ZERO_BASED)
#define MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\
IB_ACCESS_REMOTE_WRITE |\
IB_ACCESS_REMOTE_READ |\
IB_ZERO_BASED)
struct mlx5_ib_mr {
struct ib_mr ibmr;
void *descs;
@ -854,6 +869,8 @@ struct mlx5_dm {
*/
spinlock_t lock;
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
unsigned long *steering_sw_icm_alloc_blocks;
unsigned long *header_modify_sw_icm_alloc_blocks;
};
struct mlx5_read_counters_attr {

View File

@ -1247,6 +1247,13 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
mode = MLX5_MKC_ACCESS_MODE_MEMIC;
start_addr -= pci_resource_start(dev->pdev, 0);
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
return ERR_PTR(-EINVAL);
mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
break;
default:
return ERR_PTR(-EINVAL);
}

View File

@ -59,6 +59,8 @@ struct mlx5_ib_uapi_devx_async_cmd_hdr {
enum mlx5_ib_uapi_dm_type {
MLX5_IB_UAPI_DM_TYPE_MEMIC,
MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
};
#endif