RDMA/mlx5: Avoid double lookups on the pagefault path
Now that the locking is simplified, combine pagefault_implicit_mr() with
implicit_mr_get_data() so that we sweep over the idx range only once and do
the single xlt update at the end, after the child umems are set up. This
avoids the double iteration/xa_loads, plus the sketchy failure path if the
xa_load() fails.

Link: https://lore.kernel.org/r/20191009160934.3143-12-jgg@ziepe.ca
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
parent 3389baa831
commit b70d785d23
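Before the diff, a minimal standalone sketch of the pattern the commit message describes: sweep the index range once, create any missing children along the way while tracking the span of newly created entries, and issue one batched translation update at the end. This is illustration only, not the mlx5 driver code; every name in it (NUM_CHILDREN, children[], get_or_create_child(), fault_range()) is invented for the example.

/*
 * Illustrative sketch only, not kernel code: the "sweep once, update once"
 * shape from the commit message, using invented types and helpers.
 */
#include <stdio.h>
#include <stdlib.h>

#define NUM_CHILDREN 16UL		/* hypothetical table size */

static void *children[NUM_CHILDREN];	/* stand-in for the implicit_children xarray */

static void *get_or_create_child(unsigned long idx)
{
	if (!children[idx])
		children[idx] = calloc(1, 64);	/* stand-in for implicit_get_child_mr() */
	return children[idx];
}

/* Fault the index range [start_idx, end_idx] in a single sweep. */
static int fault_range(unsigned long start_idx, unsigned long end_idx)
{
	unsigned long upd_start_idx = end_idx + 1;	/* sentinel: nothing new yet */
	unsigned long upd_len = 0;
	unsigned long idx;

	for (idx = start_idx; idx <= end_idx; idx++) {
		if (!children[idx]) {
			if (!get_or_create_child(idx))
				return -1;
			/* Track the span of children created during this sweep. */
			if (idx < upd_start_idx)
				upd_start_idx = idx;
			upd_len = idx - upd_start_idx + 1;
		}
		/* ... fault this child's pages here, in the same pass ... */
	}

	/* One batched update at the end, only if new children were added. */
	if (upd_len)
		printf("batched update: start_idx=%lu len=%lu\n",
		       upd_start_idx, upd_len);
	return 0;
}

int main(void)
{
	return fault_range(2, 9);
}

The upd_start_idx = end_idx + 1 sentinel and the upd_len bookkeeping mirror what the new pagefault_implicit_mr() in the diff below does before calling mlx5_ib_update_xlt() once.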
@@ -419,68 +419,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	return ret;
 }
 
-static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr,
-					       u64 io_virt, size_t bcnt)
-{
-	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
-	unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
-	unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT;
-	unsigned long inv_start_idx = end_idx + 1;
-	unsigned long inv_len = 0;
-	struct mlx5_ib_mr *result = NULL;
-	int ret;
-
-	lockdep_assert_held(&imr->dev->odp_srcu);
-
-	for (idx = idx; idx <= end_idx; idx++) {
-		struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
-
-		if (unlikely(!mtt)) {
-			mtt = implicit_get_child_mr(imr, idx);
-			if (IS_ERR(mtt)) {
-				result = mtt;
-				goto out;
-			}
-			inv_start_idx = min(inv_start_idx, idx);
-			inv_len = idx - inv_start_idx + 1;
-		}
-
-		/* Return first odp if region not covered by single one */
-		if (likely(!result))
-			result = mtt;
-	}
-
-	/*
-	 * Any time the implicit_children are changed we must perform an
-	 * update of the xlt before exiting to ensure the HW and the
-	 * implicit_children remains synchronized.
-	 */
-out:
-	if (likely(!inv_len))
-		return result;
-
-	/*
-	 * Notice this is not strictly ordered right, the KSM is updated after
-	 * the implicit_leaves is updated, so a parallel page fault could see
-	 * a MR that is not yet visible in the KSM. This is similar to a
-	 * parallel page fault seeing a MR that is being concurrently removed
-	 * from the KSM. Both of these improbable situations are resolved
-	 * safely by resuming the HW and then taking another page fault. The
-	 * next pagefault handler will see the new information.
-	 */
-	mutex_lock(&odp_imr->umem_mutex);
-	ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0,
-				 MLX5_IB_UPD_XLT_INDIRECT |
-				 MLX5_IB_UPD_XLT_ATOMIC);
-	mutex_unlock(&odp_imr->umem_mutex);
-	if (ret) {
-		mlx5_ib_err(to_mdev(imr->ibmr.pd->device),
-			    "Failed to update PAS\n");
-		return ERR_PTR(ret);
-	}
-	return result;
-}
-
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     struct ib_udata *udata,
 					     int access_flags)
@@ -647,6 +585,84 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	return ret;
 }
 
+static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
+				 struct ib_umem_odp *odp_imr, u64 user_va,
+				 size_t bcnt, u32 *bytes_mapped, u32 flags)
+{
+	unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
+	unsigned long upd_start_idx = end_idx + 1;
+	unsigned long upd_len = 0;
+	unsigned long npages = 0;
+	int err;
+	int ret;
+
+	if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
+		     mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
+		return -EFAULT;
+
+	/* Fault each child mr that intersects with our interval. */
+	while (bcnt) {
+		unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
+		struct ib_umem_odp *umem_odp;
+		struct mlx5_ib_mr *mtt;
+		u64 len;
+
+		mtt = xa_load(&imr->implicit_children, idx);
+		if (unlikely(!mtt)) {
+			mtt = implicit_get_child_mr(imr, idx);
+			if (IS_ERR(mtt)) {
+				ret = PTR_ERR(mtt);
+				goto out;
+			}
+			upd_start_idx = min(upd_start_idx, idx);
+			upd_len = idx - upd_start_idx + 1;
+		}
+
+		umem_odp = to_ib_umem_odp(mtt->umem);
+		len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
+		      user_va;
+
+		ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
+					bytes_mapped, flags);
+		if (ret < 0)
+			goto out;
+		user_va += len;
+		bcnt -= len;
+		npages += ret;
+	}
+
+	ret = npages;
+
+	/*
+	 * Any time the implicit_children are changed we must perform an
+	 * update of the xlt before exiting to ensure the HW and the
+	 * implicit_children remains synchronized.
+	 */
+out:
+	if (likely(!upd_len))
+		return ret;
+
+	/*
+	 * Notice this is not strictly ordered right, the KSM is updated after
+	 * the implicit_children is updated, so a parallel page fault could
+	 * see a MR that is not yet visible in the KSM. This is similar to a
+	 * parallel page fault seeing a MR that is being concurrently removed
+	 * from the KSM. Both of these improbable situations are resolved
+	 * safely by resuming the HW and then taking another page fault. The
+	 * next pagefault handler will see the new information.
+	 */
+	mutex_lock(&odp_imr->umem_mutex);
+	err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
+				 MLX5_IB_UPD_XLT_INDIRECT |
+				 MLX5_IB_UPD_XLT_ATOMIC);
+	mutex_unlock(&odp_imr->umem_mutex);
+	if (err) {
+		mlx5_ib_err(imr->dev, "Failed to update PAS\n");
+		return err;
+	}
+	return ret;
+}
+
 /*
  * Returns:
  * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
@@ -660,8 +676,6 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 			u32 *bytes_mapped, u32 flags)
 {
 	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
-	struct mlx5_ib_mr *mtt;
-	int npages = 0;
 
 	if (!odp->is_implicit_odp) {
 		if (unlikely(io_virt < ib_umem_start(odp) ||
@@ -670,48 +684,8 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 		return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped,
 					 flags);
 	}
-
-	if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
-		     mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt))
-		return -EFAULT;
-
-	mtt = implicit_mr_get_data(mr, io_virt, bcnt);
-	if (IS_ERR(mtt))
-		return PTR_ERR(mtt);
-
-	/* Fault each child mr that intersects with our interval. */
-	while (bcnt) {
-		struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem);
-		u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp));
-		u64 len = end - io_virt;
-		int ret;
-
-		ret = pagefault_real_mr(mtt, umem_odp, io_virt, len,
-					bytes_mapped, flags);
-		if (ret < 0)
-			return ret;
-		io_virt += len;
-		bcnt -= len;
-		npages += ret;
-
-		if (unlikely(bcnt)) {
-			mtt = xa_load(&mr->implicit_children,
-				      io_virt >> MLX5_IMR_MTT_SHIFT);
-
-			/*
-			 * implicit_mr_get_data sets up all the leaves, this
-			 * means they got invalidated before we got to them.
-			 */
-			if (!mtt) {
-				mlx5_ib_dbg(
-					mr->dev,
-					"next implicit leaf removed at 0x%llx.\n",
-					io_virt);
-				return -EAGAIN;
-			}
-		}
-	}
-	return npages;
+	return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+				     flags);
 }
 
 struct pf_frame {