RDMA/hfi1: Use mmu_interval_notifier_insert for user_exp_rcv

This converts one of the two users of mmu_notifiers to use the new API.
The conversion is fairly straightforward; however, the existing use of
notifiers here seems to be racy.

Link: https://lore.kernel.org/r/20191112202231.3856-7-jgg@ziepe.ca
Tested-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Jason Gunthorpe 2019-11-12 16:22:23 -04:00
parent f25a546e65
commit 3889551db2
4 changed files with 60 additions and 93 deletions
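For readers who don't know the replacement API, the general shape of what this commit switches to is sketched below. This is a minimal illustration of the documented mmu_interval_notifier pattern, not hfi1 code: the my_node/my_ops/my_invalidate names and the mutex are made up for the example, and hfi1's actual callback queues an invalidation event for PSM rather than tearing the mapping down directly.

#include <linux/mmu_notifier.h>
#include <linux/mutex.h>

struct my_node {
        struct mmu_interval_notifier notifier;  /* embedded, like tid_rb_node */
        struct mutex lock;                      /* serializes against invalidation */
        /* ... driver state for the tracked VA range ... */
};

static bool my_invalidate(struct mmu_interval_notifier *mni,
                          const struct mmu_notifier_range *range,
                          unsigned long cur_seq)
{
        struct my_node *node = container_of(mni, struct my_node, notifier);

        /* Must not sleep when the range is non-blockable. */
        if (mmu_notifier_range_blockable(range))
                mutex_lock(&node->lock);
        else if (!mutex_trylock(&node->lock))
                return false;

        /* Publish the new sequence so mmu_interval_read_retry() sees it. */
        mmu_interval_set_seq(mni, cur_seq);
        /* ... stop device access to the range here ... */
        mutex_unlock(&node->lock);
        return true;
}

static const struct mmu_interval_notifier_ops my_ops = {
        .invalidate = my_invalidate,
};

Registration is then a call to mmu_interval_notifier_insert(&node->notifier, mm, start, length, &my_ops), paired with mmu_interval_notifier_remove() on teardown; that is the substitution the hunks below make for the old hfi1_mmu_rb_register()/hfi1_mmu_rb_insert()/hfi1_mmu_rb_remove() calls.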

drivers/infiniband/hw/hfi1/file_ops.c

@@ -1138,7 +1138,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
                         HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
                         HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
         /* adjust flag if this fd is not able to cache */
-        if (!fd->handler)
+        if (!fd->use_mn)
                 cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
 
         cinfo.num_active = hfi1_count_active_units();

drivers/infiniband/hw/hfi1/hfi.h

@@ -1444,7 +1444,7 @@ struct hfi1_filedata {
         /* for cpu affinity; -1 if none */
         int rec_cpu_num;
         u32 tid_n_pinned;
-        struct mmu_rb_handler *handler;
+        bool use_mn;
         struct tid_rb_node **entry_to_rb;
         spinlock_t tid_lock; /* protect tid_[limit,used] counters */
         u32 tid_limit;

drivers/infiniband/hw/hfi1/user_exp_rcv.c

@@ -59,11 +59,11 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
                               struct tid_user_buf *tbuf,
                               u32 rcventry, struct tid_group *grp,
                               u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
                                     struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+                              const struct mmu_notifier_range *range,
+                              unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
                             struct tid_group *grp,
                             unsigned int start, u16 count,
@@ -73,10 +73,8 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
                               struct tid_group **grp);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
-static struct mmu_rb_ops tid_rb_ops = {
-        .insert = tid_rb_insert,
-        .remove = tid_rb_remove,
-        .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+        .invalidate = tid_rb_invalidate,
 };
 
 /*
@@ -87,7 +85,6 @@ static struct mmu_rb_ops tid_rb_ops = {
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
                            struct hfi1_ctxtdata *uctxt)
 {
-        struct hfi1_devdata *dd = uctxt->dd;
         int ret = 0;
 
         spin_lock_init(&fd->tid_lock);
@@ -109,20 +106,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
                         fd->entry_to_rb = NULL;
                         return -ENOMEM;
                 }
-
-                /*
-                 * Register MMU notifier callbacks. If the registration
-                 * fails, continue without TID caching for this context.
-                 */
-                ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
-                                           dd->pport->hfi1_wq,
-                                           &fd->handler);
-                if (ret) {
-                        dd_dev_info(dd,
-                                    "Failed MMU notifier registration %d\n",
-                                    ret);
-                        ret = 0;
-                }
+                fd->use_mn = true;
         }
 
         /*
@@ -139,7 +123,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
          * init.
          */
         spin_lock(&fd->tid_lock);
-        if (uctxt->subctxt_cnt && fd->handler) {
+        if (uctxt->subctxt_cnt && fd->use_mn) {
                 u16 remainder;
 
                 fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -158,18 +142,10 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
 {
         struct hfi1_ctxtdata *uctxt = fd->uctxt;
 
-        /*
-         * The notifier would have been removed when the process'es mm
-         * was freed.
-         */
-        if (fd->handler) {
-                hfi1_mmu_rb_unregister(fd->handler);
-        } else {
-                if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
-                        unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
-                if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
-                        unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
-        }
+        if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+                unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+        if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+                unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
 
         kfree(fd->invalid_tids);
         fd->invalid_tids = NULL;
@@ -201,7 +177,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
 
         if (mapped) {
                 pci_unmap_single(dd->pcidev, node->dma_addr,
-                                 node->mmu.len, PCI_DMA_FROMDEVICE);
+                                 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
                 pages = &node->pages[idx];
         } else {
                 pages = &tidbuf->pages[idx];
@@ -777,8 +753,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
                 return -EFAULT;
         }
 
-        node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
-        node->mmu.len = npages * PAGE_SIZE;
+        node->fdata = fd;
         node->phys = page_to_phys(pages[0]);
         node->npages = npages;
         node->rcventry = rcventry;
@@ -787,23 +762,35 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
         node->freed = false;
         memcpy(node->pages, pages, sizeof(struct page *) * npages);
 
-        if (!fd->handler)
-                ret = tid_rb_insert(fd, &node->mmu);
-        else
-                ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-        if (ret) {
-                hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
-                          node->rcventry, node->mmu.addr, node->phys, ret);
-                pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
-                                 PCI_DMA_FROMDEVICE);
-                kfree(node);
-                return -EFAULT;
+        if (fd->use_mn) {
+                ret = mmu_interval_notifier_insert(
+                        &node->notifier, fd->mm,
+                        tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+                        &tid_mn_ops);
+                if (ret)
+                        goto out_unmap;
+                /*
+                 * FIXME: This is in the wrong order, the notifier should be
+                 * established before the pages are pinned by pin_rcv_pages.
+                 */
+                mmu_interval_read_begin(&node->notifier);
         }
+        fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
 
         hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
         trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
-                               node->mmu.addr, node->phys, phys);
+                               node->notifier.interval_tree.start, node->phys,
+                               phys);
         return 0;
+
+out_unmap:
+        hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+                  node->rcventry, node->notifier.interval_tree.start,
+                  node->phys, ret);
+        pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+                         PCI_DMA_FROMDEVICE);
+        kfree(node);
+        return -EFAULT;
 }
 
 static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
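The FIXME in the hunk above points at the collision-retry scheme this API expects: insert the notifier before pinning the pages, then retry the pin whenever mmu_interval_read_retry() reports that an invalidation raced with it. A hedged sketch of that ordering, reusing the illustrative my_node/my_ops names from the earlier sketch and hypothetical my_pin_pages()/my_unpin_pages()/my_program_hw() helpers:

static int my_setup(struct my_node *node, struct mm_struct *mm,
                    unsigned long start, unsigned long length)
{
        unsigned long seq;
        int ret;

        /* 1. Register interest in the VA range first. */
        ret = mmu_interval_notifier_insert(&node->notifier, mm, start,
                                           length, &my_ops);
        if (ret)
                return ret;

retry:
        /* 2. Sample the sequence, then pin the pages. */
        seq = mmu_interval_read_begin(&node->notifier);
        ret = my_pin_pages(node, start, length);        /* hypothetical */
        if (ret)
                goto out_remove;

        /* 3. Publish under the same lock my_invalidate() takes. */
        mutex_lock(&node->lock);
        if (mmu_interval_read_retry(&node->notifier, seq)) {
                mutex_unlock(&node->lock);
                my_unpin_pages(node);                   /* hypothetical */
                goto retry;
        }
        my_program_hw(node);                            /* hypothetical */
        mutex_unlock(&node->lock);
        return 0;

out_remove:
        mmu_interval_notifier_remove(&node->notifier);
        return ret;
}

As committed, hfi1 still pins in pin_rcv_pages() before inserting the notifier and discards the sequence returned by mmu_interval_read_begin(), which is the ordering the FIXME (and the commit message's "racy" remark) refers to.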
@@ -833,10 +820,9 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
         if (grp)
                 *grp = node->grp;
 
-        if (!fd->handler)
-                cacheless_tid_rb_remove(fd, node);
-        else
-                hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+        if (fd->use_mn)
+                mmu_interval_notifier_remove(&node->notifier);
+        cacheless_tid_rb_remove(fd, node);
 
         return 0;
 }
@@ -847,7 +833,8 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
         struct hfi1_devdata *dd = uctxt->dd;
 
         trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
-                                 node->npages, node->mmu.addr, node->phys,
+                                 node->npages,
+                                 node->notifier.interval_tree.start, node->phys,
                                  node->dma_addr);
 
         /*
@@ -894,30 +881,29 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
                         if (!node || node->rcventry != rcventry)
                                 continue;
 
+                        if (fd->use_mn)
+                                mmu_interval_notifier_remove(
+                                        &node->notifier);
                         cacheless_tid_rb_remove(fd, node);
                 }
         }
 }
 
-/*
- * Always return 0 from this function.  A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM.  Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away.  PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+                              const struct mmu_notifier_range *range,
+                              unsigned long cur_seq)
 {
-        struct hfi1_filedata *fdata = arg;
-        struct hfi1_ctxtdata *uctxt = fdata->uctxt;
         struct tid_rb_node *node =
-                container_of(mnode, struct tid_rb_node, mmu);
+                container_of(mni, struct tid_rb_node, notifier);
+        struct hfi1_filedata *fdata = node->fdata;
+        struct hfi1_ctxtdata *uctxt = fdata->uctxt;
 
         if (node->freed)
-                return 0;
+                return true;
 
-        trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+        trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+                                 node->notifier.interval_tree.start,
                                  node->rcventry, node->npages, node->dma_addr);
         node->freed = true;
 
@@ -946,18 +932,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
                 fdata->invalid_tid_idx++;
         }
         spin_unlock(&fdata->invalid_lock);
-        return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
-        struct hfi1_filedata *fdata = arg;
-        struct tid_rb_node *tnode =
-                container_of(node, struct tid_rb_node, mmu);
-        u32 base = fdata->uctxt->expected_base;
-
-        fdata->entry_to_rb[tnode->rcventry - base] = tnode;
-        return 0;
+        return true;
 }
 
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -968,12 +943,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
         fdata->entry_to_rb[tnode->rcventry - base] = NULL;
         clear_tid_node(fdata, tnode);
 }
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
-        struct hfi1_filedata *fdata = arg;
-        struct tid_rb_node *tnode =
-                container_of(node, struct tid_rb_node, mmu);
-
-        cacheless_tid_rb_remove(fdata, tnode);
-}

drivers/infiniband/hw/hfi1/user_exp_rcv.h

@@ -65,7 +65,8 @@ struct tid_user_buf {
 };
 
 struct tid_rb_node {
-        struct mmu_rb_node mmu;
+        struct mmu_interval_notifier notifier;
+        struct hfi1_filedata *fdata;
         unsigned long phys;
         struct tid_group *grp;
         u32 rcventry;