linux_dsm_epyc7002/drivers/infiniband/hw/qib/qib_user_pages.c
John Hubbard ea99697458 RDMA: Convert put_page() to put_user_page*()
For infiniband code that retains pages via get_user_pages*(), release
those pages via the new put_user_page(), or put_user_pages*(), instead of
put_page()

This is a tiny part of the second step of fixing the problem described in
[1]. The steps are:

1) Provide put_user_page*() routines, intended to be used for releasing
   pages that were pinned via get_user_pages*().

2) Convert all of the call sites for get_user_pages*(), to invoke
   put_user_page*(), instead of put_page(). This involves dozens of call
   sites, and will take some time.

3) After (2) is complete, use get_user_pages*() and put_user_page*() to
   implement tracking of these pages. This tracking will be separate from
   the existing struct page refcounting.

4) Use the tracking and identification of these pages, to implement
   special handling (especially in writeback paths) when the pages are
   backed by a filesystem. Again, [1] provides details as to why that is
   desirable.

[1] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
Tested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2019-05-27 20:11:11 -03:00

141 lines
4.3 KiB
C

/*
* Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/device.h>
#include "qib.h"
static void __qib_release_user_pages(struct page **p, size_t num_pages,
int dirty)
{
if (dirty)
put_user_pages_dirty_lock(p, num_pages);
else
put_user_pages(p, num_pages);
}
/**
* qib_map_page - a safety wrapper around pci_map_page()
*
* A dma_addr of all 0's is interpreted by the chip as "disabled".
* Unfortunately, it can also be a valid dma_addr returned on some
* architectures.
*
* The powerpc iommu assigns dma_addrs in ascending order, so we don't
* have to bother with retries or mapping a dummy page to insure we
* don't just get the same mapping again.
*
* I'm sure we won't be so lucky with other iommu's, so FIXME.
*/
int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr)
{
dma_addr_t phys;
phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
if (pci_dma_mapping_error(hwdev, phys))
return -ENOMEM;
if (!phys) {
pci_unmap_page(hwdev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
phys = pci_map_page(hwdev, page, 0, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
if (pci_dma_mapping_error(hwdev, phys))
return -ENOMEM;
/*
* FIXME: If we get 0 again, we should keep this page,
* map another, then free the 0 page.
*/
}
*daddr = phys;
return 0;
}
/**
* qib_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
* @p: the output page structures
*
* This function takes a given start page (page aligned user virtual
* address) and pins it and the following specified number of pages. For
* now, num_pages is always 1, but that will probably change at some point
* (because caller is doing expected sends on a single virtually contiguous
* buffer, so we can do all pages at once).
*/
int qib_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p)
{
unsigned long locked, lock_limit;
size_t got;
int ret;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
locked = atomic64_add_return(num_pages, &current->mm->pinned_vm);
if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
ret = -ENOMEM;
goto bail;
}
down_read(&current->mm->mmap_sem);
for (got = 0; got < num_pages; got += ret) {
ret = get_user_pages(start_page + got * PAGE_SIZE,
num_pages - got,
FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE,
p + got, NULL);
if (ret < 0) {
up_read(&current->mm->mmap_sem);
goto bail_release;
}
}
up_read(&current->mm->mmap_sem);
return 0;
bail_release:
__qib_release_user_pages(p, got, 0);
bail:
atomic64_sub(num_pages, &current->mm->pinned_vm);
return ret;
}
void qib_release_user_pages(struct page **p, size_t num_pages)
{
__qib_release_user_pages(p, num_pages, 1);
/* during close after signal, mm can be NULL */
if (current->mm)
atomic64_sub(num_pages, &current->mm->pinned_vm);
}