linux_dsm_epyc7002/kernel/dma/mapping.c
Linus Torvalds 45824fc0da powerpc updates for 5.4
- Initial support for running on a system with an Ultravisor, which is software
    that runs below the hypervisor and protects guests against some attacks by
    the hypervisor.
 
  - Support for building the kernel to run as a "Secure Virtual Machine", ie. as
    a guest capable of running on a system with an Ultravisor.
 
  - Some changes to our DMA code on bare metal, to allow devices with medium
    sized DMA masks (> 32 && < 59 bits) to use more than 2GB of DMA space.
 
  - Support for firmware assisted crash dumps on bare metal (powernv).
 
  - Two series fixing bugs in and refactoring our PCI EEH code.
 
  - A large series refactoring our exception entry code to use gas macros, both
    to make it more readable and also enable some future optimisations.
 
 As well as many cleanups and other minor features & fixups.
 
 Thanks to:
   Adam Zerella, Alexey Kardashevskiy, Alistair Popple, Andrew Donnellan, Aneesh
   Kumar K.V, Anju T Sudhakar, Anshuman Khandual, Balbir Singh, Benjamin
   Herrenschmidt, Cédric Le Goater, Christophe JAILLET, Christophe Leroy,
   Christopher M. Riedl, Christoph Hellwig, Claudio Carvalho, Daniel Axtens,
   David Gibson, David Hildenbrand, Desnes A. Nunes do Rosario, Ganesh Goudar,
   Gautham R. Shenoy, Greg Kurz, Guerney Hunt, Gustavo Romero, Halil Pasic, Hari
   Bathini, Joakim Tjernlund, Jonathan Neuschafer, Jordan Niethe, Leonardo Bras,
   Lianbo Jiang, Madhavan Srinivasan, Mahesh Salgaonkar, Mahesh Salgaonkar,
   Masahiro Yamada, Maxiwell S. Garcia, Michael Anderson, Nathan Chancellor,
   Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver O'Halloran, Qian Cai, Ram
   Pai, Ravi Bangoria, Reza Arbab, Ryan Grimm, Sam Bobroff, Santosh Sivaraj,
   Segher Boessenkool, Sukadev Bhattiprolu, Thiago Bauermann, Thiago Jung
   Bauermann, Thomas Gleixner, Tom Lendacky, Vasant Hegde.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl2EtEcTHG1wZUBlbGxl
 cm1hbi5pZC5hdQAKCRBR6+o8yOGlgPfsD/9uXyBXn3anI/H08+mk74k5gCsmMQpn
 D442CD/ByogZcccp23yBTlhawtCE03hcHnCLygn0Xgd8a4YvHts/RGHUe3fPHqlG
 bEyZ7jsLVz5ebNZQP7r4eGs2pSzCajwJy2N9HJ/C1ojf15rrfRxoVJtnyhE2wXpm
 DL+6o2K+nUCB3gTQ1Inr3DnWzoGOOUfNTOea2u+J+yfHwGRqOBYpevwqiwy5eelK
 aRjUJCqMTvrzra49MeFwjo0Nt3/Y8UNcwA+JlGdeR8bRuWhFrYmyBRiZEKPaujNO
 5EAfghBBlB0KQCqvF/tRM/c0OftHqK59AMobP9T7u9oOaBXeF/FpZX/iXjzNDPsN
 j9Oo2tKLTu/YVEXqBFuREGP+znANr1Wo4CFyOG8SbvYz0HFjR6XbtRJsS+0e8GWl
 kqX5/ZhYz3lBnKSNe9jgWOrh/J0KCSFigBTEWJT3xsn4YE8x8kK2l9KPqAIldWEP
 sKb2UjGS7v0NKq+NvShH88Q9AeQUEIjTcg/9aDDQDe6FaRQ7KiF8bUxSdwSPi+Fn
 j0lnF6i+1ATWZKuCr85veVi7C5qoe/+MqalnmP7MxULyzgXLLxUgN0SzEYO6QofK
 LQK/VaH2XVr5+M5YAb7K4/NX5gbM3s1bKrCiUy4EyHNvgG7gricYdbz6HgAjKpR7
 oP0rHfgmVYvF1g==
 =WlW+
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
 "This is a bit late, partly due to me travelling, and partly due to a
  power outage knocking out some of my test systems *while* I was
  travelling.

   - Initial support for running on a system with an Ultravisor, which
     is software that runs below the hypervisor and protects guests
     against some attacks by the hypervisor.

   - Support for building the kernel to run as a "Secure Virtual
     Machine", ie. as a guest capable of running on a system with an
     Ultravisor.

   - Some changes to our DMA code on bare metal, to allow devices with
     medium sized DMA masks (> 32 && < 59 bits) to use more than 2GB of
     DMA space.

   - Support for firmware assisted crash dumps on bare metal (powernv).

   - Two series fixing bugs in and refactoring our PCI EEH code.

   - A large series refactoring our exception entry code to use gas
     macros, both to make it more readable and also enable some future
     optimisations.

  As well as many cleanups and other minor features & fixups.

  Thanks to: Adam Zerella, Alexey Kardashevskiy, Alistair Popple, Andrew
  Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Anshuman Khandual,
  Balbir Singh, Benjamin Herrenschmidt, Cédric Le Goater, Christophe
  JAILLET, Christophe Leroy, Christopher M. Riedl, Christoph Hellwig,
  Claudio Carvalho, Daniel Axtens, David Gibson, David Hildenbrand,
  Desnes A. Nunes do Rosario, Ganesh Goudar, Gautham R. Shenoy, Greg
  Kurz, Guerney Hunt, Gustavo Romero, Halil Pasic, Hari Bathini, Joakim
  Tjernlund, Jonathan Neuschafer, Jordan Niethe, Leonardo Bras, Lianbo
  Jiang, Madhavan Srinivasan, Mahesh Salgaonkar, Mahesh Salgaonkar,
  Masahiro Yamada, Maxiwell S. Garcia, Michael Anderson, Nathan
  Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver
  O'Halloran, Qian Cai, Ram Pai, Ravi Bangoria, Reza Arbab, Ryan Grimm,
  Sam Bobroff, Santosh Sivaraj, Segher Boessenkool, Sukadev Bhattiprolu,
  Thiago Bauermann, Thiago Jung Bauermann, Thomas Gleixner, Tom
  Lendacky, Vasant Hegde"

* tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (264 commits)
  powerpc/mm/mce: Keep irqs disabled during lockless page table walk
  powerpc: Use ftrace_graph_ret_addr() when unwinding
  powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
  ftrace: Look up the address of return_to_handler() using helpers
  powerpc: dump kernel log before carrying out fadump or kdump
  docs: powerpc: Add missing documentation reference
  powerpc/xmon: Fix output of XIVE IPI
  powerpc/xmon: Improve output of XIVE interrupts
  powerpc/mm/radix: remove useless kernel messages
  powerpc/fadump: support holes in kernel boot memory area
  powerpc/fadump: remove RMA_START and RMA_END macros
  powerpc/fadump: update documentation about option to release opalcore
  powerpc/fadump: consider f/w load area
  powerpc/opalcore: provide an option to invalidate /sys/firmware/opal/core file
  powerpc/opalcore: export /sys/firmware/opal/core for analysing opal crashes
  powerpc/fadump: update documentation about CONFIG_PRESERVE_FA_DUMP
  powerpc/fadump: add support to preserve crash data on FADUMP disabled kernel
  powerpc/fadump: improve how crashed kernel's memory is reserved
  powerpc/fadump: consider reserved ranges while releasing memory
  powerpc/fadump: make crash memory ranges array allocation generic
  ...
2019-09-20 11:48:06 -07:00

439 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* arch-independent dma-mapping routines
*
* Copyright (c) 2006 SUSE Linux Products GmbH
* Copyright (c) 2006 Tejun Heo <teheo@suse.de>
*/
#include <linux/memblock.h> /* for max_pfn */
#include <linux/acpi.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/of_device.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
/*
* Managed DMA API
*/
struct dma_devres {
size_t size;
void *vaddr;
dma_addr_t dma_handle;
unsigned long attrs;
};
static void dmam_release(struct device *dev, void *res)
{
struct dma_devres *this = res;
dma_free_attrs(dev, this->size, this->vaddr, this->dma_handle,
this->attrs);
}
static int dmam_match(struct device *dev, void *res, void *match_data)
{
struct dma_devres *this = res, *match = match_data;
if (this->vaddr == match->vaddr) {
WARN_ON(this->size != match->size ||
this->dma_handle != match->dma_handle);
return 1;
}
return 0;
}
/**
* dmam_free_coherent - Managed dma_free_coherent()
* @dev: Device to free coherent memory for
* @size: Size of allocation
* @vaddr: Virtual address of the memory to free
* @dma_handle: DMA handle of the memory to free
*
* Managed dma_free_coherent().
*/
void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle)
{
struct dma_devres match_data = { size, vaddr, dma_handle };
dma_free_coherent(dev, size, vaddr, dma_handle);
WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data));
}
EXPORT_SYMBOL(dmam_free_coherent);
/**
* dmam_alloc_attrs - Managed dma_alloc_attrs()
* @dev: Device to allocate non_coherent memory for
* @size: Size of allocation
* @dma_handle: Out argument for allocated DMA handle
* @gfp: Allocation flags
* @attrs: Flags in the DMA_ATTR_* namespace.
*
* Managed dma_alloc_attrs(). Memory allocated using this function will be
* automatically released on driver detach.
*
* RETURNS:
* Pointer to allocated memory on success, NULL on failure.
*/
void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
struct dma_devres *dr;
void *vaddr;
dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
if (!dr)
return NULL;
vaddr = dma_alloc_attrs(dev, size, dma_handle, gfp, attrs);
if (!vaddr) {
devres_free(dr);
return NULL;
}
dr->vaddr = vaddr;
dr->dma_handle = *dma_handle;
dr->size = size;
dr->attrs = attrs;
devres_add(dev, dr);
return vaddr;
}
EXPORT_SYMBOL(dmam_alloc_attrs);
/*
* Create scatter-list for the already allocated DMA buffer.
*/
int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
struct page *page;
int ret;
if (!dev_is_dma_coherent(dev)) {
unsigned long pfn;
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
return -ENXIO;
/* If the PFN is not valid, we do not have a struct page */
pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
if (!pfn_valid(pfn))
return -ENXIO;
page = pfn_to_page(pfn);
} else {
page = virt_to_page(cpu_addr);
}
ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (!ret)
sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
return ret;
}
/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
* that the intention is to allow exporting memory allocated via the
* coherent DMA APIs through the dma_buf API, which only accepts a
* scattertable. This presents a couple of problems:
* 1. Not all memory allocated via the coherent DMA APIs is backed by
* a struct page
* 2. Passing coherent DMA memory into the streaming APIs is not allowed
* as we will try to flush the memory through a different alias to that
* actually being used (and the flushes are redundant.)
*/
int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops))
return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr,
size, attrs);
if (!ops->get_sgtable)
return -ENXIO;
return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
}
EXPORT_SYMBOL(dma_get_sgtable_attrs);
#ifdef CONFIG_MMU
/*
* Return the page attributes used for mapping dma_alloc_* memory, either in
* kernel space if remapping is needed, or to userspace through dma_mmap_*.
*/
pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
{
if (dev_is_dma_coherent(dev) ||
(IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
(attrs & DMA_ATTR_NON_CONSISTENT)))
return prot;
#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
if (attrs & DMA_ATTR_WRITE_COMBINE)
return pgprot_writecombine(prot);
#endif
return pgprot_dmacoherent(prot);
}
#endif /* CONFIG_MMU */
/*
* Create userspace mapping for the DMA-coherent memory.
*/
int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
#ifdef CONFIG_MMU
unsigned long user_count = vma_pages(vma);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
unsigned long pfn;
int ret = -ENXIO;
vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
if (off >= count || user_count > count - off)
return -ENXIO;
if (!dev_is_dma_coherent(dev)) {
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
return -ENXIO;
/* If the PFN is not valid, we do not have a struct page */
pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
if (!pfn_valid(pfn))
return -ENXIO;
} else {
pfn = page_to_pfn(virt_to_page(cpu_addr));
}
return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
user_count << PAGE_SHIFT, vma->vm_page_prot);
#else
return -ENXIO;
#endif /* CONFIG_MMU */
}
/**
* dma_can_mmap - check if a given device supports dma_mmap_*
* @dev: device to check
*
* Returns %true if @dev supports dma_mmap_coherent() and dma_mmap_attrs() to
* map DMA allocations to userspace.
*/
bool dma_can_mmap(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops)) {
return IS_ENABLED(CONFIG_MMU) &&
(dev_is_dma_coherent(dev) ||
IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN));
}
return ops->mmap != NULL;
}
EXPORT_SYMBOL_GPL(dma_can_mmap);
/**
* dma_mmap_attrs - map a coherent DMA allocation into user space
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
* @vma: vm_area_struct describing requested user mapping
* @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs
* @dma_addr: device-view address returned from dma_alloc_attrs
* @size: size of memory originally requested in dma_alloc_attrs
* @attrs: attributes of mapping properties requested in dma_alloc_attrs
*
* Map a coherent DMA buffer previously allocated by dma_alloc_attrs into user
* space. The coherent DMA buffer must not be freed by the driver until the
* user space mapping has been released.
*/
int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops))
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size,
attrs);
if (!ops->mmap)
return -ENXIO;
return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
}
EXPORT_SYMBOL(dma_mmap_attrs);
u64 dma_get_required_mask(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops))
return dma_direct_get_required_mask(dev);
if (ops->get_required_mask)
return ops->get_required_mask(dev);
/*
* We require every DMA ops implementation to at least support a 32-bit
* DMA mask (and use bounce buffering if that isn't supported in
* hardware). As the direct mapping code has its own routine to
* actually report an optimal mask we default to 32-bit here as that
* is the right thing for most IOMMUs, and at least not actively
* harmful in general.
*/
return DMA_BIT_MASK(32);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
void *cpu_addr;
WARN_ON_ONCE(!dev->coherent_dma_mask);
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
return cpu_addr;
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
if (dma_is_direct(ops))
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
else if (ops->alloc)
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
else
return NULL;
debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
return cpu_addr;
}
EXPORT_SYMBOL(dma_alloc_attrs);
void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle, unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
return;
/*
* On non-coherent platforms which implement DMA-coherent buffers via
* non-cacheable remaps, ops->free() may call vunmap(). Thus getting
* this far in IRQ context is a) at risk of a BUG_ON() or trying to
* sleep on some machines, and b) an indication that the driver is
* probably misusing the coherent API anyway.
*/
WARN_ON(irqs_disabled());
if (!cpu_addr)
return;
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
if (dma_is_direct(ops))
dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
else if (ops->free)
ops->free(dev, size, cpu_addr, dma_handle, attrs);
}
EXPORT_SYMBOL(dma_free_attrs);
int dma_supported(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (dma_is_direct(ops))
return dma_direct_supported(dev, mask);
if (!ops->dma_supported)
return 1;
return ops->dma_supported(dev, mask);
}
EXPORT_SYMBOL(dma_supported);
#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK
void arch_dma_set_mask(struct device *dev, u64 mask);
#else
#define arch_dma_set_mask(dev, mask) do { } while (0)
#endif
int dma_set_mask(struct device *dev, u64 mask)
{
/*
* Truncate the mask to the actually supported dma_addr_t width to
* avoid generating unsupportable addresses.
*/
mask = (dma_addr_t)mask;
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
arch_dma_set_mask(dev, mask);
*dev->dma_mask = mask;
return 0;
}
EXPORT_SYMBOL(dma_set_mask);
#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
/*
* Truncate the mask to the actually supported dma_addr_t width to
* avoid generating unsupportable addresses.
*/
mask = (dma_addr_t)mask;
if (!dma_supported(dev, mask))
return -EIO;
dev->coherent_dma_mask = mask;
return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif
void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (dma_is_direct(ops))
arch_dma_cache_sync(dev, vaddr, size, dir);
else if (ops->cache_sync)
ops->cache_sync(dev, vaddr, size, dir);
}
EXPORT_SYMBOL(dma_cache_sync);
size_t dma_max_mapping_size(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
size_t size = SIZE_MAX;
if (dma_is_direct(ops))
size = dma_direct_max_mapping_size(dev);
else if (ops && ops->max_mapping_size)
size = ops->max_mapping_size(dev);
return size;
}
EXPORT_SYMBOL_GPL(dma_max_mapping_size);
unsigned long dma_get_merge_boundary(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (!ops || !ops->get_merge_boundary)
return 0; /* can't merge */
return ops->get_merge_boundary(dev);
}
EXPORT_SYMBOL_GPL(dma_get_merge_boundary);