x86, AMD IOMMU: add comments to core code

This patch adds comments explaining how the AMD IOMMU core code implements the
DMA remapping functionality.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
commit 431b2a2015 (parent b65233a9c1)
Author: Joerg Roedel, 2008-07-11 17:14:22 +02:00
Committer: Ingo Molnar


@ -34,6 +34,9 @@
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
/*
* general struct to manage commands sent to an IOMMU
*/
struct command {
u32 data[4];
};
@ -41,11 +44,22 @@ struct command {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
/* returns !0 if the IOMMU is caching non-present entries in its TLB */
static int iommu_has_npcache(struct amd_iommu *iommu)
{
return iommu->cap & IOMMU_CAP_NPCACHE;
}
/****************************************************************************
*
* IOMMU command queuing functions
*
****************************************************************************/
/*
* Writes the command to the IOMMU's command buffer and informs the
* hardware about the new command. Must be called with iommu->lock held.
*/
static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
{
u32 tail, head;
@ -63,6 +77,10 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
return 0;
}
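The command buffer referred to above works as a ring buffer: the driver copies the command to the slot at the current tail, then advances the tail pointer so the hardware picks it up, and the buffer is full when the advanced tail would reach the head. Below is a minimal user-space sketch of that ring logic; the buffer size, the plain head/tail fields standing in for the MMIO registers, and the queue_command() name are assumptions for illustration, not the real driver code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CMD_BUFFER_ENTRIES 256            /* illustrative ring size */

struct command { uint32_t data[4]; };     /* mirrors the struct in the patch */

struct cmd_buffer {
    struct command ring[CMD_BUFFER_ENTRIES];
    uint32_t head;                        /* stands in for the hardware-owned head register */
    uint32_t tail;                        /* stands in for the driver-owned tail register */
};

/* Model of __iommu_queue_command(): copy the command at the tail and
 * advance the tail; fail if the ring is full. */
static int queue_command(struct cmd_buffer *buf, const struct command *cmd)
{
    uint32_t next_tail = (buf->tail + 1) % CMD_BUFFER_ENTRIES;

    if (next_tail == buf->head)
        return -1;                        /* ring full, caller has to retry */

    memcpy(&buf->ring[buf->tail], cmd, sizeof(*cmd));
    buf->tail = next_tail;                /* the real code writes the tail MMIO register here */
    return 0;
}

int main(void)
{
    struct cmd_buffer buf = { .head = 0, .tail = 0 };
    struct command cmd = { .data = { 0x1, 0, 0, 0 } };

    if (queue_command(&buf, &cmd) == 0)
        printf("queued command, tail now %u\n", buf.tail);
    return 0;
}

The iommu->lock requirement mentioned in the comment exists because two CPUs must not update the tail concurrently; the sketch leaves locking out.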
/*
* General queuing function for commands. Takes iommu->lock and calls
* __iommu_queue_command().
*/
static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
{
unsigned long flags;
@ -75,6 +93,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
return ret;
}
/*
* This function is called whenever we need to ensure that the IOMMU has
* completed execution of all commands we sent. It sends a
* COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
* us about that by writing a value to a physical address we pass with
* the command.
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
int ret;
@ -101,6 +126,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
return 0;
}
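To make the handshake the comment describes concrete, here is a hedged user-space model of COMPLETION_WAIT: the driver hands the hardware the address of a flag together with the command and then spins until the hardware stores a value there. The hardware_executes_completion_wait() stub and the flag are invented stand-ins for the real IOMMU and command encoding.

#include <stdint.h>
#include <stdio.h>

static volatile uint64_t completion_flag;

/* Stub standing in for the IOMMU: the real hardware performs this store
 * only after every earlier command in the buffer has completed. */
static void hardware_executes_completion_wait(volatile uint64_t *store_addr)
{
    *store_addr = 1;
}

static int completion_wait(void)
{
    completion_flag = 0;
    /* real code: build a COMPLETION_WAIT command carrying the physical
     * address of the flag and queue it with iommu_queue_command() */
    hardware_executes_completion_wait(&completion_flag);

    while (!completion_flag)
        ;                                 /* a real implementation would also bound this wait */
    return 0;
}

int main(void)
{
    completion_wait();
    printf("IOMMU has caught up with the command queue\n");
    return 0;
}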
/*
* Command send function for invalidating a device table entry
*/
static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
{
struct command cmd;
@ -116,6 +144,9 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
return iommu_queue_command(iommu, &cmd);
}
/*
* Generic command send function for invalidating TLB entries
*/
static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
u64 address, u16 domid, int pde, int s)
{
@ -127,9 +158,9 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
cmd.data[1] |= domid;
cmd.data[2] = LOW_U32(address);
cmd.data[3] = HIGH_U32(address);
if (s)
if (s) /* size bit - we flush more than one 4kb page */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde)
if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
iommu->need_sync = 1;
@ -137,6 +168,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
return iommu_queue_command(iommu, &cmd);
}
/*
* TLB invalidation function which is called from the mapping functions.
* It invalidates a single PTE if the range to flush is within a single
* page. Otherwise it flushes the whole TLB of the IOMMU.
*/
static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
u64 address, size_t size)
{
@ -159,6 +195,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
return 0;
}
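A small standalone sketch of that decision, assuming 4kb pages: count how many pages the range [address, address + size) touches, invalidate the single page if it is just one, otherwise issue the flush-everything form. The FLUSH_ALL_ADDRESS constant and the printf stand-ins for the queued commands are illustrative, not the real command encoding.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

#define FLUSH_ALL_ADDRESS 0x7ffffffffffff000ULL   /* illustrative "whole range" address */

/* Model of the iommu_flush_pages() decision: one page -> targeted
 * invalidation, more than one page -> flush the whole domain TLB. */
static void flush_pages(uint64_t address, uint64_t size)
{
    uint64_t start = address & PAGE_MASK;
    uint64_t pages = (address + size - start + PAGE_SIZE - 1) >> PAGE_SHIFT;

    if (pages == 1)
        printf("invalidate single page at 0x%llx\n",
               (unsigned long long)start);
    else
        printf("invalidate whole TLB (address 0x%llx, size bit set)\n",
               (unsigned long long)FLUSH_ALL_ADDRESS);
}

int main(void)
{
    flush_pages(0x1000, 0x100);    /* fits in one page  -> single PTE flush */
    flush_pages(0x1000, 0x3000);   /* spans three pages -> full TLB flush   */
    return 0;
}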
/****************************************************************************
*
* The functions below are used to create the page table mappings for
* unity mapped regions.
*
****************************************************************************/
/*
* Generic mapping function. It maps a physical address into a DMA
* address space. It allocates the page table pages if necessary.
* In the future it can be extended to a generic mapping function
* supporting all features of AMD IOMMU page tables like level skipping
* and full 64 bit address spaces.
*/
static int iommu_map(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
@ -209,6 +259,10 @@ static int iommu_map(struct protection_domain *dom,
return 0;
}
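As a rough model of what happens when iommu_map() "allocates the page table pages if necessary", the sketch below walks a three-level table with 512 entries per level and allocates missing intermediate levels on the way down. The level shifts, the use of plain pointers instead of packed table entries, and the missing present/permission bits are simplifications of the real AMD IOMMU page table format.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#define ENTRIES    512
#define LVL2_SHIFT 30
#define LVL1_SHIFT 21
#define LVL0_SHIFT 12
#define LVL_MASK   0x1ffULL

struct pt_level { void *entry[ENTRIES]; };

static struct pt_level *alloc_table(void)
{
    return calloc(1, sizeof(struct pt_level));   /* zeroed = nothing present */
}

/* Model of iommu_map(): walk the table for bus_addr, allocate missing
 * intermediate levels and install phys_addr in the leaf entry. */
static int map_page(struct pt_level *root, uint64_t bus_addr, uint64_t phys_addr)
{
    unsigned int i2 = (bus_addr >> LVL2_SHIFT) & LVL_MASK;
    unsigned int i1 = (bus_addr >> LVL1_SHIFT) & LVL_MASK;
    unsigned int i0 = (bus_addr >> LVL0_SHIFT) & LVL_MASK;
    struct pt_level *l1, *l0;

    if (!root->entry[i2] && !(root->entry[i2] = alloc_table()))
        return -1;
    l1 = root->entry[i2];

    if (!l1->entry[i1] && !(l1->entry[i1] = alloc_table()))
        return -1;
    l0 = l1->entry[i1];

    if (l0->entry[i0])
        return -1;                               /* already mapped */
    l0->entry[i0] = (void *)(uintptr_t)phys_addr;
    return 0;
}

int main(void)
{
    struct pt_level *root = alloc_table();

    if (!root)
        return 1;
    printf("map returned %d\n", map_page(root, 0x100000, 0xdead000));
    return 0;
}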
/*
* This function checks if a specific unity mapping entry is needed for
* this specific IOMMU.
*/
static int iommu_for_unity_map(struct amd_iommu *iommu,
struct unity_map_entry *entry)
{
@ -223,6 +277,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
return 0;
}
/*
* Init the unity mappings for a specific IOMMU in the system
*
* Basically iterates over all unity mapping entries and applies them to
* the default DMA domain of that IOMMU if necessary.
*/
static int iommu_init_unity_mappings(struct amd_iommu *iommu)
{
struct unity_map_entry *entry;
@ -239,6 +299,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu)
return 0;
}
/*
* This function actually applies the mapping to the page table of the
* dma_ops domain.
*/
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e)
{
@ -261,6 +325,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
return 0;
}
/*
* Inits the unity mappings required for a specific device
*/
static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
u16 devid)
{
@ -278,12 +345,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
return 0;
}
/****************************************************************************
*
* The next functions belong to the address allocator for the dma_ops
* interface functions. They work like the allocators in the other IOMMU
* drivers. It's basically a bitmap which marks the allocated pages in
* the aperture. Maybe it could be enhanced in the future to a more
* efficient allocator.
*
****************************************************************************/
static unsigned long dma_mask_to_pages(unsigned long mask)
{
return (mask >> PAGE_SHIFT) +
(PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
}
/*
* The address allocator core function.
*
* called with domain->lock held
*/
static unsigned long dma_ops_alloc_addresses(struct device *dev,
struct dma_ops_domain *dom,
unsigned int pages)
@ -317,6 +398,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
return address;
}
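A toy model of that allocator: one byte per aperture page (standing in for a bit), a linear scan for a run of free pages, and a next_bit hint so consecutive allocations continue where the previous one stopped. The aperture size and the two-pass wrap-around are illustrative; the real code delegates the scan to the kernel's iommu helper routines, as the iommu_area_free() call further down suggests.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define APERTURE_PAGES 1024               /* illustrative aperture size in pages */

struct dma_aperture {
    uint8_t bitmap[APERTURE_PAGES];       /* 1 = page allocated */
    unsigned long next_bit;               /* where the next search starts */
};

/* Find a run of 'pages' free pages, mark them allocated and return the
 * first page index, or -1 if the aperture is exhausted. */
static long alloc_pages_range(struct dma_aperture *ap, unsigned int pages)
{
    unsigned long start = ap->next_bit;

    for (int pass = 0; pass < 2; pass++) {            /* try from the hint, then from 0 */
        for (unsigned long i = start; i + pages <= APERTURE_PAGES; i++) {
            unsigned int run = 0;

            while (run < pages && !ap->bitmap[i + run])
                run++;
            if (run == pages) {
                memset(&ap->bitmap[i], 1, pages);     /* mark the run as allocated */
                ap->next_bit = i + pages;
                return (long)i;
            }
            i += run;                                  /* skip past the allocated page */
        }
        start = 0;
    }
    return -1;
}

int main(void)
{
    struct dma_aperture ap = { .next_bit = 0 };

    printf("first allocation at page %ld\n", alloc_pages_range(&ap, 4));
    printf("second allocation at page %ld\n", alloc_pages_range(&ap, 4));
    return 0;
}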
/*
* The address free function.
*
* called with domain->lock held
*/
static void dma_ops_free_addresses(struct dma_ops_domain *dom,
unsigned long address,
unsigned int pages)
@ -325,6 +411,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
iommu_area_free(dom->bitmap, address, pages);
}
/****************************************************************************
*
* The next functions belong to the domain allocation. A domain is
* allocated for every IOMMU as the default domain. If device isolation
* is enabled, every device gets its own domain. The most important thing
* about domains is the page table mapping the DMA address space they
* contain.
*
****************************************************************************/
static u16 domain_id_alloc(void)
{
unsigned long flags;
@ -342,6 +438,10 @@ static u16 domain_id_alloc(void)
return id;
}
/*
* Used to reserve address ranges in the aperture (e.g. for exclusion
* ranges).
*/
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages)
@ -382,6 +482,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
free_page((unsigned long)p1);
}
/*
* Free a domain, only used if something went wrong in the
* allocation path and we need to free an already allocated page table
*/
static void dma_ops_domain_free(struct dma_ops_domain *dom)
{
if (!dom)
@ -396,6 +500,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
kfree(dom);
}
/*
* Allocates a new protection domain usable for the dma_ops functions.
* It also initializes the page table and the address allocator data
* structures required for the dma_ops interface
*/
static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
unsigned order)
{
@ -436,6 +545,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
dma_dom->bitmap[0] = 1;
dma_dom->next_bit = 0;
/* Initialize the exclusion range if necessary */
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
@ -444,6 +554,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
dma_ops_reserve_addresses(dma_dom, startpage, pages);
}
/*
* As the last step, build the page tables so we don't need to
* allocate page table pages in the dma_ops mapping/unmapping
* path.
*/
num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
GFP_KERNEL);
@ -472,6 +587,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
return NULL;
}
/*
* Find out the protection domain structure for a given PCI device. This
* will give us the pointer to the page table root for example.
*/
static struct protection_domain *domain_for_device(u16 devid)
{
struct protection_domain *dom;
@ -484,6 +603,10 @@ static struct protection_domain *domain_for_device(u16 devid)
return dom;
}
/*
* If a device is not yet associated with a domain, this function
* assigns it to a domain and makes the association visible to the hardware
*/
static void set_device_domain(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
@ -508,6 +631,19 @@ static void set_device_domain(struct amd_iommu *iommu,
iommu->need_sync = 1;
}
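What set_device_domain() makes visible to the hardware is an entry in the device table, indexed by the 16-bit requestor id, telling the IOMMU which page table root and domain id apply to that device. Here is a hedged sketch with an invented, unpacked entry layout; the real entry is a larger packed structure, and the real code also invalidates the device table entry for this devid afterwards (see iommu_queue_inv_dev_entry() above and the need_sync flag here).

#include <stdint.h>
#include <stdio.h>

/* Toy device table: one entry per possible 16-bit requestor id. */
struct dev_table_entry {
    uint64_t pt_root;        /* page table root the IOMMU should walk */
    uint16_t domain_id;
    int      valid;
};

static struct dev_table_entry dev_table[1 << 16];

static void set_device_domain(uint16_t devid, uint64_t pt_root, uint16_t domain_id)
{
    dev_table[devid].pt_root   = pt_root;
    dev_table[devid].domain_id = domain_id;
    dev_table[devid].valid     = 1;
    /* real code: queue an "invalidate device table entry" command so the
     * IOMMU drops any cached copy of this entry */
}

int main(void)
{
    uint16_t devid = (0x01 << 8) | 0x10;     /* bus 1, devfn 0x10 */
    set_device_domain(devid, 0x100000, 42);
    printf("devid 0x%04x -> domain %u\n",
           (unsigned)devid, (unsigned)dev_table[devid].domain_id);
    return 0;
}

The requestor id here is built the same way as the _bdf value in get_device_resources() further down: bus number in the upper eight bits, devfn in the lower eight.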
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
*
*****************************************************************************/
/*
* In the dma_ops path we only have the struct device. This function
* finds the corresponding IOMMU, the protection domain and the
* requestor id for a given device.
* If the device is not yet associated with a domain this is also done
* in this function.
*/
static int get_device_resources(struct device *dev,
struct amd_iommu **iommu,
struct protection_domain **domain,
@ -522,6 +658,7 @@ static int get_device_resources(struct device *dev,
pcidev = to_pci_dev(dev);
_bdf = (pcidev->bus->number << 8) | pcidev->devfn;
/* device not translated by any IOMMU in the system? */
if (_bdf >= amd_iommu_last_bdf) {
*iommu = NULL;
*domain = NULL;
@ -547,6 +684,10 @@ static int get_device_resources(struct device *dev,
return 1;
}
/*
* This is the generic map function. It maps one 4kb page at paddr to
* the given address in the DMA address space for the domain.
*/
static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
struct dma_ops_domain *dom,
unsigned long address,
@ -578,6 +719,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
return (dma_addr_t)address;
}
/*
* The generic unmapping function for one page in the DMA address space.
*/
static void dma_ops_domain_unmap(struct amd_iommu *iommu,
struct dma_ops_domain *dom,
unsigned long address)
@ -597,6 +741,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
*pte = 0ULL;
}
/*
* This function contains common code for mapping a physically
* contiguous memory region into the DMA address space. It is used by all
* mapping functions provided by this IOMMU driver.
* Must be called with the domain lock held.
*/
static dma_addr_t __map_single(struct device *dev,
struct amd_iommu *iommu,
struct dma_ops_domain *dma_dom,
@ -628,6 +778,10 @@ static dma_addr_t __map_single(struct device *dev,
return address;
}
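A compact model of the bookkeeping in __map_single(): paddr may not be page aligned, so the code keeps the offset into the first page, allocates enough aperture pages to cover offset plus size, maps them one by one and returns the offset-adjusted DMA address. The fixed dma_base and the printf stand-ins for dma_ops_alloc_addresses() and dma_ops_domain_map() are assumptions for illustration.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)

static uint64_t map_single_model(uint64_t paddr, uint64_t size)
{
    uint64_t offset   = paddr & (PAGE_SIZE - 1);
    uint64_t pages    = (offset + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
    uint64_t dma_base = 0x200000;          /* stand-in for dma_ops_alloc_addresses() */

    for (uint64_t i = 0; i < pages; i++)   /* stand-in for dma_ops_domain_map() per page */
        printf("map dma 0x%llx -> phys 0x%llx\n",
               (unsigned long long)(dma_base + i * PAGE_SIZE),
               (unsigned long long)((paddr & ~(PAGE_SIZE - 1)) + i * PAGE_SIZE));

    return dma_base + offset;              /* the caller gets the offset back */
}

int main(void)
{
    printf("dma address: 0x%llx\n",
           (unsigned long long)map_single_model(0x1234567, 0x1800));
    return 0;
}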
/*
* Does the reverse of the __map_single function. Must be called with
* the domain lock held, too.
*/
static void __unmap_single(struct amd_iommu *iommu,
struct dma_ops_domain *dma_dom,
dma_addr_t dma_addr,
@ -652,6 +806,9 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_ops_free_addresses(dma_dom, dma_addr, pages);
}
/*
* The exported map_single function for dma_ops.
*/
static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
size_t size, int dir)
{
@ -664,6 +821,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
get_device_resources(dev, &iommu, &domain, &devid);
if (iommu == NULL || domain == NULL)
/* device not handled by any AMD IOMMU */
return (dma_addr_t)paddr;
spin_lock_irqsave(&domain->lock, flags);
@ -683,6 +841,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
return addr;
}
/*
* The exported unmap_single function for dma_ops.
*/
static void unmap_single(struct device *dev, dma_addr_t dma_addr,
size_t size, int dir)
{
@ -692,6 +853,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
u16 devid;
if (!get_device_resources(dev, &iommu, &domain, &devid))
/* device not handled by any AMD IOMMU */
return;
spin_lock_irqsave(&domain->lock, flags);
@ -706,6 +868,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
spin_unlock_irqrestore(&domain->lock, flags);
}
/*
* This is a special map_sg function which is used if we have to map a
* device that is not handled by an AMD IOMMU in the system.
*/
static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
int nelems, int dir)
{
@ -720,6 +886,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
return nelems;
}
/*
* The exported map_sg function for dma_ops (handles scatter-gather
* lists).
*/
static int map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, int dir)
{
@ -775,6 +945,10 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
goto out;
}
/*
* The exported unmap_sg function for dma_ops (handles scatter-gather
* lists).
*/
static void unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, int dir)
{
@ -804,6 +978,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
spin_unlock_irqrestore(&domain->lock, flags);
}
/*
* The exported alloc_coherent function for dma_ops.
*/
static void *alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag)
{
@ -851,6 +1028,11 @@ static void *alloc_coherent(struct device *dev, size_t size,
return virt_addr;
}
/*
* The exported free_coherent function for dma_ops.
* FIXME: fix the generic x86 DMA layer so that it actually calls that
* function.
*/
static void free_coherent(struct device *dev, size_t size,
void *virt_addr, dma_addr_t dma_addr)
{
@ -879,6 +1061,8 @@ static void free_coherent(struct device *dev, size_t size,
}
/*
* The function for pre-allocating protection domains.
*
* If the driver core informed the DMA layer when a driver grabs a device,
* we would not need to preallocate the protection domains anymore.
* For now we have to.
@ -921,12 +1105,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = {
.unmap_sg = unmap_sg,
};
/*
* The function which hooks the AMD IOMMU driver into dma_ops.
*/
int __init amd_iommu_init_dma_ops(void)
{
struct amd_iommu *iommu;
int order = amd_iommu_aperture_order;
int ret;
/*
* First, allocate a default protection domain for every IOMMU we
* found in the system. Devices not assigned to any other
* protection domain will be assigned to the default one.
*/
list_for_each_entry(iommu, &amd_iommu_list, list) {
iommu->default_dom = dma_ops_domain_alloc(iommu, order);
if (iommu->default_dom == NULL)
@ -936,6 +1128,10 @@ int __init amd_iommu_init_dma_ops(void)
goto free_domains;
}
/*
* If device isolation is enabled, pre-allocate the protection
* domains for each device.
*/
if (amd_iommu_isolate)
prealloc_protection_domains();
@ -947,6 +1143,7 @@ int __init amd_iommu_init_dma_ops(void)
gart_iommu_aperture = 0;
#endif
/* Finally, make our dma_ops visible to the drivers */
dma_ops = &amd_iommu_dma_ops;
return 0;
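Read together, the comments in this last hunk describe the init ordering: allocate one default protection domain per IOMMU found in the system, pre-allocate per-device domains only when isolation is enabled, and switch the global dma_ops pointer last, so drivers never see a half-initialized setup. A very small model of that ordering follows; the names and stand-in calls are invented.

#include <stdio.h>

struct iommu { int has_default_dom; };

static int init_dma_ops_model(struct iommu *iommus, int count, int isolate)
{
    for (int i = 0; i < count; i++)
        iommus[i].has_default_dom = 1;     /* stand-in for dma_ops_domain_alloc() */

    if (isolate)
        printf("pre-allocating one protection domain per device\n");

    printf("installing the AMD IOMMU dma_ops as the system dma_ops\n");
    return 0;
}

int main(void)
{
    struct iommu iommus[2] = { { 0 }, { 0 } };

    return init_dma_ops_model(iommus, 2, 1);
}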