media: staging: media: tegra-vde: Add IOMMU support

All Tegra SoCs can provide memory isolation for the video decoder
hardware by using an IOMMU. An IOMMU is also required on Tegra30+ in
order to handle the sparse dmabufs that the GPU exports in a default
kernel configuration.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
This commit is contained in:
Dmitry Osipenko 2019-06-23 13:07:29 -04:00 committed by Mauro Carvalho Chehab
parent 6bc5a4a192
commit b301f8de19
6 changed files with 346 additions and 90 deletions

View File

@ -3,6 +3,7 @@ config TEGRA_VDE
tristate "NVIDIA Tegra Video Decoder Engine driver"
depends on ARCH_TEGRA || COMPILE_TEST
select DMA_SHARED_BUFFER
select IOMMU_IOVA if IOMMU_SUPPORT
select SRAM
help
Say Y here to enable support for the NVIDIA Tegra video decoder

View File

@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
tegra-vde-y := vde.o iommu.o
obj-$(CONFIG_TEGRA_VDE) += tegra-vde.o

View File

@ -0,0 +1,159 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* NVIDIA Tegra Video decoder driver
*
* Copyright (C) 2016-2019 GRATE-DRIVER project
*/
#include <linux/iommu.h>
#include <linux/iova.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
#include <asm/dma-iommu.h>
#endif
#include "vde.h"
/*
 * Allocate an IOVA range for a buffer and map the buffer's pages into the
 * VDE IOMMU domain.
 *
 * @vde:   decoder instance; vde->domain and vde->iova must be set up
 * @sgt:   scatter-gather table describing the buffer
 * @iovap: on success, receives the IOVA descriptor that must later be
 *         passed to tegra_vde_iommu_unmap()
 * @addrp: on success, receives the device-visible start address
 * @size:  buffer size in bytes; rounded up to the IOVA granule here
 *
 * Returns 0 on success, -ENOMEM if no IOVA range could be allocated or
 * -ENXIO if iommu_map_sg() did not map anything.
 */
int tegra_vde_iommu_map(struct tegra_vde *vde,
			struct sg_table *sgt,
			struct iova **iovap,
			dma_addr_t *addrp,
			size_t size)
{
	struct iova *iova;
	unsigned long shift;
	unsigned long end;
	dma_addr_t addr;

	end = vde->domain->geometry.aperture_end;
	size = iova_align(&vde->iova, size);
	shift = iova_shift(&vde->iova);

	/* alloc_iova() works in units of IOVA pages, not bytes */
	iova = alloc_iova(&vde->iova, size >> shift, end >> shift, true);
	if (!iova)
		return -ENOMEM;

	addr = iova_dma_addr(&vde->iova, iova);

	/*
	 * iommu_map_sg() walks the CPU pages of the scatterlist, so it has
	 * to be given the original number of entries (orig_nents). The
	 * nents field holds the number of DMA-mapped entries, which may be
	 * smaller when the DMA API merged adjacent entries and would then
	 * leave the tail of the buffer unmapped.
	 */
	size = iommu_map_sg(vde->domain, addr, sgt->sgl, sgt->orig_nents,
			    IOMMU_READ | IOMMU_WRITE);
	if (!size) {
		__free_iova(&vde->iova, iova);
		return -ENXIO;
	}

	*iovap = iova;
	*addrp = addr;

	return 0;
}
/*
 * Undo tegra_vde_iommu_map(): remove the mapping that covers @iova from the
 * VDE IOMMU domain and hand the IOVA range back to the allocator.
 */
void tegra_vde_iommu_unmap(struct tegra_vde *vde, struct iova *iova)
{
	struct iova_domain *iovad = &vde->iova;
	dma_addr_t dma_start = iova_dma_addr(iovad, iova);
	/* iova_size() counts IOVA pages; convert it to bytes */
	unsigned long nbytes = iova_size(iova) << iova_shift(iovad);

	iommu_unmap(vde->domain, dma_start, nbytes);
	__free_iova(iovad, iova);
}
int tegra_vde_iommu_init(struct tegra_vde *vde)
{
struct device *dev = vde->miscdev.parent;
struct iova *iova;
unsigned long order;
unsigned long shift;
int err;
vde->group = iommu_group_get(dev);
if (!vde->group)
return 0;
#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
if (dev->archdata.mapping) {
struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
arm_iommu_detach_device(dev);
arm_iommu_release_mapping(mapping);
}
#endif
vde->domain = iommu_domain_alloc(&platform_bus_type);
if (!vde->domain) {
err = -ENOMEM;
goto put_group;
}
err = iova_cache_get();
if (err)
goto free_domain;
order = __ffs(vde->domain->pgsize_bitmap);
init_iova_domain(&vde->iova, 1UL << order, 0);
err = iommu_attach_group(vde->domain, vde->group);
if (err)
goto put_iova;
/*
* We're using some static addresses that are not accessible by VDE
* to trap invalid memory accesses.
*/
shift = iova_shift(&vde->iova);
iova = reserve_iova(&vde->iova, 0x60000000 >> shift,
0x70000000 >> shift);
if (!iova) {
err = -ENOMEM;
goto detach_group;
}
vde->iova_resv_static_addresses = iova;
/*
* BSEV's end-address wraps around due to integer overflow during
* of hardware context preparation if IOVA is allocated at the end
* of address space and VDE can't handle that. Hence simply reserve
* the last page to avoid the problem.
*/
iova = reserve_iova(&vde->iova, 0xffffffff >> shift,
(0xffffffff >> shift) + 1);
if (!iova) {
err = -ENOMEM;
goto unreserve_iova;
}
vde->iova_resv_last_page = iova;
return 0;
unreserve_iova:
__free_iova(&vde->iova, vde->iova_resv_static_addresses);
detach_group:
iommu_detach_group(vde->domain, vde->group);
put_iova:
put_iova_domain(&vde->iova);
iova_cache_put();
free_domain:
iommu_domain_free(vde->domain);
put_group:
iommu_group_put(vde->group);
return err;
}
/*
 * Release everything tegra_vde_iommu_init() set up, in reverse order of
 * acquisition. Does nothing when no IOMMU domain is in use.
 */
void tegra_vde_iommu_deinit(struct tegra_vde *vde)
{
	struct iova_domain *iovad = &vde->iova;

	if (!vde->domain)
		return;

	/* Drop the two reservations made during init */
	__free_iova(iovad, vde->iova_resv_last_page);
	__free_iova(iovad, vde->iova_resv_static_addresses);

	iommu_detach_group(vde->domain, vde->group);

	put_iova_domain(iovad);
	iova_cache_put();

	iommu_domain_free(vde->domain);
	iommu_group_put(vde->group);

	/* Mark the IOMMU as torn down so that a repeated call is a no-op */
	vde->domain = NULL;
}

View File

@ -8,6 +8,8 @@
#include <linux/tracepoint.h>
#include "vde.h"
DECLARE_EVENT_CLASS(register_access,
TP_PROTO(struct tegra_vde *vde, void __iomem *base,
u32 offset, u32 value),

View File

@ -22,6 +22,10 @@
#include <soc/tegra/pmc.h>
#include "uapi.h"
#include "vde.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
#define ICMDQUE_WR 0x00
#define CMDQUE_CONTROL 0x08
@ -33,6 +37,10 @@
#define BSE_DMA_BUSY BIT(23)
struct video_frame {
struct iova *y_iova;
struct iova *cb_iova;
struct iova *cr_iova;
struct iova *aux_iova;
struct dma_buf_attachment *y_dmabuf_attachment;
struct dma_buf_attachment *cb_dmabuf_attachment;
struct dma_buf_attachment *cr_dmabuf_attachment;
@ -49,63 +57,6 @@ struct video_frame {
u32 flags;
};
struct tegra_vde {
void __iomem *sxe;
void __iomem *bsev;
void __iomem *mbe;
void __iomem *ppe;
void __iomem *mce;
void __iomem *tfe;
void __iomem *ppb;
void __iomem *vdma;
void __iomem *frameid;
struct mutex lock;
struct miscdevice miscdev;
struct reset_control *rst;
struct reset_control *rst_mc;
struct gen_pool *iram_pool;
struct completion decode_completion;
struct clk *clk;
dma_addr_t iram_lists_addr;
u32 *iram;
};
static __maybe_unused char const *
tegra_vde_reg_base_name(struct tegra_vde *vde, void __iomem *base)
{
if (vde->sxe == base)
return "SXE";
if (vde->bsev == base)
return "BSEV";
if (vde->mbe == base)
return "MBE";
if (vde->ppe == base)
return "PPE";
if (vde->mce == base)
return "MCE";
if (vde->tfe == base)
return "TFE";
if (vde->ppb == base)
return "PPB";
if (vde->vdma == base)
return "VDMA";
if (vde->frameid == base)
return "FRAMEID";
return "???";
}
#define CREATE_TRACE_POINTS
#include "trace.h"
static void tegra_vde_writel(struct tegra_vde *vde,
u32 value, void __iomem *base, u32 offset)
{
@ -543,28 +494,35 @@ static void tegra_vde_decode_frame(struct tegra_vde *vde,
vde->sxe, 0x00);
}
static void tegra_vde_detach_and_put_dmabuf(struct dma_buf_attachment *a,
static void tegra_vde_detach_and_put_dmabuf(struct tegra_vde *vde,
enum dma_data_direction dma_dir,
struct dma_buf_attachment *a,
struct sg_table *sgt,
enum dma_data_direction dma_dir)
struct iova *iova)
{
struct dma_buf *dmabuf = a->dmabuf;
if (vde->domain)
tegra_vde_iommu_unmap(vde, iova);
dma_buf_unmap_attachment(a, sgt, dma_dir);
dma_buf_detach(dmabuf, a);
dma_buf_put(dmabuf);
}
static int tegra_vde_attach_dmabuf(struct device *dev,
static int tegra_vde_attach_dmabuf(struct tegra_vde *vde,
int fd,
unsigned long offset,
size_t min_size,
size_t align_size,
struct dma_buf_attachment **a,
dma_addr_t *addr,
dma_addr_t *addrp,
struct sg_table **s,
struct iova **iovap,
size_t *size,
enum dma_data_direction dma_dir)
{
struct device *dev = vde->miscdev.parent;
struct dma_buf_attachment *attachment;
struct dma_buf *dmabuf;
struct sg_table *sgt;
@ -602,13 +560,23 @@ static int tegra_vde_attach_dmabuf(struct device *dev,
goto err_detach;
}
if (sgt->nents != 1) {
dev_err(dev, "Sparse DMA region is unsupported\n");
if (!vde->domain && sgt->nents > 1) {
dev_err(dev, "Sparse DMA region is unsupported, please enable IOMMU\n");
err = -EINVAL;
goto err_unmap;
}
*addr = sg_dma_address(sgt->sgl) + offset;
if (vde->domain) {
err = tegra_vde_iommu_map(vde, sgt, iovap, addrp, dmabuf->size);
if (err) {
dev_err(dev, "IOMMU mapping failed: %d\n", err);
goto err_unmap;
}
} else {
*addrp = sg_dma_address(sgt->sgl);
}
*addrp = *addrp + offset;
*a = attachment;
*s = sgt;
@ -627,7 +595,7 @@ static int tegra_vde_attach_dmabuf(struct device *dev,
return err;
}
static int tegra_vde_attach_dmabufs_to_frame(struct device *dev,
static int tegra_vde_attach_dmabufs_to_frame(struct tegra_vde *vde,
struct video_frame *frame,
struct tegra_vde_h264_frame *src,
enum dma_data_direction dma_dir,
@ -636,29 +604,32 @@ static int tegra_vde_attach_dmabufs_to_frame(struct device *dev,
{
int err;
err = tegra_vde_attach_dmabuf(dev, src->y_fd,
err = tegra_vde_attach_dmabuf(vde, src->y_fd,
src->y_offset, lsize, SZ_256,
&frame->y_dmabuf_attachment,
&frame->y_addr,
&frame->y_sgt,
&frame->y_iova,
NULL, dma_dir);
if (err)
return err;
err = tegra_vde_attach_dmabuf(dev, src->cb_fd,
err = tegra_vde_attach_dmabuf(vde, src->cb_fd,
src->cb_offset, csize, SZ_256,
&frame->cb_dmabuf_attachment,
&frame->cb_addr,
&frame->cb_sgt,
&frame->cb_iova,
NULL, dma_dir);
if (err)
goto err_release_y;
err = tegra_vde_attach_dmabuf(dev, src->cr_fd,
err = tegra_vde_attach_dmabuf(vde, src->cr_fd,
src->cr_offset, csize, SZ_256,
&frame->cr_dmabuf_attachment,
&frame->cr_addr,
&frame->cr_sgt,
&frame->cr_iova,
NULL, dma_dir);
if (err)
goto err_release_cb;
@ -668,11 +639,12 @@ static int tegra_vde_attach_dmabufs_to_frame(struct device *dev,
return 0;
}
err = tegra_vde_attach_dmabuf(dev, src->aux_fd,
err = tegra_vde_attach_dmabuf(vde, src->aux_fd,
src->aux_offset, csize, SZ_256,
&frame->aux_dmabuf_attachment,
&frame->aux_addr,
&frame->aux_sgt,
&frame->aux_iova,
NULL, dma_dir);
if (err)
goto err_release_cr;
@ -680,34 +652,49 @@ static int tegra_vde_attach_dmabufs_to_frame(struct device *dev,
return 0;
err_release_cr:
tegra_vde_detach_and_put_dmabuf(frame->cr_dmabuf_attachment,
frame->cr_sgt, dma_dir);
tegra_vde_detach_and_put_dmabuf(vde, dma_dir,
frame->cr_dmabuf_attachment,
frame->cr_sgt,
frame->cr_iova);
err_release_cb:
tegra_vde_detach_and_put_dmabuf(frame->cb_dmabuf_attachment,
frame->cb_sgt, dma_dir);
tegra_vde_detach_and_put_dmabuf(vde, dma_dir,
frame->cb_dmabuf_attachment,
frame->cb_sgt,
frame->cb_iova);
err_release_y:
tegra_vde_detach_and_put_dmabuf(frame->y_dmabuf_attachment,
frame->y_sgt, dma_dir);
tegra_vde_detach_and_put_dmabuf(vde, dma_dir,
frame->y_dmabuf_attachment,
frame->y_sgt,
frame->y_iova);
return err;
}
static void tegra_vde_release_frame_dmabufs(struct video_frame *frame,
static void tegra_vde_release_frame_dmabufs(struct tegra_vde *vde,
struct video_frame *frame,
enum dma_data_direction dma_dir,
bool baseline_profile)
{
if (!baseline_profile)
tegra_vde_detach_and_put_dmabuf(frame->aux_dmabuf_attachment,
frame->aux_sgt, dma_dir);
tegra_vde_detach_and_put_dmabuf(vde, dma_dir,
frame->aux_dmabuf_attachment,
frame->aux_sgt,
frame->aux_iova);
tegra_vde_detach_and_put_dmabuf(frame->cr_dmabuf_attachment,
frame->cr_sgt, dma_dir);
tegra_vde_detach_and_put_dmabuf(vde, dma_dir,
frame->cr_dmabuf_attachment,
frame->cr_sgt,
frame->cr_iova);
tegra_vde_detach_and_put_dmabuf(frame->cb_dmabuf_attachment,
frame->cb_sgt, dma_dir);
tegra_vde_detach_and_put_dmabuf(vde, dma_dir,
frame->cb_dmabuf_attachment,
frame->cb_sgt,
frame->cb_iova);
tegra_vde_detach_and_put_dmabuf(frame->y_dmabuf_attachment,
frame->y_sgt, dma_dir);
tegra_vde_detach_and_put_dmabuf(vde, dma_dir,
frame->y_dmabuf_attachment,
frame->y_sgt,
frame->y_iova);
}
static int tegra_vde_validate_frame(struct device *dev,
@ -800,6 +787,7 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
struct video_frame *dpb_frames;
struct dma_buf_attachment *bitstream_data_dmabuf_attachment;
struct sg_table *bitstream_sgt;
struct iova *bitstream_iova;
enum dma_data_direction dma_dir;
dma_addr_t bitstream_data_addr;
dma_addr_t bsev_ptr;
@ -819,12 +807,13 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
if (ret)
return ret;
ret = tegra_vde_attach_dmabuf(dev, ctx.bitstream_data_fd,
ret = tegra_vde_attach_dmabuf(vde, ctx.bitstream_data_fd,
ctx.bitstream_data_offset,
SZ_16K, SZ_16K,
&bitstream_data_dmabuf_attachment,
&bitstream_data_addr,
&bitstream_sgt,
&bitstream_iova,
&bitstream_data_size,
DMA_TO_DEVICE);
if (ret)
@ -866,7 +855,7 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
dma_dir = (i == 0) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
ret = tegra_vde_attach_dmabufs_to_frame(dev, &dpb_frames[i],
ret = tegra_vde_attach_dmabufs_to_frame(vde, &dpb_frames[i],
&frames[i], dma_dir,
ctx.baseline_profile,
lsize, csize);
@ -954,7 +943,7 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
while (i--) {
dma_dir = (i == 0) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
tegra_vde_release_frame_dmabufs(&dpb_frames[i], dma_dir,
tegra_vde_release_frame_dmabufs(vde, &dpb_frames[i], dma_dir,
ctx.baseline_profile);
}
@ -965,8 +954,10 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
kfree(frames);
release_bitstream_dmabuf:
tegra_vde_detach_and_put_dmabuf(bitstream_data_dmabuf_attachment,
bitstream_sgt, DMA_TO_DEVICE);
tegra_vde_detach_and_put_dmabuf(vde, DMA_TO_DEVICE,
bitstream_data_dmabuf_attachment,
bitstream_sgt,
bitstream_iova);
return ret;
}
@ -1176,10 +1167,16 @@ static int tegra_vde_probe(struct platform_device *pdev)
vde->miscdev.fops = &tegra_vde_fops;
vde->miscdev.parent = dev;
err = tegra_vde_iommu_init(vde);
if (err) {
dev_err(dev, "Failed to initialize IOMMU: %d\n", err);
goto err_gen_free;
}
err = misc_register(&vde->miscdev);
if (err) {
dev_err(dev, "Failed to register misc device: %d\n", err);
goto err_gen_free;
goto err_deinit_iommu;
}
pm_runtime_enable(dev);
@ -1197,6 +1194,9 @@ static int tegra_vde_probe(struct platform_device *pdev)
err_misc_unreg:
misc_deregister(&vde->miscdev);
err_deinit_iommu:
tegra_vde_iommu_deinit(vde);
err_gen_free:
gen_pool_free(vde->iram_pool, (unsigned long)vde->iram,
gen_pool_size(vde->iram_pool));
@ -1221,6 +1221,8 @@ static int tegra_vde_remove(struct platform_device *pdev)
misc_deregister(&vde->miscdev);
tegra_vde_iommu_deinit(vde);
gen_pool_free(vde->iram_pool, (unsigned long)vde->iram,
gen_pool_size(vde->iram_pool));

View File

@ -0,0 +1,91 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* NVIDIA Tegra Video decoder driver
*
* Copyright (C) 2016-2019 GRATE-DRIVER project
*/
#ifndef TEGRA_VDE_H
#define TEGRA_VDE_H
#include <linux/completion.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/iova.h>
struct clk;
struct gen_pool;
struct iommu_group;
struct iommu_domain;
struct reset_control;
/*
 * Driver state of the NVIDIA Tegra video decoder.
 */
struct tegra_vde {
/*
 * Register bases of the decoder's hardware units; see
 * tegra_vde_reg_base_name() for the name reported for each of them.
 */
void __iomem *sxe;
void __iomem *bsev;
void __iomem *mbe;
void __iomem *ppe;
void __iomem *mce;
void __iomem *tfe;
void __iomem *ppb;
void __iomem *vdma;
void __iomem *frameid;
struct mutex lock;
/* Misc device exposed by the driver; .parent is the platform device */
struct miscdevice miscdev;
struct reset_control *rst;
struct reset_control *rst_mc;
/* Pool that the 'iram' buffer below is allocated from */
struct gen_pool *iram_pool;
struct completion decode_completion;
struct clk *clk;
/*
 * IOMMU domain allocated by tegra_vde_iommu_init(). The driver tests
 * this pointer against NULL to decide whether buffers have to be mapped
 * through the IOMMU.
 */
struct iommu_domain *domain;
/* IOMMU group of the parent device, obtained via iommu_group_get() */
struct iommu_group *group;
/* IOVA allocator used by tegra_vde_iommu_map() / tegra_vde_iommu_unmap() */
struct iova_domain iova;
/*
 * Reservation of the 0x60000000..0x70000000 range: static addresses that
 * are kept out of the allocator to trap invalid memory accesses.
 */
struct iova *iova_resv_static_addresses;
/*
 * Reservation of the last page of the 32-bit address space, which avoids
 * the BSEV end-address wrap-around.
 */
struct iova *iova_resv_last_page;
dma_addr_t iram_lists_addr;
u32 *iram;
};
int tegra_vde_iommu_init(struct tegra_vde *vde);
void tegra_vde_iommu_deinit(struct tegra_vde *vde);
int tegra_vde_iommu_map(struct tegra_vde *vde,
struct sg_table *sgt,
struct iova **iovap,
dma_addr_t *addrp,
size_t size);
void tegra_vde_iommu_unmap(struct tegra_vde *vde, struct iova *iova);
/*
 * Translate a register base pointer into the name of the hardware unit it
 * belongs to. Returns "???" if @base matches none of the bases stored in
 * @vde.
 */
static __maybe_unused char const *
tegra_vde_reg_base_name(struct tegra_vde *vde, void __iomem *base)
{
	/* Ordered lookup table: the first matching entry wins */
	const struct {
		void __iomem *regs;
		char const *label;
	} units[] = {
		{ vde->sxe,     "SXE"     },
		{ vde->bsev,    "BSEV"    },
		{ vde->mbe,     "MBE"     },
		{ vde->ppe,     "PPE"     },
		{ vde->mce,     "MCE"     },
		{ vde->tfe,     "TFE"     },
		{ vde->ppb,     "PPB"     },
		{ vde->vdma,    "VDMA"    },
		{ vde->frameid, "FRAMEID" },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(units) / sizeof(units[0]); idx++) {
		if (units[idx].regs == base)
			return units[idx].label;
	}

	return "???";
}
#endif /* TEGRA_VDE_H */