linux_dsm_epyc7002/drivers/gpu/host1x/job.c

720 lines
16 KiB
C
Raw Normal View History

/*
* Tegra host1x Job
*
* Copyright (c) 2010-2015, NVIDIA Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>
#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"
#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
u32 num_cmdbufs, u32 num_relocs,
u32 num_waitchks)
{
struct host1x_job *job = NULL;
unsigned int num_unpins = num_cmdbufs + num_relocs;
u64 total;
void *mem;
/* Check that we're not going to overflow */
total = sizeof(struct host1x_job) +
(u64)num_relocs * sizeof(struct host1x_reloc) +
(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
(u64)num_waitchks * sizeof(struct host1x_waitchk) +
(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
(u64)num_unpins * sizeof(dma_addr_t) +
(u64)num_unpins * sizeof(u32 *);
if (total > ULONG_MAX)
return NULL;
mem = job = kzalloc(total, GFP_KERNEL);
if (!job)
return NULL;
kref_init(&job->ref);
job->channel = ch;
/* Redistribute memory to the structs */
mem += sizeof(struct host1x_job);
job->relocarray = num_relocs ? mem : NULL;
mem += num_relocs * sizeof(struct host1x_reloc);
job->unpins = num_unpins ? mem : NULL;
mem += num_unpins * sizeof(struct host1x_job_unpin_data);
job->waitchk = num_waitchks ? mem : NULL;
mem += num_waitchks * sizeof(struct host1x_waitchk);
job->gathers = num_cmdbufs ? mem : NULL;
mem += num_cmdbufs * sizeof(struct host1x_job_gather);
job->addr_phys = num_unpins ? mem : NULL;
job->reloc_addr_phys = job->addr_phys;
job->gather_addr_phys = &job->addr_phys[num_relocs];
return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
struct host1x_job *host1x_job_get(struct host1x_job *job)
{
kref_get(&job->ref);
return job;
}
EXPORT_SYMBOL(host1x_job_get);
static void job_free(struct kref *ref)
{
struct host1x_job *job = container_of(ref, struct host1x_job, ref);
kfree(job);
}
void host1x_job_put(struct host1x_job *job)
{
kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
u32 words, u32 offset)
{
struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers];
cur_gather->words = words;
cur_gather->bo = bo;
cur_gather->offset = offset;
job->num_gathers++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
/*
* NULL an already satisfied WAIT_SYNCPT host method, by patching its
* args in the command stream. The method data is changed to reference
* a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt
* with a matching threshold value of 0, so is guaranteed to be popped
* by the host HW.
*/
static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
struct host1x_bo *h, u32 offset)
{
void *patch_addr = NULL;
/* patch the wait */
patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT);
if (patch_addr) {
host1x_syncpt_patch_wait(sp,
patch_addr + (offset & ~PAGE_MASK));
host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr);
} else
pr_err("Could not map cmdbuf for wait check\n");
}
/*
* Check driver supplied waitchk structs for syncpt thresholds
* that have already been satisfied and NULL the comparison (to
* avoid a wrap condition in the HW).
*/
static int do_waitchks(struct host1x_job *job, struct host1x *host,
struct host1x_job_gather *g)
{
struct host1x_bo *patch = g->bo;
int i;
/* compare syncpt vs wait threshold */
for (i = 0; i < job->num_waitchk; i++) {
struct host1x_waitchk *wait = &job->waitchk[i];
struct host1x_syncpt *sp =
host1x_syncpt_get(host, wait->syncpt_id);
/* validate syncpt id */
if (wait->syncpt_id > host1x_syncpt_nb_pts(host))
continue;
/* skip all other gathers */
if (patch != wait->bo)
continue;
trace_host1x_syncpt_wait_check(wait->bo, wait->offset,
wait->syncpt_id, wait->thresh,
host1x_syncpt_read_min(sp));
if (host1x_syncpt_is_expired(sp, wait->thresh)) {
dev_dbg(host->dev,
"drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n",
wait->syncpt_id, sp->name, wait->thresh,
host1x_syncpt_read_min(sp));
host1x_syncpt_patch_offset(sp, patch,
g->offset + wait->offset);
}
wait->bo = NULL;
}
return 0;
}
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
unsigned int i;
int err;
job->num_unpins = 0;
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocarray[i];
struct sg_table *sgt;
dma_addr_t phys_addr;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
err = -EINVAL;
goto unpin;
}
phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
job->addr_phys[job->num_unpins] = phys_addr;
job->unpins[job->num_unpins].bo = reloc->target.bo;
job->unpins[job->num_unpins].sgt = sgt;
job->num_unpins++;
}
for (i = 0; i < job->num_gathers; i++) {
struct host1x_job_gather *g = &job->gathers[i];
size_t gather_size = 0;
struct scatterlist *sg;
struct sg_table *sgt;
dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
unsigned int j;
g->bo = host1x_bo_get(g->bo);
if (!g->bo) {
err = -EINVAL;
goto unpin;
}
phys_addr = host1x_bo_pin(g->bo, &sgt);
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
for_each_sg(sgt->sgl, sg, sgt->nents, j)
gather_size += sg->length;
gather_size = iova_align(&host->iova, gather_size);
shift = iova_shift(&host->iova);
alloc = alloc_iova(&host->iova, gather_size >> shift,
host->iova_end >> shift, true);
if (!alloc) {
err = -ENOMEM;
goto unpin;
}
err = iommu_map_sg(host->domain,
iova_dma_addr(&host->iova, alloc),
sgt->sgl, sgt->nents, IOMMU_READ);
if (err == 0) {
__free_iova(&host->iova, alloc);
err = -EINVAL;
goto unpin;
}
job->addr_phys[job->num_unpins] =
iova_dma_addr(&host->iova, alloc);
job->unpins[job->num_unpins].size = gather_size;
} else {
job->addr_phys[job->num_unpins] = phys_addr;
}
job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
job->unpins[job->num_unpins].bo = g->bo;
job->unpins[job->num_unpins].sgt = sgt;
job->num_unpins++;
}
return 0;
unpin:
host1x_job_unpin(job);
return err;
}
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
int i = 0;
u32 last_page = ~0;
void *cmdbuf_page_addr = NULL;
struct host1x_bo *cmdbuf = g->bo;
/* pin & patch the relocs for one gather */
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocarray[i];
u32 reloc_addr = (job->reloc_addr_phys[i] +
reloc->target.offset) >> reloc->shift;
u32 *target;
/* skip all other gathers */
if (cmdbuf != reloc->cmdbuf.bo)
continue;
if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
target = (u32 *)job->gather_copy_mapped +
reloc->cmdbuf.offset / sizeof(u32) +
g->offset / sizeof(u32);
goto patch_reloc;
}
if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
if (cmdbuf_page_addr)
host1x_bo_kunmap(cmdbuf, last_page,
cmdbuf_page_addr);
cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
reloc->cmdbuf.offset >> PAGE_SHIFT);
last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;
if (unlikely(!cmdbuf_page_addr)) {
pr_err("Could not map cmdbuf for relocation\n");
return -ENOMEM;
}
}
target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
patch_reloc:
*target = reloc_addr;
}
if (cmdbuf_page_addr)
host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);
return 0;
}
static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
unsigned int offset)
{
offset *= sizeof(u32);
if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
return false;
/* relocation shift value validation isn't implemented yet */
if (reloc->shift)
return false;
return true;
}
static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf,
unsigned int offset)
{
offset *= sizeof(u32);
if (wait->bo != cmdbuf || wait->offset != offset)
return false;
return true;
}
struct host1x_firewall {
struct host1x_job *job;
struct device *dev;
unsigned int num_relocs;
struct host1x_reloc *reloc;
unsigned int num_waitchks;
struct host1x_waitchk *waitchk;
struct host1x_bo *cmdbuf;
unsigned int offset;
u32 words;
u32 class;
u32 reg;
u32 mask;
u32 count;
};
static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
if (!fw->job->is_addr_reg)
return 0;
if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
if (!fw->num_relocs)
return -EINVAL;
if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
return -EINVAL;
fw->num_relocs--;
fw->reloc++;
}
if (offset == HOST1X_WAIT_SYNCPT_OFFSET) {
if (fw->class != HOST1X_CLASS_HOST1X)
return -EINVAL;
if (!fw->num_waitchks)
return -EINVAL;
if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset))
return -EINVAL;
fw->num_waitchks--;
fw->waitchk++;
}
return 0;
}
static int check_class(struct host1x_firewall *fw, u32 class)
{
if (!fw->job->is_valid_class) {
if (fw->class != class)
return -EINVAL;
} else {
if (!fw->job->is_valid_class(fw->class))
return -EINVAL;
}
return 0;
}
static int check_mask(struct host1x_firewall *fw)
{
u32 mask = fw->mask;
u32 reg = fw->reg;
int ret;
while (mask) {
if (fw->words == 0)
return -EINVAL;
if (mask & 1) {
ret = check_register(fw, reg);
if (ret < 0)
return ret;
fw->words--;
fw->offset++;
}
mask >>= 1;
reg++;
}
return 0;
}
static int check_incr(struct host1x_firewall *fw)
{
u32 count = fw->count;
u32 reg = fw->reg;
int ret;
while (count) {
if (fw->words == 0)
return -EINVAL;
ret = check_register(fw, reg);
if (ret < 0)
return ret;
reg++;
fw->words--;
fw->offset++;
count--;
}
return 0;
}
static int check_nonincr(struct host1x_firewall *fw)
{
u32 count = fw->count;
int ret;
while (count) {
if (fw->words == 0)
return -EINVAL;
ret = check_register(fw, fw->reg);
if (ret < 0)
return ret;
fw->words--;
fw->offset++;
count--;
}
return 0;
}
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
(g->offset / sizeof(u32));
u32 job_class = fw->class;
int err = 0;
fw->words = g->words;
fw->cmdbuf = g->bo;
fw->offset = 0;
while (fw->words && !err) {
u32 word = cmdbuf_base[fw->offset];
u32 opcode = (word & 0xf0000000) >> 28;
fw->mask = 0;
fw->reg = 0;
fw->count = 0;
fw->words--;
fw->offset++;
switch (opcode) {
case 0:
fw->class = word >> 6 & 0x3ff;
fw->mask = word & 0x3f;
fw->reg = word >> 16 & 0xfff;
err = check_class(fw, job_class);
if (!err)
err = check_mask(fw);
if (err)
goto out;
break;
case 1:
fw->reg = word >> 16 & 0xfff;
fw->count = word & 0xffff;
err = check_incr(fw);
if (err)
goto out;
break;
case 2:
fw->reg = word >> 16 & 0xfff;
fw->count = word & 0xffff;
err = check_nonincr(fw);
if (err)
goto out;
break;
case 3:
fw->mask = word & 0xffff;
fw->reg = word >> 16 & 0xfff;
err = check_mask(fw);
if (err)
goto out;
break;
case 4:
case 14:
break;
default:
err = -EINVAL;
break;
}
}
out:
return err;
}
static inline int copy_gathers(struct host1x_job *job, struct device *dev)
{
struct host1x_firewall fw;
size_t size = 0;
size_t offset = 0;
int i;
fw.job = job;
fw.dev = dev;
fw.reloc = job->relocarray;
fw.num_relocs = job->num_relocs;
fw.waitchk = job->waitchk;
fw.num_waitchks = job->num_waitchk;
fw.class = job->class;
for (i = 0; i < job->num_gathers; i++) {
struct host1x_job_gather *g = &job->gathers[i];
size += g->words * sizeof(u32);
}
/*
* Try a non-blocking allocation from a higher priority pools first,
* as awaiting for the allocation here is a major performance hit.
*/
dma, mm/pat: Rename dma_*_writecombine() to dma_*_wc() Rename dma_*_writecombine() to dma_*_wc(), so that the naming is coherent across the various write-combining APIs. Keep the old names for compatibility for a while, these can be removed at a later time. A guard is left to enable backporting of the rename, and later remove of the old mapping defines seemlessly. Build tested successfully with allmodconfig. The following Coccinelle SmPL patch was used for this simple transformation: @ rename_dma_alloc_writecombine @ expression dev, size, dma_addr, gfp; @@ -dma_alloc_writecombine(dev, size, dma_addr, gfp) +dma_alloc_wc(dev, size, dma_addr, gfp) @ rename_dma_free_writecombine @ expression dev, size, cpu_addr, dma_addr; @@ -dma_free_writecombine(dev, size, cpu_addr, dma_addr) +dma_free_wc(dev, size, cpu_addr, dma_addr) @ rename_dma_mmap_writecombine @ expression dev, vma, cpu_addr, dma_addr, size; @@ -dma_mmap_writecombine(dev, vma, cpu_addr, dma_addr, size) +dma_mmap_wc(dev, vma, cpu_addr, dma_addr, size) We also keep the old names as compatibility helpers, and guard against their definition to make backporting easier. Generated-by: Coccinelle SmPL Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bhelgaas@google.com Cc: bp@suse.de Cc: dan.j.williams@intel.com Cc: daniel.vetter@ffwll.ch Cc: dhowells@redhat.com Cc: julia.lawall@lip6.fr Cc: konrad.wilk@oracle.com Cc: linux-fbdev@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: luto@amacapital.net Cc: mst@redhat.com Cc: tomi.valkeinen@ti.com Cc: toshi.kani@hp.com Cc: vinod.koul@intel.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1453516462-4844-1-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-01-23 09:34:22 +07:00
job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
GFP_NOWAIT);
/* the higher priority allocation failed, try the generic-blocking */
if (!job->gather_copy_mapped)
job->gather_copy_mapped = dma_alloc_wc(dev, size,
&job->gather_copy,
GFP_KERNEL);
if (!job->gather_copy_mapped)
return -ENOMEM;
job->gather_copy_size = size;
for (i = 0; i < job->num_gathers; i++) {
struct host1x_job_gather *g = &job->gathers[i];
void *gather;
/* Copy the gather */
gather = host1x_bo_mmap(g->bo);
memcpy(job->gather_copy_mapped + offset, gather + g->offset,
g->words * sizeof(u32));
host1x_bo_munmap(g->bo, gather);
/* Store the location in the buffer */
g->base = job->gather_copy;
g->offset = offset;
/* Validate the job */
if (validate(&fw, g))
return -EINVAL;
offset += g->words * sizeof(u32);
}
/* No relocs and waitchks should remain at this point */
if (fw.num_relocs || fw.num_waitchks)
return -EINVAL;
return 0;
}
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
int err;
unsigned int i, j;
struct host1x *host = dev_get_drvdata(dev->parent);
DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
for (i = 0; i < job->num_waitchk; i++) {
u32 syncpt_id = job->waitchk[i].syncpt_id;
if (syncpt_id < host1x_syncpt_nb_pts(host))
set_bit(syncpt_id, waitchk_mask);
}
/* get current syncpt values for waitchk */
for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host))
host1x_syncpt_load(host->syncpt + i);
/* pin memory */
err = pin_job(host, job);
if (err)
goto out;
if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
err = copy_gathers(job, dev);
if (err)
goto out;
}
/* patch gathers */
for (i = 0; i < job->num_gathers; i++) {
struct host1x_job_gather *g = &job->gathers[i];
/* process each gather mem only once */
if (g->handled)
continue;
/* copy_gathers() sets gathers base if firewall is enabled */
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
g->base = job->gather_addr_phys[i];
for (j = i + 1; j < job->num_gathers; j++) {
if (job->gathers[j].bo == g->bo) {
job->gathers[j].handled = true;
job->gathers[j].base = g->base;
}
}
err = do_relocs(job, g);
if (err)
break;
err = do_waitchks(job, host, g);
if (err)
break;
}
out:
if (err)
host1x_job_unpin(job);
wmb();
return err;
}
EXPORT_SYMBOL(host1x_job_pin);
void host1x_job_unpin(struct host1x_job *job)
{
struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
unsigned int i;
for (i = 0; i < job->num_unpins; i++) {
struct host1x_job_unpin_data *unpin = &job->unpins[i];
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
iommu_unmap(host->domain, job->addr_phys[i],
unpin->size);
free_iova(&host->iova,
iova_pfn(&host->iova, job->addr_phys[i]));
}
host1x_bo_unpin(unpin->bo, unpin->sgt);
host1x_bo_put(unpin->bo);
}
job->num_unpins = 0;
if (job->gather_copy_size)
dma, mm/pat: Rename dma_*_writecombine() to dma_*_wc() Rename dma_*_writecombine() to dma_*_wc(), so that the naming is coherent across the various write-combining APIs. Keep the old names for compatibility for a while, these can be removed at a later time. A guard is left to enable backporting of the rename, and later remove of the old mapping defines seemlessly. Build tested successfully with allmodconfig. The following Coccinelle SmPL patch was used for this simple transformation: @ rename_dma_alloc_writecombine @ expression dev, size, dma_addr, gfp; @@ -dma_alloc_writecombine(dev, size, dma_addr, gfp) +dma_alloc_wc(dev, size, dma_addr, gfp) @ rename_dma_free_writecombine @ expression dev, size, cpu_addr, dma_addr; @@ -dma_free_writecombine(dev, size, cpu_addr, dma_addr) +dma_free_wc(dev, size, cpu_addr, dma_addr) @ rename_dma_mmap_writecombine @ expression dev, vma, cpu_addr, dma_addr, size; @@ -dma_mmap_writecombine(dev, vma, cpu_addr, dma_addr, size) +dma_mmap_wc(dev, vma, cpu_addr, dma_addr, size) We also keep the old names as compatibility helpers, and guard against their definition to make backporting easier. Generated-by: Coccinelle SmPL Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bhelgaas@google.com Cc: bp@suse.de Cc: dan.j.williams@intel.com Cc: daniel.vetter@ffwll.ch Cc: dhowells@redhat.com Cc: julia.lawall@lip6.fr Cc: konrad.wilk@oracle.com Cc: linux-fbdev@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: luto@amacapital.net Cc: mst@redhat.com Cc: tomi.valkeinen@ti.com Cc: toshi.kani@hp.com Cc: vinod.koul@intel.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1453516462-4844-1-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-01-23 09:34:22 +07:00
dma_free_wc(job->channel->dev, job->gather_copy_size,
job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);
/*
* Debug routine used to dump job entries
*/
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id);
dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end);
dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get);
dev_dbg(dev, " TIMEOUT %d\n", job->timeout);
dev_dbg(dev, " NUM_SLOTS %d\n", job->num_slots);
dev_dbg(dev, " NUM_HANDLES %d\n", job->num_unpins);
}