linux_dsm_epyc7002/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
Lucas Stach f232d9ec02 drm/etnaviv: fix TS cache flushing on GPUs with BLT engine
As seen in the Vivante kernel driver, most GPUs with the BLT engine have
a broken TS cache flush. The workaround is to temporarily set the BLT
command to CLEAR_IMAGE, without actually executing the clear. Apparently
this state change is enough to trigger the required TS cache flush. As
the BLT engine is completely asychronous, we also need a few more stall
states to synchronize the flush with the frontend.

Root-caused-by: Jonathan Marek <jonathan@marek.ca>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2020-03-20 18:40:44 +01:00

540 lines
16 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2014-2018 Etnaviv Project
*/
#include <drm/drm_drv.h>
#include "etnaviv_cmdbuf.h"
#include "etnaviv_gpu.h"
#include "etnaviv_gem.h"
#include "etnaviv_mmu.h"
#include "common.xml.h"
#include "state.xml.h"
#include "state_blt.xml.h"
#include "state_hi.xml.h"
#include "state_3d.xml.h"
#include "cmdstream.xml.h"
/*
* Command Buffer helper:
*/
static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)
{
u32 *vaddr = (u32 *)buffer->vaddr;
BUG_ON(buffer->user_size >= buffer->size);
vaddr[buffer->user_size / 4] = data;
buffer->user_size += 4;
}
static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,
u32 reg, u32 value)
{
u32 index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR;
buffer->user_size = ALIGN(buffer->user_size, 8);
/* write a register via cmd stream */
OUT(buffer, VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE |
VIV_FE_LOAD_STATE_HEADER_COUNT(1) |
VIV_FE_LOAD_STATE_HEADER_OFFSET(index));
OUT(buffer, value);
}
static inline void CMD_END(struct etnaviv_cmdbuf *buffer)
{
buffer->user_size = ALIGN(buffer->user_size, 8);
OUT(buffer, VIV_FE_END_HEADER_OP_END);
}
static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)
{
buffer->user_size = ALIGN(buffer->user_size, 8);
OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200);
}
static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
u16 prefetch, u32 address)
{
buffer->user_size = ALIGN(buffer->user_size, 8);
OUT(buffer, VIV_FE_LINK_HEADER_OP_LINK |
VIV_FE_LINK_HEADER_PREFETCH(prefetch));
OUT(buffer, address);
}
static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
u32 from, u32 to)
{
buffer->user_size = ALIGN(buffer->user_size, 8);
OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);
OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
}
static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
{
CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN,
VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
VIVS_GL_SEMAPHORE_TOKEN_TO(to));
}
static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
struct etnaviv_cmdbuf *buffer, u8 pipe)
{
u32 flush = 0;
lockdep_assert_held(&gpu->lock);
/*
* This assumes that if we're switching to 2D, we're switching
* away from 3D, and vice versa. Hence, if we're switching to
* the 2D core, we need to flush the 3D depth and color caches,
* otherwise we need to flush the 2D pixel engine cache.
*/
if (gpu->exec_state == ETNA_PIPE_2D)
flush = VIVS_GL_FLUSH_CACHE_PE2D;
else if (gpu->exec_state == ETNA_PIPE_3D)
flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
VIVS_GL_PIPE_SELECT_PIPE(pipe));
}
static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
struct etnaviv_cmdbuf *buf, u32 off, u32 len)
{
u32 size = buf->size;
u32 *ptr = buf->vaddr + off;
dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
ptr, etnaviv_cmdbuf_get_va(buf,
&gpu->mmu_context->cmdbuf_mapping) +
off, size - len * 4 - off);
print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
ptr, len * 4, 0);
}
/*
* Safely replace the WAIT of a waitlink with a new command and argument.
* The GPU may be executing this WAIT while we're modifying it, so we have
* to write it in a specific order to avoid the GPU branching to somewhere
* else. 'wl_offset' is the offset to the first byte of the WAIT command.
*/
static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
unsigned int wl_offset, u32 cmd, u32 arg)
{
u32 *lw = buffer->vaddr + wl_offset;
lw[1] = arg;
mb();
lw[0] = cmd;
mb();
}
/*
* Ensure that there is space in the command buffer to contiguously write
* 'cmd_dwords' 64-bit words into the buffer, wrapping if necessary.
*/
static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
{
if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
buffer->user_size = 0;
return etnaviv_cmdbuf_get_va(buffer,
&gpu->mmu_context->cmdbuf_mapping) +
buffer->user_size;
}
u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
{
struct etnaviv_cmdbuf *buffer = &gpu->buffer;
lockdep_assert_held(&gpu->lock);
/* initialize buffer */
buffer->user_size = 0;
CMD_WAIT(buffer);
CMD_LINK(buffer, 2,
etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+ buffer->user_size - 4);
return buffer->user_size / 8;
}
u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
{
struct etnaviv_cmdbuf *buffer = &gpu->buffer;
lockdep_assert_held(&gpu->lock);
buffer->user_size = 0;
if (gpu->identity.features & chipFeatures_PIPE_3D) {
CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_3D));
CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
}
if (gpu->identity.features & chipFeatures_PIPE_2D) {
CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_2D));
CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
}
CMD_END(buffer);
buffer->user_size = ALIGN(buffer->user_size, 8);
return buffer->user_size / 8;
}
u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
{
struct etnaviv_cmdbuf *buffer = &gpu->buffer;
lockdep_assert_held(&gpu->lock);
buffer->user_size = 0;
CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
VIVS_MMUv2_PTA_CONFIG_INDEX(id));
CMD_END(buffer);
buffer->user_size = ALIGN(buffer->user_size, 8);
return buffer->user_size / 8;
}
void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
{
struct etnaviv_cmdbuf *buffer = &gpu->buffer;
unsigned int waitlink_offset = buffer->user_size - 16;
u32 link_target, flush = 0;
bool has_blt = !!(gpu->identity.minor_features5 &
chipMinorFeatures5_BLT_ENGINE);
lockdep_assert_held(&gpu->lock);
if (gpu->exec_state == ETNA_PIPE_2D)
flush = VIVS_GL_FLUSH_CACHE_PE2D;
else if (gpu->exec_state == ETNA_PIPE_3D)
flush = VIVS_GL_FLUSH_CACHE_DEPTH |
VIVS_GL_FLUSH_CACHE_COLOR |
VIVS_GL_FLUSH_CACHE_TEXTURE |
VIVS_GL_FLUSH_CACHE_TEXTUREVS |
VIVS_GL_FLUSH_CACHE_SHADER_L2;
if (flush) {
unsigned int dwords = 7;
if (has_blt)
dwords += 10;
link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
if (has_blt) {
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
}
CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
if (gpu->exec_state == ETNA_PIPE_3D) {
if (has_blt) {
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
} else {
CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
VIVS_TS_FLUSH_CACHE_FLUSH);
}
}
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
if (has_blt) {
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
}
CMD_END(buffer);
etnaviv_buffer_replace_wait(buffer, waitlink_offset,
VIV_FE_LINK_HEADER_OP_LINK |
VIV_FE_LINK_HEADER_PREFETCH(dwords),
link_target);
} else {
/* Replace the last link-wait with an "END" command */
etnaviv_buffer_replace_wait(buffer, waitlink_offset,
VIV_FE_END_HEADER_OP_END, 0);
}
}
/* Append a 'sync point' to the ring buffer. */
void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
{
struct etnaviv_cmdbuf *buffer = &gpu->buffer;
unsigned int waitlink_offset = buffer->user_size - 16;
u32 dwords, target;
lockdep_assert_held(&gpu->lock);
/*
* We need at most 3 dwords in the return target:
* 1 event + 1 end + 1 wait + 1 link.
*/
dwords = 4;
target = etnaviv_buffer_reserve(gpu, buffer, dwords);
/* Signal sync point event */
CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
VIVS_GL_EVENT_FROM_PE);
/* Stop the FE to 'pause' the GPU */
CMD_END(buffer);
/* Append waitlink */
CMD_WAIT(buffer);
CMD_LINK(buffer, 2,
etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+ buffer->user_size - 4);
/*
* Kick off the 'sync point' command by replacing the previous
* WAIT with a link to the address in the ring buffer.
*/
etnaviv_buffer_replace_wait(buffer, waitlink_offset,
VIV_FE_LINK_HEADER_OP_LINK |
VIV_FE_LINK_HEADER_PREFETCH(dwords),
target);
}
/* Append a command buffer to the ring buffer. */
void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
struct etnaviv_iommu_context *mmu_context, unsigned int event,
struct etnaviv_cmdbuf *cmdbuf)
{
struct etnaviv_cmdbuf *buffer = &gpu->buffer;
unsigned int waitlink_offset = buffer->user_size - 16;
u32 return_target, return_dwords;
u32 link_target, link_dwords;
bool switch_context = gpu->exec_state != exec_state;
bool switch_mmu_context = gpu->mmu_context != mmu_context;
unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;
bool has_blt = !!(gpu->identity.minor_features5 &
chipMinorFeatures5_BLT_ENGINE);
lockdep_assert_held(&gpu->lock);
if (drm_debug_enabled(DRM_UT_DRIVER))
etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
link_target = etnaviv_cmdbuf_get_va(cmdbuf,
&gpu->mmu_context->cmdbuf_mapping);
link_dwords = cmdbuf->size / 8;
/*
* If we need maintenance prior to submitting this buffer, we will
* need to append a mmu flush load state, followed by a new
* link to this buffer - a total of four additional words.
*/
if (need_flush || switch_context) {
u32 target, extra_dwords;
/* link command */
extra_dwords = 1;
/* flush command */
if (need_flush) {
if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
extra_dwords += 1;
else
extra_dwords += 3;
}
/* pipe switch commands */
if (switch_context)
extra_dwords += 4;
/* PTA load command */
if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
extra_dwords += 1;
target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
/*
* Switch MMU context if necessary. Must be done after the
* link target has been calculated, as the jump forward in the
* kernel ring still uses the last active MMU context before
* the switch.
*/
if (switch_mmu_context) {
struct etnaviv_iommu_context *old_context = gpu->mmu_context;
etnaviv_iommu_context_get(mmu_context);
gpu->mmu_context = mmu_context;
etnaviv_iommu_context_put(old_context);
}
if (need_flush) {
/* Add the MMU flush */
if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
VIVS_GL_FLUSH_MMU_FLUSH_UNK2 |
VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
} else {
u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;
if (switch_mmu_context &&
gpu->sec_mode == ETNA_SEC_KERNEL) {
unsigned short id =
etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
CMD_LOAD_STATE(buffer,
VIVS_MMUv2_PTA_CONFIG,
VIVS_MMUv2_PTA_CONFIG_INDEX(id));
}
if (gpu->sec_mode == ETNA_SEC_NONE)
flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);
CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
flush);
CMD_SEM(buffer, SYNC_RECIPIENT_FE,
SYNC_RECIPIENT_PE);
CMD_STALL(buffer, SYNC_RECIPIENT_FE,
SYNC_RECIPIENT_PE);
}
gpu->flush_seq = new_flush_seq;
}
if (switch_context) {
etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
gpu->exec_state = exec_state;
}
/* And the link to the submitted buffer */
link_target = etnaviv_cmdbuf_get_va(cmdbuf,
&gpu->mmu_context->cmdbuf_mapping);
CMD_LINK(buffer, link_dwords, link_target);
/* Update the link target to point to above instructions */
link_target = target;
link_dwords = extra_dwords;
}
/*
* Append a LINK to the submitted command buffer to return to
* the ring buffer. return_target is the ring target address.
* We need at most 7 dwords in the return target: 2 cache flush +
* 2 semaphore stall + 1 event + 1 wait + 1 link.
*/
return_dwords = 7;
/*
* When the BLT engine is present we need 6 more dwords in the return
* target: 3 enable/flush/disable + 4 enable/semaphore stall/disable,
* but we don't need the normal TS flush state.
*/
if (has_blt)
return_dwords += 6;
return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
CMD_LINK(cmdbuf, return_dwords, return_target);
/*
* Append a cache flush, stall, event, wait and link pointing back to
* the wait command to the ring buffer.
*/
if (gpu->exec_state == ETNA_PIPE_2D) {
CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
VIVS_GL_FLUSH_CACHE_PE2D);
} else {
CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
VIVS_GL_FLUSH_CACHE_DEPTH |
VIVS_GL_FLUSH_CACHE_COLOR);
if (has_blt) {
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
} else {
CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
VIVS_TS_FLUSH_CACHE_FLUSH);
}
}
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
if (has_blt) {
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
}
CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
VIVS_GL_EVENT_FROM_PE);
CMD_WAIT(buffer);
CMD_LINK(buffer, 2,
etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+ buffer->user_size - 4);
if (drm_debug_enabled(DRM_UT_DRIVER))
pr_info("stream link to 0x%08x @ 0x%08x %p\n",
return_target,
etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
cmdbuf->vaddr);
if (drm_debug_enabled(DRM_UT_DRIVER)) {
print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
cmdbuf->vaddr, cmdbuf->size, 0);
pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
pr_info("addr: 0x%08x\n", link_target);
pr_info("back: 0x%08x\n", return_target);
pr_info("event: %d\n", event);
}
/*
* Kick off the submitted command by replacing the previous
* WAIT with a link to the address in the ring buffer.
*/
etnaviv_buffer_replace_wait(buffer, waitlink_offset,
VIV_FE_LINK_HEADER_OP_LINK |
VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
link_target);
if (drm_debug_enabled(DRM_UT_DRIVER))
etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
}