linux_dsm_epyc7002/drivers/video/fbdev/aty/radeon_accel.c

329 lines
8.9 KiB
C
Raw Normal View History

#include "radeonfb.h"
/* the accelerated functions here are patterned after the
* "ACCEL_MMIO" ifdef branches in XFree86
* --dte
*/
static void radeon_fixup_offset(struct radeonfb_info *rinfo)
{
u32 local_base;
/* *** Ugly workaround *** */
/*
* On some platforms, the video memory is mapped at 0 in radeon chip space
* (like PPCs) by the firmware. X will always move it up so that it's seen
* by the chip to be at the same address as the PCI BAR.
* That means that when switching back from X, there is a mismatch between
* the offsets programmed into the engine. This means that potentially,
* accel operations done before radeonfb has a chance to re-init the engine
* will have incorrect offsets, and potentially trash system memory !
*
* The correct fix is for fbcon to never call any accel op before the engine
* has properly been re-initialized (by a call to set_var), but this is a
* complex fix. This workaround in the meantime, called before every accel
* operation, makes sure the offsets are in sync.
*/
radeon_fifo_wait (1);
local_base = INREG(MC_FB_LOCATION) << 16;
if (local_base == rinfo->fb_local_base)
return;
rinfo->fb_local_base = local_base;
radeon_fifo_wait (3);
OUTREG(DEFAULT_PITCH_OFFSET, (rinfo->pitch << 0x16) |
(rinfo->fb_local_base >> 10));
OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
}
static void radeonfb_prim_fillrect(struct radeonfb_info *rinfo,
const struct fb_fillrect *region)
{
radeon_fifo_wait(4);
OUTREG(DP_GUI_MASTER_CNTL,
rinfo->dp_gui_master_cntl /* contains, like GMC_DST_32BPP */
| GMC_BRUSH_SOLID_COLOR
| ROP3_P);
if (radeon_get_dstbpp(rinfo->depth) != DST_8BPP)
OUTREG(DP_BRUSH_FRGD_CLR, rinfo->pseudo_palette[region->color]);
else
OUTREG(DP_BRUSH_FRGD_CLR, region->color);
OUTREG(DP_WRITE_MSK, 0xffffffff);
OUTREG(DP_CNTL, (DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM));
radeon_fifo_wait(2);
radeonfb: fix accel engine hangs Some chips appear to have the 2D engine hang during screen redraw, typically in a sequence of copyarea operations. This appear to be solved by adding a flush of the engine destination pixel cache and waiting for the engine to be idle before issuing the accel operation. The performance impact seems to be fairly small. Here is a trace on an RV370 (PCI device ID 0x5b64), it records the RBBM_STATUS register, then the source x/y, destination x/y, and width/height used for the copy: ---------------------------------------- radeonfb_prim_copyarea: STATUS[00000140] src[210:70] dst[210:60] wh[a0:10] radeonfb_prim_copyarea: STATUS[00000140] src[2b8:70] dst[2b8:60] wh[88:10] radeonfb_prim_copyarea: STATUS[00000140] src[348:70] dst[348:60] wh[40:10] radeonfb_prim_copyarea: STATUS[80020140] src[390:70] dst[390:60] wh[88:10] radeonfb_prim_copyarea: STATUS[8002613f] src[40:80] dst[40:70] wh[28:10] radeonfb_prim_copyarea: STATUS[80026139] src[a8:80] dst[a8:70] wh[38:10] radeonfb_prim_copyarea: STATUS[80026133] src[e8:80] dst[e8:70] wh[80:10] radeonfb_prim_copyarea: STATUS[8002612d] src[170:80] dst[170:70] wh[30:10] radeonfb_prim_copyarea: STATUS[80026127] src[1a8:80] dst[1a8:70] wh[8:10] radeonfb_prim_copyarea: STATUS[80026121] src[1b8:80] dst[1b8:70] wh[88:10] radeonfb_prim_copyarea: STATUS[8002611b] src[248:80] dst[248:70] wh[68:10] ---------------------------------------- When things are going fine the copies complete before the next ROP is even issued, but all of a sudden the 2D unit becomes active (bit 17 in RBBM_STATUS) and the FIFO retry (bit 13) and FIFO pipeline busy (bit 14) are set as well. The FIFO begins to backup until it becomes full. What happens next is the radeon_fifo_wait() times out, and we access the chip illegally leading to a bus error which usually wedges the box. None of this makes it to the console screen, of course :-) radeon_fifo_wait() should be modified to reset the accelerator when this timeout happens instead of programming the chip anyways. ---------------------------------------- radeonfb: FIFO Timeout ! ERROR(0): Cheetah error trap taken afsr[0010080005000000] afar[000007f900800e40] TL1(0) ERROR(0): TPC[595114] TNPC[595118] O7[459788] TSTATE[11009601] ERROR(0): TPC<radeonfb_copyarea+0xfc/0x248> ERROR(0): M_SYND(0), E_SYND(0), Privileged ERROR(0): Highest priority error (0000080000000000) "Bus error response from system bus" ERROR(0): D-cache idx[0] tag[0000000000000000] utag[0000000000000000] stag[0000000000000000] ERROR(0): D-cache data0[0000000000000000] data1[0000000000000000] data2[0000000000000000] data3[0000000000000000] ERROR(0): I-cache idx[0] tag[0000000000000000] utag[0000000000000000] stag[0000000000000000] u[0000000000000000] l[00\ ERROR(0): I-cache INSN0[0000000000000000] INSN1[0000000000000000] INSN2[0000000000000000] INSN3[0000000000000000] ERROR(0): I-cache INSN4[0000000000000000] INSN5[0000000000000000] INSN6[0000000000000000] INSN7[0000000000000000] ERROR(0): E-cache idx[800e40] tag[000000000e049f4c] ERROR(0): E-cache data0[fffff8127d300180] data1[00000000004b5384] data2[0000000000000000] data3[0000000000000000] Ker:xnel panic - not syncing: Irrecoverable deferred error trap. ---------------------------------------- Another quirk is that these copyarea calls will not happen until the first drivers/char/vt.c:redraw_screen() occurs. This will only happen if you 1) VC switch or 2) run "consolechars" or 3) unblank the screen. This seems to happen because until a redraw_screen() the screen scrolling method used by fbcon is not finalized yet. I've seen this with other fb drivers too. So if all you do is boot straight into X you will never see this bug on the relevant chips. Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: <stable@kernel.org> [2.6.25.x, 2.6.26.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-08-13 05:08:51 +07:00
OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL);
OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE));
radeon_fifo_wait(2);
OUTREG(DST_Y_X, (region->dy << 16) | region->dx);
OUTREG(DST_WIDTH_HEIGHT, (region->width << 16) | region->height);
}
void radeonfb_fillrect(struct fb_info *info, const struct fb_fillrect *region)
{
struct radeonfb_info *rinfo = info->par;
struct fb_fillrect modded;
int vxres, vyres;
if (info->state != FBINFO_STATE_RUNNING)
return;
if (info->flags & FBINFO_HWACCEL_DISABLED) {
cfb_fillrect(info, region);
return;
}
radeon_fixup_offset(rinfo);
vxres = info->var.xres_virtual;
vyres = info->var.yres_virtual;
memcpy(&modded, region, sizeof(struct fb_fillrect));
if(!modded.width || !modded.height ||
modded.dx >= vxres || modded.dy >= vyres)
return;
if(modded.dx + modded.width > vxres) modded.width = vxres - modded.dx;
if(modded.dy + modded.height > vyres) modded.height = vyres - modded.dy;
radeonfb_prim_fillrect(rinfo, &modded);
}
static void radeonfb_prim_copyarea(struct radeonfb_info *rinfo,
const struct fb_copyarea *area)
{
int xdir, ydir;
u32 sx, sy, dx, dy, w, h;
w = area->width; h = area->height;
dx = area->dx; dy = area->dy;
sx = area->sx; sy = area->sy;
xdir = sx - dx;
ydir = sy - dy;
if ( xdir < 0 ) { sx += w-1; dx += w-1; }
if ( ydir < 0 ) { sy += h-1; dy += h-1; }
radeon_fifo_wait(3);
OUTREG(DP_GUI_MASTER_CNTL,
rinfo->dp_gui_master_cntl /* i.e. GMC_DST_32BPP */
| GMC_BRUSH_NONE
| GMC_SRC_DSTCOLOR
| ROP3_S
| DP_SRC_SOURCE_MEMORY );
OUTREG(DP_WRITE_MSK, 0xffffffff);
OUTREG(DP_CNTL, (xdir>=0 ? DST_X_LEFT_TO_RIGHT : 0)
| (ydir>=0 ? DST_Y_TOP_TO_BOTTOM : 0));
radeon_fifo_wait(2);
radeonfb: fix accel engine hangs Some chips appear to have the 2D engine hang during screen redraw, typically in a sequence of copyarea operations. This appear to be solved by adding a flush of the engine destination pixel cache and waiting for the engine to be idle before issuing the accel operation. The performance impact seems to be fairly small. Here is a trace on an RV370 (PCI device ID 0x5b64), it records the RBBM_STATUS register, then the source x/y, destination x/y, and width/height used for the copy: ---------------------------------------- radeonfb_prim_copyarea: STATUS[00000140] src[210:70] dst[210:60] wh[a0:10] radeonfb_prim_copyarea: STATUS[00000140] src[2b8:70] dst[2b8:60] wh[88:10] radeonfb_prim_copyarea: STATUS[00000140] src[348:70] dst[348:60] wh[40:10] radeonfb_prim_copyarea: STATUS[80020140] src[390:70] dst[390:60] wh[88:10] radeonfb_prim_copyarea: STATUS[8002613f] src[40:80] dst[40:70] wh[28:10] radeonfb_prim_copyarea: STATUS[80026139] src[a8:80] dst[a8:70] wh[38:10] radeonfb_prim_copyarea: STATUS[80026133] src[e8:80] dst[e8:70] wh[80:10] radeonfb_prim_copyarea: STATUS[8002612d] src[170:80] dst[170:70] wh[30:10] radeonfb_prim_copyarea: STATUS[80026127] src[1a8:80] dst[1a8:70] wh[8:10] radeonfb_prim_copyarea: STATUS[80026121] src[1b8:80] dst[1b8:70] wh[88:10] radeonfb_prim_copyarea: STATUS[8002611b] src[248:80] dst[248:70] wh[68:10] ---------------------------------------- When things are going fine the copies complete before the next ROP is even issued, but all of a sudden the 2D unit becomes active (bit 17 in RBBM_STATUS) and the FIFO retry (bit 13) and FIFO pipeline busy (bit 14) are set as well. The FIFO begins to backup until it becomes full. What happens next is the radeon_fifo_wait() times out, and we access the chip illegally leading to a bus error which usually wedges the box. None of this makes it to the console screen, of course :-) radeon_fifo_wait() should be modified to reset the accelerator when this timeout happens instead of programming the chip anyways. ---------------------------------------- radeonfb: FIFO Timeout ! ERROR(0): Cheetah error trap taken afsr[0010080005000000] afar[000007f900800e40] TL1(0) ERROR(0): TPC[595114] TNPC[595118] O7[459788] TSTATE[11009601] ERROR(0): TPC<radeonfb_copyarea+0xfc/0x248> ERROR(0): M_SYND(0), E_SYND(0), Privileged ERROR(0): Highest priority error (0000080000000000) "Bus error response from system bus" ERROR(0): D-cache idx[0] tag[0000000000000000] utag[0000000000000000] stag[0000000000000000] ERROR(0): D-cache data0[0000000000000000] data1[0000000000000000] data2[0000000000000000] data3[0000000000000000] ERROR(0): I-cache idx[0] tag[0000000000000000] utag[0000000000000000] stag[0000000000000000] u[0000000000000000] l[00\ ERROR(0): I-cache INSN0[0000000000000000] INSN1[0000000000000000] INSN2[0000000000000000] INSN3[0000000000000000] ERROR(0): I-cache INSN4[0000000000000000] INSN5[0000000000000000] INSN6[0000000000000000] INSN7[0000000000000000] ERROR(0): E-cache idx[800e40] tag[000000000e049f4c] ERROR(0): E-cache data0[fffff8127d300180] data1[00000000004b5384] data2[0000000000000000] data3[0000000000000000] Ker:xnel panic - not syncing: Irrecoverable deferred error trap. ---------------------------------------- Another quirk is that these copyarea calls will not happen until the first drivers/char/vt.c:redraw_screen() occurs. This will only happen if you 1) VC switch or 2) run "consolechars" or 3) unblank the screen. This seems to happen because until a redraw_screen() the screen scrolling method used by fbcon is not finalized yet. I've seen this with other fb drivers too. So if all you do is boot straight into X you will never see this bug on the relevant chips. Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: <stable@kernel.org> [2.6.25.x, 2.6.26.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-08-13 05:08:51 +07:00
OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL);
OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE));
radeon_fifo_wait(3);
OUTREG(SRC_Y_X, (sy << 16) | sx);
OUTREG(DST_Y_X, (dy << 16) | dx);
OUTREG(DST_HEIGHT_WIDTH, (h << 16) | w);
}
void radeonfb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
{
struct radeonfb_info *rinfo = info->par;
struct fb_copyarea modded;
u32 vxres, vyres;
modded.sx = area->sx;
modded.sy = area->sy;
modded.dx = area->dx;
modded.dy = area->dy;
modded.width = area->width;
modded.height = area->height;
if (info->state != FBINFO_STATE_RUNNING)
return;
if (info->flags & FBINFO_HWACCEL_DISABLED) {
cfb_copyarea(info, area);
return;
}
radeon_fixup_offset(rinfo);
vxres = info->var.xres_virtual;
vyres = info->var.yres_virtual;
if(!modded.width || !modded.height ||
modded.sx >= vxres || modded.sy >= vyres ||
modded.dx >= vxres || modded.dy >= vyres)
return;
if(modded.sx + modded.width > vxres) modded.width = vxres - modded.sx;
if(modded.dx + modded.width > vxres) modded.width = vxres - modded.dx;
if(modded.sy + modded.height > vyres) modded.height = vyres - modded.sy;
if(modded.dy + modded.height > vyres) modded.height = vyres - modded.dy;
radeonfb_prim_copyarea(rinfo, &modded);
}
void radeonfb_imageblit(struct fb_info *info, const struct fb_image *image)
{
struct radeonfb_info *rinfo = info->par;
if (info->state != FBINFO_STATE_RUNNING)
return;
radeon_engine_idle();
cfb_imageblit(info, image);
}
int radeonfb_sync(struct fb_info *info)
{
struct radeonfb_info *rinfo = info->par;
if (info->state != FBINFO_STATE_RUNNING)
return 0;
radeon_engine_idle();
return 0;
}
void radeonfb_engine_reset(struct radeonfb_info *rinfo)
{
u32 clock_cntl_index, mclk_cntl, rbbm_soft_reset;
u32 host_path_cntl;
radeon_engine_flush (rinfo);
clock_cntl_index = INREG(CLOCK_CNTL_INDEX);
mclk_cntl = INPLL(MCLK_CNTL);
OUTPLL(MCLK_CNTL, (mclk_cntl |
FORCEON_MCLKA |
FORCEON_MCLKB |
FORCEON_YCLKA |
FORCEON_YCLKB |
FORCEON_MC |
FORCEON_AIC));
host_path_cntl = INREG(HOST_PATH_CNTL);
rbbm_soft_reset = INREG(RBBM_SOFT_RESET);
if (IS_R300_VARIANT(rinfo)) {
u32 tmp;
OUTREG(RBBM_SOFT_RESET, (rbbm_soft_reset |
SOFT_RESET_CP |
SOFT_RESET_HI |
SOFT_RESET_E2));
INREG(RBBM_SOFT_RESET);
OUTREG(RBBM_SOFT_RESET, 0);
tmp = INREG(RB2D_DSTCACHE_MODE);
OUTREG(RB2D_DSTCACHE_MODE, tmp | (1 << 17)); /* FIXME */
} else {
OUTREG(RBBM_SOFT_RESET, rbbm_soft_reset |
SOFT_RESET_CP |
SOFT_RESET_HI |
SOFT_RESET_SE |
SOFT_RESET_RE |
SOFT_RESET_PP |
SOFT_RESET_E2 |
SOFT_RESET_RB);
INREG(RBBM_SOFT_RESET);
OUTREG(RBBM_SOFT_RESET, rbbm_soft_reset & (u32)
~(SOFT_RESET_CP |
SOFT_RESET_HI |
SOFT_RESET_SE |
SOFT_RESET_RE |
SOFT_RESET_PP |
SOFT_RESET_E2 |
SOFT_RESET_RB));
INREG(RBBM_SOFT_RESET);
}
OUTREG(HOST_PATH_CNTL, host_path_cntl | HDP_SOFT_RESET);
INREG(HOST_PATH_CNTL);
OUTREG(HOST_PATH_CNTL, host_path_cntl);
if (!IS_R300_VARIANT(rinfo))
OUTREG(RBBM_SOFT_RESET, rbbm_soft_reset);
OUTREG(CLOCK_CNTL_INDEX, clock_cntl_index);
OUTPLL(MCLK_CNTL, mclk_cntl);
}
void radeonfb_engine_init (struct radeonfb_info *rinfo)
{
unsigned long temp;
/* disable 3D engine */
OUTREG(RB3D_CNTL, 0);
radeonfb_engine_reset(rinfo);
radeon_fifo_wait (1);
if (IS_R300_VARIANT(rinfo)) {
OUTREG(RB2D_DSTCACHE_MODE, INREG(RB2D_DSTCACHE_MODE) |
RB2D_DC_AUTOFLUSH_ENABLE |
RB2D_DC_DC_DISABLE_IGNORE_PE);
} else {
/* This needs to be double checked with ATI. Latest X driver
* completely "forgets" to set this register on < r3xx, and
* we used to just write 0 there... I'll keep the 0 and update
* that when we have sorted things out on X side.
*/
OUTREG(RB2D_DSTCACHE_MODE, 0);
}
radeon_fifo_wait (3);
/* We re-read MC_FB_LOCATION from card as it can have been
* modified by XFree drivers (ouch !)
*/
rinfo->fb_local_base = INREG(MC_FB_LOCATION) << 16;
OUTREG(DEFAULT_PITCH_OFFSET, (rinfo->pitch << 0x16) |
(rinfo->fb_local_base >> 10));
OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
radeon_fifo_wait (1);
#if defined(__BIG_ENDIAN)
OUTREGP(DP_DATATYPE, HOST_BIG_ENDIAN_EN, ~HOST_BIG_ENDIAN_EN);
#else
OUTREGP(DP_DATATYPE, 0, ~HOST_BIG_ENDIAN_EN);
#endif
radeon_fifo_wait (2);
OUTREG(DEFAULT_SC_TOP_LEFT, 0);
OUTREG(DEFAULT_SC_BOTTOM_RIGHT, (DEFAULT_SC_RIGHT_MAX |
DEFAULT_SC_BOTTOM_MAX));
temp = radeon_get_dstbpp(rinfo->depth);
rinfo->dp_gui_master_cntl = ((temp << 8) | GMC_CLR_CMP_CNTL_DIS);
radeon_fifo_wait (1);
OUTREG(DP_GUI_MASTER_CNTL, (rinfo->dp_gui_master_cntl |
GMC_BRUSH_SOLID_COLOR |
GMC_SRC_DATATYPE_COLOR));
radeon_fifo_wait (7);
/* clear line drawing regs */
OUTREG(DST_LINE_START, 0);
OUTREG(DST_LINE_END, 0);
/* set brush color regs */
OUTREG(DP_BRUSH_FRGD_CLR, 0xffffffff);
OUTREG(DP_BRUSH_BKGD_CLR, 0x00000000);
/* set source color regs */
OUTREG(DP_SRC_FRGD_CLR, 0xffffffff);
OUTREG(DP_SRC_BKGD_CLR, 0x00000000);
/* default write mask */
OUTREG(DP_WRITE_MSK, 0xffffffff);
radeon_engine_idle ();
}