mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-23 00:06:51 +07:00
0411de8548
The ability to use NVIDIA's fuc has been retained *temporarily* in order to better debug any issues that may be lingering in our initial attempt at writing this ucode. Once I'm fairly confident we're okay, it'll be removed. There are a number of things not implemented by this fuc currently, but most of them are sets of state that our context setup would not have used anyway. No doubt we'll find out what they're for at some point, and implement them if required. This has been tested on 0xc0/0xc4 thus far, and from what I could tell it worked as well as NVIDIA's. It's also been tested on 0xc1, but even with NVIDIA's fuc that chipset doesn't work correctly with nouveau yet. 0xc3/0xc8/0xce should in theory be supported too, but I don't have the hardware to check that. There are no doubt numerous bugs still to squash; please report any! Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
401 lines
8.5 KiB
Plaintext
401 lines
8.5 KiB
Plaintext
/* fuc microcode util functions for nvc0 PGRAPH
|
|
*
|
|
* Copyright 2011 Red Hat Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* Authors: Ben Skeggs
|
|
*/
|
|
|
|
/* MMIO transfer list entry: emits one 32-bit word holding (count - 1)
 * shifted up to bit 26, ORed with the first argument.
 *
 * Arguments: first  = value for the low bits (presumably a register
 *                     offset - confirm against the list users)
 *            second = count (the size routine turns this back into
 *                     count * 4 bytes)
 */
define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')

/* Reserve storage for one command queue: two 32-bit words (GET, PUT)
 * followed by eight slots of two 32-bit words (command, data) each.
 */
define(`queue_init', `.skip eval((2 * 4) + (8 * (2 * 4)))')
|
|
|
|
ifdef(`include_code', `
|
|
// Error codes, passed to the error routine in $r15
define(`E_BAD_COMMAND', 0x01)
define(`E_CMD_OVERFLOW', 0x02)		// raised by queue_put when no slot is free
|
|
|
|
// Trace bit numbers, used to help with debugging ucode hangs etc.
// Each value is a bit position handed to the two trace helpers below.
define(`T_WAIT', 0)		// used while polling a DONE bit
define(`T_MMCTX', 1)		// used around an mmio list transfer
define(`T_STRWAIT', 2)
define(`T_STRINIT', 3)		// used around strand context initialisation
define(`T_AUTO', 4)
define(`T_CHAN', 5)
define(`T_LOAD', 6)
define(`T_SAVE', 7)
define(`T_LCHAN', 8)
define(`T_LCTXH', 9)
|
|
|
|
// Trace helper: write a one-bit mask for the given trace bit to the
// I/O register at (0x83c << 6).
//
// Argument: bit number
// Clobbers: $r8, $r9
//
// NOTE(review): the clear helper targets 0x85c yet carries the same
// register comment, so 0x83c is presumably a bit-set alias of
// CC_SCRATCH[7] - confirm against the hardware documentation.
define(`trace_set', `
	mov $r8 0x83c
	shl b32 $r8 6			// $r8 = I/O address
	clear b32 $r9
	bset $r9 $1			// $r9 = mask with only the trace bit set
	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
')
|
|
|
|
// Trace helper: write a one-bit mask for the given trace bit to the
// I/O register at (0x85c << 6).
//
// Argument: bit number
// Clobbers: $r8, $r9
//
// NOTE(review): the set helper targets 0x83c yet carries the same
// register comment, so 0x85c is presumably a bit-clear alias of
// CC_SCRATCH[7] - confirm against the hardware documentation.
define(`trace_clr', `
	mov $r8 0x85c
	shl b32 $r8 6			// $r8 = I/O address
	clear b32 $r9
	bset $r9 $1			// $r9 = mask with only the trace bit set
	iowr I[$r8 + 0x000] $r9		// CC_SCRATCH[7]
')
|
|
|
|
// queue_put - append a command/data pair to a queue
//
// Queue layout, as byte offsets from the queue pointer:
//	0x00 GET, 0x04 PUT, 0x08 onwards eight 8-byte slots (command, data)
// GET and PUT count modulo 16 and their low three bits select the slot,
// so the queue is full when PUT equals GET with bit 3 flipped.
//
// In : $r13 queue pointer
//      $r14 command
//      $r15 data
//
// On overflow nothing is stored; the error routine is called with the
// overflow code in $r15 instead.
//
// Clobbers: $r8, $r9 ($r15 as well on the overflow path)
//
queue_put:
	// full check: (GET ^ 8) == PUT
	ld b32 $r8 D[$r13 + 0x0]	// GET
	ld b32 $r9 D[$r13 + 0x4]	// PUT
	xor $r8 8
	cmpu b32 $r8 $r9
	bra ne queue_put_store
	mov $r15 E_CMD_OVERFLOW
	call error
	ret

	// slot address = queue + 8 + ((PUT & 7) * 8)
queue_put_store:
	and $r8 $r9 7
	shl b32 $r8 3
	add b32 $r8 $r13
	add b32 $r8 8
	st b32 D[$r8 + 0x0] $r14	// command
	st b32 D[$r8 + 0x4] $r15	// data

	// PUT = (PUT + 1) & 0xf
	add b32 $r9 1
	and $r9 0xf
	st b32 D[$r13 + 0x4] $r9
	ret
|
|
|
|
// queue_get - pop the oldest command/data pair from a queue
//
// In : $r13 queue pointer
//
// Out: $p1 clear on success (data available), set when the queue is empty
//      $r14 command (only written on success)
//      $r15 data (only written on success)
//
// Clobbers: $r8, $r9
//
queue_get:
	bset $flags $p1			// assume empty until proven otherwise
	ld b32 $r8 D[$r13 + 0x0]	// GET
	ld b32 $r9 D[$r13 + 0x4]	// PUT
	cmpu b32 $r8 $r9
	bra e queue_get_exit		// GET == PUT: nothing queued

	// slot address = queue + 8 + ((GET & 7) * 8)
	and $r9 $r8 7
	shl b32 $r9 3
	add b32 $r9 $r13
	add b32 $r9 8
	ld b32 $r14 D[$r9 + 0x0]	// command
	ld b32 $r15 D[$r9 + 0x4]	// data

	// GET = (GET + 1) & 0xf
	add b32 $r8 1
	and $r8 0xf
	st b32 D[$r13 + 0x0] $r8
	bclr $flags $p1			// report success
queue_get_exit:
	ret
|
|
|
|
// nv_rd32 - read a 32-bit value from an nv register
//
// In : $r14 register
// Out: $r15 value
//
// Clobbers: $r10, $r11, $r12, plus $r8 and $r9 through wait_doneo
//
nv_rd32:
	mov $r11 0x728
	shl b32 $r11 6			// $r11 = MMIO_CTRL I/O address
	mov b32 $r12 $r14
	bset $r12 31			// MMIO_CTRL_PENDING
	iowr I[$r11 + 0x000] $r12	// MMIO_CTRL: start the read

	// spin until the PENDING bit reads back as zero
nv_rd32_pending:
	iord $r12 I[$r11 + 0x000]
	xbit $r12 $r12 31
	bra ne nv_rd32_pending

	// then wait for the read data to be flagged as done
	mov $r10 6			// DONE_MMIO_RD
	call wait_doneo
	iord $r15 I[$r11 + 0x100]	// MMIO_RDVAL
	ret
|
|
|
|
// nv_wr32 - write a 32-bit value to an nv register
//
// In : $r14 register
//      $r15 value
//
// Clobbers: $r11, $r12
//
nv_wr32:
	mov $r11 0x728
	shl b32 $r11 6			// $r11 = MMIO_CTRL I/O address
	iowr I[$r11 + 0x200] $r15	// MMIO_WRVAL: data goes in before the trigger
	mov b32 $r12 $r14
	bset $r12 31			// MMIO_CTRL_PENDING
	bset $r12 30			// MMIO_CTRL_WRITE
	iowr I[$r11 + 0x000] $r12	// MMIO_CTRL: start the write

	// spin until the PENDING bit reads back as zero
nv_wr32_pending:
	iord $r12 I[$r11 + 0x000]
	xbit $r12 $r12 31
	bra ne nv_wr32_pending
	ret
|
|
|
|
// watchdog_reset - (re)arm the watchdog timer
//
// In : $r15 timeout
//
// The value written is the timeout with bit 31 forced on (presumably
// the enable bit - confirm). Note that $r15 itself is modified.
//
// Clobbers: $r8, $r15
//
watchdog_reset:
	mov $r8 0x430
	shl b32 $r8 6			// $r8 = watchdog register I/O address
	bset $r15 31
	iowr I[$r8 + 0x000] $r15
	ret
|
|
|
|
// watchdog_clear - clear the watchdog timer
//
// Writes $r0 to the same register watchdog_reset uses ($r0 is
// presumably the always-zero register - confirm).
//
// Clobbers: $r8
//
watchdog_clear:
	mov $r8 0x430
	shl b32 $r8 6			// $r8 = watchdog register I/O address
	iowr I[$r8 + 0x000] $r0
	ret
|
|
|
|
// wait_donez / wait_doneo - spin until a bit in the DONE register
// becomes clear (wait_donez) or set (wait_doneo)
//
// In : $r10 bit to wait on
//
// Clobbers: $r8, $r9
//
// Both routines are stamped out from one template. Template arguments:
//	first  = entry label
//	second = branch condition that keeps the poll loop spinning
//	         (ne: spin while the bit reads 1, e: spin while it reads 0)
//
define(`wait_done', `
$1:
	trace_set(T_WAIT)
	mov $r8 0x818
	shl b32 $r8 6
	iowr I[$r8 + 0x000] $r10	// CC_SCRATCH[6] = wait bit
	wait_done_$1:
		mov $r8 0x400
		shl b32 $r8 6
		iord $r8 I[$r8 + 0x000]	// DONE
		xbit $r8 $r8 $r10
		bra $2 wait_done_$1
	trace_clr(T_WAIT)
	ret
')
wait_done(wait_donez, ne)
wait_done(wait_doneo, e)
|
|
|
|
// mmctx_size - determine size of a mmio list transfer
//
// Each 32-bit list entry stores (count - 1) in the bits from 26 upwards
// and contributes count * 4 bytes to the total.
//
// In : $r14 mmio list head
//      $r15 mmio list tail (one past the final entry)
// Out: $r15 transfer size (in bytes), 0 for an empty list
//
// Clobbers: $r8, $r9, $r14 (left equal to the tail)
//
mmctx_size:
	clear b32 $r9			// running total
	// an empty list (head == tail) must not enter the loop, which
	// would otherwise read entries beyond the tail
	cmpu b32 $r14 $r15
	bra e mmctx_size_done
mmctx_size_loop:
	ld b32 $r8 D[$r14]
	shr b32 $r8 26			// count - 1
	add b32 $r8 1			// count
	shl b32 $r8 2			// count * 4 bytes
	add b32 $r9 $r8
	add b32 $r14 4			// next entry
	cmpu b32 $r14 $r15
	bra ne mmctx_size_loop
mmctx_size_done:
	mov b32 $r15 $r9
	ret
|
|
|
|
// mmctx_xfer - execute a list of mmio transfers
//
// In : $r10 flags
//		bit 0: direction (0 = save, 1 = load)
//		bit 1: set if first transfer
//		bit 2: set if last transfer
//      $r11 base (0: MMCTX_BASE is not written, BASE_EN stays clear)
//      $r12 mmio list head
//      $r13 mmio list tail (one past the final entry)
//      $r14 multi_stride (0: the MULTI registers are not written,
//           MULTI_EN stays clear)
//      $r15 multi_mask
//
// Clobbers: $r8, $r9, $r11, $r12, $r14, and $r10 when flag bit 2 is clear
//
// The list is walked with the test at the bottom of the loop, so at
// least one entry is always queued.
//
mmctx_xfer:
	trace_set(T_MMCTX)
	mov $r8 0x710
	shl b32 $r8 6
	clear b32 $r9			// enable bits ORed into every entry

	// optional base
	or $r11 $r11
	bra e mmctx_xfer_no_base
	iowr I[$r8 + 0x000] $r11	// MMCTX_BASE
	bset $r9 0			// BASE_EN
mmctx_xfer_no_base:

	// optional multi stride/mask
	or $r14 $r14
	bra e mmctx_xfer_no_multi
	iowr I[$r8 + 0x200] $r14	// MMCTX_MULTI_STRIDE
	iowr I[$r8 + 0x300] $r15	// MMCTX_MULTI_MASK
	bset $r9 1			// MULTI_EN
mmctx_xfer_no_multi:
	add b32 $r8 0x100		// $r8 now addresses MMCTX_CTRL

	// program direction and queue limit, and start if flagged as first
	xbit $r11 $r10 0
	shl b32 $r11 16			// DIR
	bset $r11 12			// QLIMIT = 0x10
	xbit $r14 $r10 1
	shl b32 $r14 17
	or $r11 $r14			// START_TRIGGER
	iowr I[$r8 + 0x000] $r11	// MMCTX_CTRL

	// loop over the mmio list, and send requests to the hw
mmctx_xfer_next:
	// wait for space in mmctx queue (low five bits non-zero)
mmctx_xfer_space:
	iord $r14 I[$r8 + 0x000]	// MMCTX_CTRL
	and $r14 0x1f
	bra e mmctx_xfer_space

	// queue up an entry
	ld b32 $r14 D[$r12]
	or $r14 $r9
	iowr I[$r8 + 0x300] $r14
	add b32 $r12 4
	cmpu b32 $r12 $r13
	bra ne mmctx_xfer_next

	// last transfer: issue a stop, otherwise just drain the queue
	xbit $r11 $r10 2
	bra ne mmctx_xfer_stop

	// wait for queue to empty (low five bits back at 0x10)
mmctx_xfer_drain:
	iord $r11 I[$r8 + 0x000]	// MMCTX_CTRL
	and $r11 0x1f
	cmpu b32 $r11 0x10
	bra ne mmctx_xfer_drain
	mov $r10 2			// DONE_MMCTX
	call wait_donez
	bra mmctx_xfer_out

mmctx_xfer_stop:
	xbit $r11 $r10 0
	shl b32 $r11 16			// DIR
	bset $r11 12			// QLIMIT = 0x10
	bset $r11 18			// STOP_TRIGGER
	iowr I[$r8 + 0x000] $r11	// MMCTX_CTRL

	// wait for STOP_TRIGGER to clear
mmctx_xfer_stopping:
	iord $r11 I[$r8 + 0x000]	// MMCTX_CTRL
	xbit $r11 $r11 18
	bra ne mmctx_xfer_stopping

mmctx_xfer_out:
	trace_clr(T_MMCTX)
	ret
|
|
|
|
// strand_wait - wait for DONE_STRAND
//
// Waits for bit 2 of the DONE register to read zero, preserving $r10
// across the call.
//
// NOTE(review): mmctx_xfer labels the same bit number DONE_MMCTX -
// confirm which name is right.
//
// Clobbers: $r8, $r9 (through wait_donez)
//
strand_wait:
	push $r10			// the wait routine takes its bit in $r10
	mov $r10 2
	call wait_donez
	pop $r10
	ret
|
|
|
|
// strand_pre - call before issuing strand commands
//
// Purpose unknown (per the original author): writes 0xc to the I/O
// register addressed by $r8 and waits for the strand to go idle.
//
// Clobbers: $r8, $r9
//
strand_pre:
	mov $r8 0x4afc
	sethi $r8 0x20000
	mov $r9 0xc
	iowr I[$r8] $r9
	call strand_wait
	ret
|
|
|
|
// strand_post - call after issuing strand commands
//
// Purpose unknown (per the original author): writes 0xd to the same
// I/O register strand_pre uses and waits for the strand to go idle.
//
// Clobbers: $r8, $r9
//
strand_post:
	mov $r8 0x4afc
	sethi $r8 0x20000
	mov $r9 0xd
	iowr I[$r8] $r9
	call strand_wait
	ret
|
|
|
|
// strand_set - selects strand set?! (exact meaning unknown per the
// original author)
//
// In : $r14 id
//
// Sequence: write 0xf to 0x93c, command 0xb to 0x928, wait; then write
// the id to 0x93c, command 0xa to 0x928, wait. The register names are
// the ones given in the original comments.
//
// Clobbers: $r8, $r9, $r10, $r11, $r12
//
strand_set:
	mov $r10 0x4ffc
	sethi $r10 0x20000		// $r10 addresses 0x93c
	sub b32 $r11 $r10 0x500		// $r11 addresses 0x928

	mov $r12 0xf
	iowr I[$r10 + 0x000] $r12	// 0x93c = 0xf
	mov $r12 0xb
	iowr I[$r11 + 0x000] $r12	// 0x928 = 0xb
	call strand_wait

	iowr I[$r10 + 0x000] $r14	// 0x93c = <id>
	mov $r12 0xa
	iowr I[$r11 + 0x000] $r12	// 0x928 = 0xa
	call strand_wait
	ret
|
|
|
|
// strand_ctx_init - initialise strand context data
//
// In : $r15 context base
// Out: $r15 context size (in bytes)
//
// Strandset(?) 3 hardcoded currently
//
// Clobbers: $r8, $r9, $r10, $r11, $r12, $r14
//
// NOTE(review): the base is converted to 256-byte units with a plain
// right shift, so the returned size presumably assumes a 256-byte
// aligned base - confirm with the caller. The per-strand loop tests
// its counter at the bottom, so a strand count of zero is not handled.
//
strand_ctx_init:
	trace_set(T_STRINIT)
	call strand_pre
	mov $r14 3
	call strand_set

	// latch first gene = 0 and gene count = 0xffffffff
	mov $r10 0x46fc
	sethi $r10 0x20000
	add b32 $r11 $r10 0x400		// $r11 addresses STRAND_CMD
	iowr I[$r10 + 0x100] $r0	// STRAND_FIRST_GENE = 0
	mov $r12 1
	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_FIRST_GENE
	call strand_wait
	sub b32 $r12 $r0 1
	iowr I[$r10 + 0x000] $r12	// STRAND_GENE_CNT = 0xffffffff
	mov $r12 2
	iowr I[$r11 + 0x000] $r12	// STRAND_CMD = LATCH_GENE_CNT
	call strand_wait
	call strand_post

	// read the size of each strand, poke the context offset of
	// each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
	// about it later then.
	mov $r8 0x880
	shl b32 $r8 6
	iord $r9 I[$r8 + 0x000]		// STRANDS: loop counter
	add b32 $r8 0x2200
	shr b32 $r14 $r15 8		// running offset, in 256-byte units
strand_ctx_init_loop:
	iowr I[$r8 + 0x000] $r14	// STRAND_SAVE_SWBASE
	iowr I[$r8 + 0x100] $r14	// STRAND_LOAD_SWBASE
	iord $r10 I[$r8 + 0x200]	// STRAND_SIZE
	shr b32 $r10 6
	add b32 $r10 1			// (size >> 6) + 1 units for this strand
	add b32 $r14 $r10
	add b32 $r8 4			// registers of the next strand
	sub b32 $r9 1
	bra ne strand_ctx_init_loop

	// size in bytes = (end offset << 8) - base
	shl b32 $r14 8
	sub b32 $r15 $r14 $r15
	trace_clr(T_STRINIT)
	ret
|
|
')
|