mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
drm/amdgpu: enable ras on sdma4
register IH, enable ras features on sdma. create sysfs debugfs file for sdma. Signed-off-by: xinhui pan <xinhui.pan@amd.com> Signed-off-by: Feifei Xu <Feifei.Xu@amd.com> Signed-off-by: Eric Huang <JinhuiEric.Huang@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
2be4c4a9d4
commit
8cf12507d3
@ -30,6 +30,8 @@
|
||||
enum amdgpu_sdma_irq {
|
||||
AMDGPU_SDMA_IRQ_TRAP0 = 0,
|
||||
AMDGPU_SDMA_IRQ_TRAP1,
|
||||
AMDGPU_SDMA_IRQ_ECC0,
|
||||
AMDGPU_SDMA_IRQ_ECC1,
|
||||
|
||||
AMDGPU_SDMA_IRQ_LAST
|
||||
};
|
||||
@ -49,9 +51,11 @@ struct amdgpu_sdma {
|
||||
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
|
||||
struct amdgpu_irq_src trap_irq;
|
||||
struct amdgpu_irq_src illegal_inst_irq;
|
||||
struct amdgpu_irq_src ecc_irq;
|
||||
int num_instances;
|
||||
uint32_t srbm_soft_reset;
|
||||
bool has_page_queue;
|
||||
struct ras_common_if *ras_if;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -41,6 +41,8 @@
|
||||
#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
|
||||
#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
|
||||
|
||||
#include "amdgpu_ras.h"
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
|
||||
MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
|
||||
@ -1493,6 +1495,83 @@ static int sdma_v4_0_early_init(void *handle)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
|
||||
static int sdma_v4_0_late_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct ras_common_if **ras_if = &adev->sdma.ras_if;
|
||||
struct ras_ih_if ih_info = {
|
||||
.cb = sdma_v4_0_process_ras_data_cb,
|
||||
};
|
||||
struct ras_fs_if fs_info = {
|
||||
.sysfs_name = "sdma_err_count",
|
||||
.debugfs_name = "sdma_err_inject",
|
||||
};
|
||||
struct ras_common_if ras_block = {
|
||||
.block = AMDGPU_RAS_BLOCK__SDMA,
|
||||
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
|
||||
.sub_block_index = 0,
|
||||
.name = "sdma",
|
||||
};
|
||||
int r;
|
||||
|
||||
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
|
||||
amdgpu_ras_feature_enable(adev, &ras_block, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
||||
if (!*ras_if)
|
||||
return -ENOMEM;
|
||||
|
||||
**ras_if = ras_block;
|
||||
|
||||
r = amdgpu_ras_feature_enable(adev, *ras_if, 1);
|
||||
if (r)
|
||||
goto feature;
|
||||
|
||||
ih_info.head = **ras_if;
|
||||
fs_info.head = **ras_if;
|
||||
|
||||
r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
|
||||
if (r)
|
||||
goto interrupt;
|
||||
|
||||
r = amdgpu_ras_debugfs_create(adev, &fs_info);
|
||||
if (r)
|
||||
goto debugfs;
|
||||
|
||||
r = amdgpu_ras_sysfs_create(adev, &fs_info);
|
||||
if (r)
|
||||
goto sysfs;
|
||||
|
||||
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
|
||||
if (r)
|
||||
goto irq;
|
||||
|
||||
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC1);
|
||||
if (r) {
|
||||
amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
|
||||
goto irq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
irq:
|
||||
amdgpu_ras_sysfs_remove(adev, *ras_if);
|
||||
sysfs:
|
||||
amdgpu_ras_debugfs_remove(adev, *ras_if);
|
||||
debugfs:
|
||||
amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
|
||||
interrupt:
|
||||
amdgpu_ras_feature_enable(adev, *ras_if, 0);
|
||||
feature:
|
||||
kfree(*ras_if);
|
||||
*ras_if = NULL;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_sw_init(void *handle)
|
||||
{
|
||||
struct amdgpu_ring *ring;
|
||||
@ -1511,6 +1590,18 @@ static int sdma_v4_0_sw_init(void *handle)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* SDMA SRAM ECC event */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
|
||||
&adev->sdma.ecc_irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* SDMA SRAM ECC event */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC,
|
||||
&adev->sdma.ecc_irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
ring->ring_obj = NULL;
|
||||
@ -1561,6 +1652,22 @@ static int sdma_v4_0_sw_fini(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
int i;
|
||||
|
||||
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
|
||||
adev->sdma.ras_if) {
|
||||
struct ras_common_if *ras_if = adev->sdma.ras_if;
|
||||
struct ras_ih_if ih_info = {
|
||||
.head = *ras_if,
|
||||
};
|
||||
|
||||
/*remove fs first*/
|
||||
amdgpu_ras_debugfs_remove(adev, ras_if);
|
||||
amdgpu_ras_sysfs_remove(adev, ras_if);
|
||||
/*remove the IH*/
|
||||
amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
|
||||
amdgpu_ras_feature_enable(adev, ras_if, 0);
|
||||
kfree(ras_if);
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
|
||||
if (adev->sdma.has_page_queue)
|
||||
@ -1598,6 +1705,9 @@ static int sdma_v4_0_hw_fini(void *handle)
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return 0;
|
||||
|
||||
amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
|
||||
amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC1);
|
||||
|
||||
sdma_v4_0_ctx_switch_enable(adev, false);
|
||||
sdma_v4_0_enable(adev, false);
|
||||
|
||||
@ -1714,6 +1824,50 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
uint32_t instance, err_source;
|
||||
|
||||
switch (entry->client_id) {
|
||||
case SOC15_IH_CLIENTID_SDMA0:
|
||||
instance = 0;
|
||||
break;
|
||||
case SOC15_IH_CLIENTID_SDMA1:
|
||||
instance = 1;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (entry->src_id) {
|
||||
case SDMA0_4_0__SRCID__SDMA_SRAM_ECC:
|
||||
err_source = 0;
|
||||
break;
|
||||
case SDMA0_4_0__SRCID__SDMA_ECC:
|
||||
err_source = 1;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
amdgpu_ras_reset_gpu(adev, 0);
|
||||
|
||||
return AMDGPU_RAS_UE;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
struct ras_dispatch_if ih_data = {
|
||||
.head = *adev->sdma.ras_if,
|
||||
.entry = entry,
|
||||
};
|
||||
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
@ -1741,6 +1895,25 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
unsigned type,
|
||||
enum amdgpu_interrupt_state state)
|
||||
{
|
||||
u32 sdma_edc_config;
|
||||
|
||||
u32 reg_offset = (type == AMDGPU_SDMA_IRQ_ECC0) ?
|
||||
sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) :
|
||||
sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG);
|
||||
|
||||
sdma_edc_config = RREG32(reg_offset);
|
||||
sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
|
||||
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
|
||||
WREG32(reg_offset, sdma_edc_config);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sdma_v4_0_update_medium_grain_clock_gating(
|
||||
struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
@ -1906,7 +2079,7 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
|
||||
const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
|
||||
.name = "sdma_v4_0",
|
||||
.early_init = sdma_v4_0_early_init,
|
||||
.late_init = NULL,
|
||||
.late_init = sdma_v4_0_late_init,
|
||||
.sw_init = sdma_v4_0_sw_init,
|
||||
.sw_fini = sdma_v4_0_sw_fini,
|
||||
.hw_init = sdma_v4_0_hw_init,
|
||||
@ -2008,11 +2181,20 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
|
||||
.process = sdma_v4_0_process_illegal_inst_irq,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
|
||||
.set = sdma_v4_0_set_ecc_irq_state,
|
||||
.process = sdma_v4_0_process_ecc_irq,
|
||||
};
|
||||
|
||||
|
||||
|
||||
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
|
||||
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user