mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
drm/amdgpu: Redo XGMI reset synchronization.
Use a task barrier in the XGMI hive to synchronize ASIC resets across all devices in the hive.

v2: Return right away with a warning if there is no XGMI hive; update the doc.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Le Ma <Le.Ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
f33a8770cd
commit
c6a6e2db99
@ -66,6 +66,7 @@
|
||||
#include "amdgpu_pmu.h"
|
||||
|
||||
#include <linux/suspend.h>
|
||||
#include <drm/task_barrier.h>
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
|
||||
@ -2664,14 +2665,38 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
|
||||
{
|
||||
struct amdgpu_device *adev =
|
||||
container_of(__work, struct amdgpu_device, xgmi_reset_work);
|
||||
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
|
||||
|
||||
if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
|
||||
adev->asic_reset_res = (adev->in_baco == false) ?
|
||||
amdgpu_device_baco_enter(adev->ddev) :
|
||||
amdgpu_device_baco_exit(adev->ddev);
|
||||
else
|
||||
adev->asic_reset_res = amdgpu_asic_reset(adev);
|
||||
/* It's a bug to not have a hive within this function */
|
||||
if (WARN_ON(!hive))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Use task barrier to synchronize all xgmi reset works across the
|
||||
* hive. task_barrier_enter and task_barrier_exit will block
|
||||
* until all the threads running the xgmi reset works reach
|
||||
* those points. task_barrier_full will do both blocks.
|
||||
*/
|
||||
if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
|
||||
|
||||
task_barrier_enter(&hive->tb);
|
||||
adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
|
||||
|
||||
if (adev->asic_reset_res)
|
||||
goto fail;
|
||||
|
||||
task_barrier_exit(&hive->tb);
|
||||
adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
|
||||
|
||||
if (adev->asic_reset_res)
|
||||
goto fail;
|
||||
} else {
|
||||
|
||||
task_barrier_full(&hive->tb);
|
||||
adev->asic_reset_res = amdgpu_asic_reset(adev);
|
||||
}
|
||||
|
||||
fail:
|
||||
if (adev->asic_reset_res)
|
||||
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
|
||||
adev->asic_reset_res, adev->ddev->unique);
|
||||
|
Loading…
Reference in New Issue
Block a user