Merge tag 'drm-amdkfd-next-2018-07-28' of git://people.freedesktop.org/~gabbayo/linux into drm-next
This is amdkfd pull for 4.19. The major changes are:

- Add Raven support. Raven refers to Ryzen APUs with integrated GFXv9 GPU.
- Integrate GPU reset support

In addition, there are a couple of small fixes and improvements, such as:

- Better handling and reporting to user of VM faults
- Fix race upon context restore
- Allow the user to use specific Compute Units
- Basic power management

Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180728122306.GA5235@ogabbay-vm
commit ecd7963f7c
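For orientation, a minimal sketch of how the new KFD hooks wrap GPU recovery, following the amdgpu_device_gpu_recover() and kfd_device.c hunks below; the wrapper function itself is illustrative and not part of the patch:

/* Illustrative sketch only -- not part of this commit. */
static void example_gpu_recover_flow(struct amdgpu_device *adev)
{
	adev->in_gpu_reset = 1;

	/* Block kfd: kgd2kfd->pre_reset() suspends KFD and takes dqm->lock */
	amdgpu_amdkfd_pre_reset(adev);

	/* ... ASIC reset and ring/job recovery happen here ... */

	/* Unlock kfd: kgd2kfd->post_reset() drops dqm->lock and resumes KFD */
	amdgpu_amdkfd_post_reset(adev);

	adev->in_gpu_reset = 0;
}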
@@ -243,6 +243,33 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd)
		r = kgd2kfd->pre_reset(adev->kfd);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd)
		r = kgd2kfd->post_reset(adev->kfd);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	amdgpu_device_gpu_recover(adev, NULL, false);
}

int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
		void **mem_obj, uint64_t *gpu_addr,
		void **cpu_ptr)

@@ -461,6 +488,14 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
	return ret;
}

void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE, !idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd) {
@@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);

@@ -126,6 +127,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);

void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);

/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
		void **mem_obj, uint64_t *gpu_addr,

@@ -183,6 +190,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
					    struct dma_fence **ef);

int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
					struct kfd_vm_fault_info *info);

void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t page_table_base);
|
||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
|
||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
|
||||
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
|
||||
|
||||
/* Because of REG_GET_FIELD() being used, we put this function in the
|
||||
* asic specific file.
|
||||
@ -216,6 +217,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
|
||||
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
||||
@ -571,6 +576,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
unsigned long flags, end_jiffies;
|
||||
int retry;
|
||||
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
acquire_queue(kgd, pipe_id, queue_id);
|
||||
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
|
||||
|
||||
@ -882,6 +890,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
int vmid;
|
||||
unsigned int tmp;
|
||||
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
||||
continue;
|
||||
@ -911,3 +922,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
||||
RREG32(mmVM_INVALIDATE_RESPONSE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* read_vmid_from_vmfault_reg - read vmid from register
|
||||
*
|
||||
* adev: amdgpu_device pointer
|
||||
* @vmid: vmid pointer
|
||||
* read vmid from register (CIK).
|
||||
*/
|
||||
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
|
||||
uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
|
||||
|
||||
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
|
||||
}
|
||||
|
@ -176,6 +176,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
|
||||
@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
int retry;
|
||||
struct vi_mqd *m = get_mqd(mqd);
|
||||
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
acquire_queue(kgd, pipe_id, queue_id);
|
||||
|
||||
if (m->cp_hqd_vmid == 0)
|
||||
@ -844,6 +850,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
int vmid;
|
||||
unsigned int tmp;
|
||||
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
||||
continue;
|
||||
|
@ -213,6 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
|
||||
@ -679,6 +681,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
uint32_t temp;
|
||||
struct v9_mqd *m = get_mqd(mqd);
|
||||
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
acquire_queue(kgd, pipe_id, queue_id);
|
||||
|
||||
if (m->cp_hqd_vmid == 0)
|
||||
@ -866,6 +871,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
int vmid;
|
||||
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
|
||||
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
if (ring->ready)
|
||||
return invalidate_tlbs_with_kiq(adev, pasid);
|
||||
|
||||
|
@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
||||
struct kfd_vm_fault_info *mem)
|
||||
{
|
||||
struct amdgpu_device *adev;
|
||||
|
||||
adev = (struct amdgpu_device *)kgd;
|
||||
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
|
||||
*mem = *adev->gmc.vm_fault_info;
|
||||
mb();
|
||||
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Evict a userptr BO by stopping the queues if necessary
|
||||
*
|
||||
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
|
||||
|
@@ -3307,6 +3307,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = 1;

	/* Block kfd */
	amdgpu_amdkfd_pre_reset(adev);

	/* block TTM */
	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

@@ -3322,7 +3325,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
		if (job && job->base.sched == &ring->sched)
			continue;

		drm_sched_hw_job_reset(&ring->sched, &job->base);
		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);

@@ -3363,6 +3366,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
		dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
	}

	/*unlock kfd */
	amdgpu_amdkfd_post_reset(adev);

	amdgpu_vf_error_trans_all(adev);
	adev->in_gpu_reset = 0;
	mutex_unlock(&adev->lock_reset);
@@ -105,6 +105,8 @@ struct amdgpu_gmc {
	/* protects concurrent invalidation */
	spinlock_t invalidate_lock;
	bool translate_further;
	struct kfd_vm_fault_info *vm_fault_info;
	atomic_t vm_fault_info_updated;

	const struct amdgpu_gmc_funcs *gmc_funcs;
};
@ -495,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
|
||||
eaddr = eaddr & ((1 << shift) - 1);
|
||||
|
||||
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
|
||||
if (vm->root.base.bo->shadow)
|
||||
flags |= AMDGPU_GEM_CREATE_SHADOW;
|
||||
if (vm->use_cpu_for_update)
|
||||
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
else
|
||||
flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
|
||||
AMDGPU_GEM_CREATE_SHADOW);
|
||||
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
|
||||
|
||||
/* walk over the address space and allocate the page tables */
|
||||
for (pt_idx = from; pt_idx <= to; ++pt_idx) {
|
||||
@ -2587,7 +2588,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
|
||||
if (vm->use_cpu_for_update)
|
||||
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
else
|
||||
else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
|
||||
flags |= AMDGPU_GEM_CREATE_SHADOW;
|
||||
|
||||
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
|
||||
@ -2662,8 +2663,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
* - pasid (old PASID is released, because compute manages its own PASIDs)
|
||||
*
|
||||
* Reinitializes the page directory to reflect the changed ATS
|
||||
* setting. May leave behind an unused shadow BO for the page
|
||||
* directory when switching from SDMA updates to CPU updates.
|
||||
* setting.
|
||||
*
|
||||
* Returns:
|
||||
* 0 for success, -errno for errors.
|
||||
@ -2713,6 +2713,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
||||
vm->pasid = 0;
|
||||
}
|
||||
|
||||
/* Free the shadow bo for compute VM */
|
||||
amdgpu_bo_unref(&vm->root.base.bo->shadow);
|
||||
|
||||
error:
|
||||
amdgpu_bo_unreserve(vm->root.base.bo);
|
||||
return r;
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "cik.h"
|
||||
#include "gmc_v7_0.h"
|
||||
#include "amdgpu_ucode.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
#include "bif/bif_4_1_d.h"
|
||||
#include "bif/bif_4_1_sh_mask.h"
|
||||
@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
|
||||
adev->vm_manager.vram_base_offset = 0;
|
||||
}
|
||||
|
||||
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
|
||||
GFP_KERNEL);
|
||||
if (!adev->gmc.vm_fault_info)
|
||||
return -ENOMEM;
|
||||
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)
|
||||
|
||||
amdgpu_gem_force_release(adev);
|
||||
amdgpu_vm_manager_fini(adev);
|
||||
kfree(adev->gmc.vm_fault_info);
|
||||
gmc_v7_0_gart_fini(adev);
|
||||
amdgpu_bo_fini(adev);
|
||||
release_firmware(adev->gmc.fw);
|
||||
@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u32 addr, status, mc_client;
|
||||
u32 addr, status, mc_client, vmid;
|
||||
|
||||
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
|
||||
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
|
||||
@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
|
||||
entry->pasid);
|
||||
}
|
||||
|
||||
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||
VMID);
|
||||
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
|
||||
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
|
||||
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
|
||||
u32 protections = REG_GET_FIELD(status,
|
||||
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||
PROTECTIONS);
|
||||
|
||||
info->vmid = vmid;
|
||||
info->mc_id = REG_GET_FIELD(status,
|
||||
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||
MEMORY_CLIENT_ID);
|
||||
info->status = status;
|
||||
info->page_addr = addr;
|
||||
info->prot_valid = protections & 0x7 ? true : false;
|
||||
info->prot_read = protections & 0x8 ? true : false;
|
||||
info->prot_write = protections & 0x10 ? true : false;
|
||||
info->prot_exec = protections & 0x20 ? true : false;
|
||||
mb();
|
||||
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "amdgpu.h"
|
||||
#include "gmc_v8_0.h"
|
||||
#include "amdgpu_ucode.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
#include "gmc/gmc_8_1_d.h"
|
||||
#include "gmc/gmc_8_1_sh_mask.h"
|
||||
@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
|
||||
adev->vm_manager.vram_base_offset = 0;
|
||||
}
|
||||
|
||||
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
|
||||
GFP_KERNEL);
|
||||
if (!adev->gmc.vm_fault_info)
|
||||
return -ENOMEM;
|
||||
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)
|
||||
|
||||
amdgpu_gem_force_release(adev);
|
||||
amdgpu_vm_manager_fini(adev);
|
||||
kfree(adev->gmc.vm_fault_info);
|
||||
gmc_v8_0_gart_fini(adev);
|
||||
amdgpu_bo_fini(adev);
|
||||
release_firmware(adev->gmc.fw);
|
||||
@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u32 addr, status, mc_client;
|
||||
u32 addr, status, mc_client, vmid;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
|
||||
@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
|
||||
entry->pasid);
|
||||
}
|
||||
|
||||
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||
VMID);
|
||||
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
|
||||
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
|
||||
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
|
||||
u32 protections = REG_GET_FIELD(status,
|
||||
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||
PROTECTIONS);
|
||||
|
||||
info->vmid = vmid;
|
||||
info->mc_id = REG_GET_FIELD(status,
|
||||
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
|
||||
MEMORY_CLIENT_ID);
|
||||
info->status = status;
|
||||
info->page_addr = addr;
|
||||
info->prot_valid = protections & 0x7 ? true : false;
|
||||
info->prot_read = protections & 0x8 ? true : false;
|
||||
info->prot_write = protections & 0x10 ? true : false;
|
||||
info->prot_exec = protections & 0x20 ? true : false;
|
||||
mb();
|
||||
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -25,12 +25,39 @@
|
||||
#include "cik_int.h"
|
||||
|
||||
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
|
||||
const uint32_t *ih_ring_entry)
|
||||
const uint32_t *ih_ring_entry,
|
||||
uint32_t *patched_ihre,
|
||||
bool *patched_flag)
|
||||
{
|
||||
const struct cik_ih_ring_entry *ihre =
|
||||
(const struct cik_ih_ring_entry *)ih_ring_entry;
|
||||
const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
|
||||
unsigned int vmid, pasid;
|
||||
|
||||
/* This workaround is due to HW/FW limitation on Hawaii that
|
||||
* VMID and PASID are not written into ih_ring_entry
|
||||
*/
|
||||
if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
|
||||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
|
||||
dev->device_info->asic_family == CHIP_HAWAII) {
|
||||
struct cik_ih_ring_entry *tmp_ihre =
|
||||
(struct cik_ih_ring_entry *)patched_ihre;
|
||||
|
||||
*patched_flag = true;
|
||||
*tmp_ihre = *ihre;
|
||||
|
||||
vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
|
||||
pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
|
||||
|
||||
tmp_ihre->ring_id &= 0x000000ff;
|
||||
tmp_ihre->ring_id |= vmid << 8;
|
||||
tmp_ihre->ring_id |= pasid << 16;
|
||||
|
||||
return (pasid != 0) &&
|
||||
vmid >= dev->vm_info.first_vmid_kfd &&
|
||||
vmid <= dev->vm_info.last_vmid_kfd;
|
||||
}
|
||||
|
||||
/* Only handle interrupts from KFD VMIDs */
|
||||
vmid = (ihre->ring_id & 0x0000ff00) >> 8;
|
||||
if (vmid < dev->vm_info.first_vmid_kfd ||
|
||||
@ -48,18 +75,19 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
|
||||
return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
|
||||
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
|
||||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
|
||||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
|
||||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE ||
|
||||
ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
|
||||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT;
|
||||
}
|
||||
|
||||
static void cik_event_interrupt_wq(struct kfd_dev *dev,
|
||||
const uint32_t *ih_ring_entry)
|
||||
{
|
||||
unsigned int pasid;
|
||||
const struct cik_ih_ring_entry *ihre =
|
||||
(const struct cik_ih_ring_entry *)ih_ring_entry;
|
||||
uint32_t context_id = ihre->data & 0xfffffff;
|
||||
|
||||
pasid = (ihre->ring_id & 0xffff0000) >> 16;
|
||||
unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8;
|
||||
unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16;
|
||||
|
||||
if (pasid == 0)
|
||||
return;
|
||||
@ -72,6 +100,22 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
|
||||
kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
|
||||
else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
|
||||
kfd_signal_hw_exception_event(pasid);
|
||||
else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
|
||||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
|
||||
struct kfd_vm_fault_info info;
|
||||
|
||||
kfd_process_vm_fault(dev->dqm, pasid);
|
||||
|
||||
memset(&info, 0, sizeof(info));
|
||||
dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info);
|
||||
if (!info.page_addr && !info.status)
|
||||
return;
|
||||
|
||||
if (info.vmid == vmid)
|
||||
kfd_signal_vm_fault_event(dev, pasid, &info);
|
||||
else
|
||||
kfd_signal_vm_fault_event(dev, pasid, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
const struct kfd_event_interrupt_class event_interrupt_class_cik = {
|
||||
|
@ -20,8 +20,8 @@
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HSA_RADEON_CIK_INT_H_INCLUDED
|
||||
#define HSA_RADEON_CIK_INT_H_INCLUDED
|
||||
#ifndef CIK_INT_H_INCLUDED
|
||||
#define CIK_INT_H_INCLUDED
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
@ -34,9 +34,10 @@ struct cik_ih_ring_entry {
|
||||
|
||||
#define CIK_INTSRC_CP_END_OF_PIPE 0xB5
|
||||
#define CIK_INTSRC_CP_BAD_OPCODE 0xB7
|
||||
#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
|
||||
#define CIK_INTSRC_SDMA_TRAP 0xE0
|
||||
#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF
|
||||
#define CIK_INTSRC_GFX_PAGE_INV_FAULT 0x92
|
||||
#define CIK_INTSRC_GFX_MEM_PROT_FAULT 0x93
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -21,18 +21,21 @@
|
||||
*/
|
||||
|
||||
static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||
0xbf820001, 0xbf820125,
|
||||
0xbf820001, 0xbf82012b,
|
||||
0xb8f4f802, 0x89748674,
|
||||
0xb8f5f803, 0x8675ff75,
|
||||
0x00000400, 0xbf850011,
|
||||
0x00000400, 0xbf850017,
|
||||
0xc00a1e37, 0x00000000,
|
||||
0xbf8c007f, 0x87777978,
|
||||
0xbf840002, 0xb974f802,
|
||||
0xbe801d78, 0xb8f5f803,
|
||||
0x8675ff75, 0x000001ff,
|
||||
0xbf850002, 0x80708470,
|
||||
0x82718071, 0x8671ff71,
|
||||
0x0000ffff, 0xb974f802,
|
||||
0xbf840005, 0x8f728374,
|
||||
0xb972e0c2, 0xbf800002,
|
||||
0xb9740002, 0xbe801d78,
|
||||
0xb8f5f803, 0x8675ff75,
|
||||
0x000001ff, 0xbf850002,
|
||||
0x80708470, 0x82718071,
|
||||
0x8671ff71, 0x0000ffff,
|
||||
0x8f728374, 0xb972e0c2,
|
||||
0xbf800002, 0xb9740002,
|
||||
0xbe801f70, 0xb8f5f803,
|
||||
0x8675ff75, 0x00000100,
|
||||
0xbf840006, 0xbefa0080,
|
||||
@ -168,7 +171,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||
0x807c847c, 0x806eff6e,
|
||||
0x00000400, 0xbf0a757c,
|
||||
0xbf85ffef, 0xbf9c0000,
|
||||
0xbf8200ca, 0xbef8007e,
|
||||
0xbf8200cd, 0xbef8007e,
|
||||
0x8679ff7f, 0x0000ffff,
|
||||
0x8779ff79, 0x00040000,
|
||||
0xbefa0080, 0xbefb00ff,
|
||||
@ -268,16 +271,18 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||
0x8f739773, 0xb976f807,
|
||||
0x8671ff71, 0x0000ffff,
|
||||
0x86fe7e7e, 0x86ea6a6a,
|
||||
0xb974f802, 0xbf8a0000,
|
||||
0x95807370, 0xbf810000,
|
||||
0x8f768374, 0xb976e0c2,
|
||||
0xbf800002, 0xb9740002,
|
||||
0xbf8a0000, 0x95807370,
|
||||
0xbf810000, 0x00000000,
|
||||
};
|
||||
|
||||
|
||||
static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||
0xbf820001, 0xbf82015a,
|
||||
0xbf820001, 0xbf82015d,
|
||||
0xb8f8f802, 0x89788678,
|
||||
0xb8f1f803, 0x866eff71,
|
||||
0x00000400, 0xbf850034,
|
||||
0x00000400, 0xbf850037,
|
||||
0x866eff71, 0x00000800,
|
||||
0xbf850003, 0x866eff71,
|
||||
0x00000100, 0xbf840008,
|
||||
@ -303,258 +308,261 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||
0x8f6e8b77, 0x866eff6e,
|
||||
0x001f8000, 0xb96ef807,
|
||||
0x86fe7e7e, 0x86ea6a6a,
|
||||
0xb978f802, 0xbe801f6c,
|
||||
0x866dff6d, 0x0000ffff,
|
||||
0xbef00080, 0xb9700283,
|
||||
0xb8f02407, 0x8e709c70,
|
||||
0x876d706d, 0xb8f003c7,
|
||||
0x8e709b70, 0x876d706d,
|
||||
0xb8f0f807, 0x8670ff70,
|
||||
0x00007fff, 0xb970f807,
|
||||
0xbeee007e, 0xbeef007f,
|
||||
0xbefe0180, 0xbf900004,
|
||||
0x87708478, 0xb970f802,
|
||||
0xbf8e0002, 0xbf88fffe,
|
||||
0xb8f02a05, 0x80708170,
|
||||
0x8e708a70, 0xb8f11605,
|
||||
0x80718171, 0x8e718671,
|
||||
0x80707170, 0x80707e70,
|
||||
0x8271807f, 0x8671ff71,
|
||||
0x0000ffff, 0xc0471cb8,
|
||||
0x00000040, 0xbf8cc07f,
|
||||
0xc04b1d38, 0x00000048,
|
||||
0xbf8cc07f, 0xc0431e78,
|
||||
0x00000058, 0xbf8cc07f,
|
||||
0xc0471eb8, 0x0000005c,
|
||||
0xbf8cc07f, 0xbef4007e,
|
||||
0x8675ff7f, 0x0000ffff,
|
||||
0x8775ff75, 0x00040000,
|
||||
0xbef60080, 0xbef700ff,
|
||||
0x00807fac, 0x8670ff7f,
|
||||
0x08000000, 0x8f708370,
|
||||
0x87777077, 0x8670ff7f,
|
||||
0x70000000, 0x8f708170,
|
||||
0x87777077, 0xbefb007c,
|
||||
0xbefa0080, 0xb8fa2a05,
|
||||
0x807a817a, 0x8e7a8a7a,
|
||||
0xb8f01605, 0x80708170,
|
||||
0x8e708670, 0x807a707a,
|
||||
0xbef60084, 0xbef600ff,
|
||||
0x01000000, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611efa,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611b3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611b7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611bba, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611bfa,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611e3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xb8f1f803,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611c7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611a3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611a7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xb8fbf801,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611efa, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0x8670ff7f,
|
||||
0x04000000, 0xbeef0080,
|
||||
0x876f6f70, 0xb8fa2a05,
|
||||
0x807a817a, 0x8e7a8a7a,
|
||||
0x8f6e8378, 0xb96ee0c2,
|
||||
0xbf800002, 0xb9780002,
|
||||
0xbe801f6c, 0x866dff6d,
|
||||
0x0000ffff, 0xbef00080,
|
||||
0xb9700283, 0xb8f02407,
|
||||
0x8e709c70, 0x876d706d,
|
||||
0xb8f003c7, 0x8e709b70,
|
||||
0x876d706d, 0xb8f0f807,
|
||||
0x8670ff70, 0x00007fff,
|
||||
0xb970f807, 0xbeee007e,
|
||||
0xbeef007f, 0xbefe0180,
|
||||
0xbf900004, 0x87708478,
|
||||
0xb970f802, 0xbf8e0002,
|
||||
0xbf88fffe, 0xb8f02a05,
|
||||
0x80708170, 0x8e708a70,
|
||||
0xb8f11605, 0x80718171,
|
||||
0x8e718471, 0x8e768271,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbef20174, 0x80747a74,
|
||||
0x82758075, 0xbefc0080,
|
||||
0xbf800000, 0xbe802b00,
|
||||
0xbe822b02, 0xbe842b04,
|
||||
0xbe862b06, 0xbe882b08,
|
||||
0xbe8a2b0a, 0xbe8c2b0c,
|
||||
0xbe8e2b0e, 0xc06b003a,
|
||||
0x00000000, 0xbf8cc07f,
|
||||
0xc06b013a, 0x00000010,
|
||||
0xbf8cc07f, 0xc06b023a,
|
||||
0x00000020, 0xbf8cc07f,
|
||||
0xc06b033a, 0x00000030,
|
||||
0xbf8cc07f, 0x8074c074,
|
||||
0x82758075, 0x807c907c,
|
||||
0xbf0a717c, 0xbf85ffe7,
|
||||
0xbef40172, 0xbefa0080,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xbee80080, 0xbee90080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xe0724000, 0x7a1d0000,
|
||||
0xe0724100, 0x7a1d0100,
|
||||
0xe0724200, 0x7a1d0200,
|
||||
0xe0724300, 0x7a1d0300,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8f14306, 0x8671c171,
|
||||
0xbf84002c, 0xbf8a0000,
|
||||
0x8670ff6f, 0x04000000,
|
||||
0xbf840028, 0x8e718671,
|
||||
0x8e718271, 0xbef60071,
|
||||
0xb8fa2a05, 0x807a817a,
|
||||
0x8e7a8a7a, 0xb8f01605,
|
||||
0x80708170, 0x8e708670,
|
||||
0x807a707a, 0x807aff7a,
|
||||
0x00000080, 0xbef600ff,
|
||||
0x01000000, 0xbefc0080,
|
||||
0xd28c0002, 0x000100c1,
|
||||
0xd28d0003, 0x000204c1,
|
||||
0xd1060002, 0x00011103,
|
||||
0x7e0602ff, 0x00000200,
|
||||
0xbefc00ff, 0x00010000,
|
||||
0xbe800077, 0x8677ff77,
|
||||
0xff7fffff, 0x8777ff77,
|
||||
0x00058000, 0xd8ec0000,
|
||||
0x00000002, 0xbf8cc07f,
|
||||
0xe0765000, 0x7a1d0002,
|
||||
0x68040702, 0xd0c9006a,
|
||||
0x0000e302, 0xbf87fff7,
|
||||
0xbef70000, 0xbefa00ff,
|
||||
0x00000400, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8f12a05,
|
||||
0x80718171, 0x8e718271,
|
||||
0x8e768871, 0xbef600ff,
|
||||
0x01000000, 0xbefc0084,
|
||||
0xbf0a717c, 0xbf840015,
|
||||
0xbf11017c, 0x8071ff71,
|
||||
0x00001000, 0x7e000300,
|
||||
0x7e020301, 0x7e040302,
|
||||
0x7e060303, 0xe0724000,
|
||||
0x7a1d0000, 0xe0724100,
|
||||
0x7a1d0100, 0xe0724200,
|
||||
0x7a1d0200, 0xe0724300,
|
||||
0x7a1d0300, 0x807c847c,
|
||||
0x807aff7a, 0x00000400,
|
||||
0xbf0a717c, 0xbf85ffef,
|
||||
0xbf9c0000, 0xbf8200d9,
|
||||
0x8e718671, 0x80707170,
|
||||
0x80707e70, 0x8271807f,
|
||||
0x8671ff71, 0x0000ffff,
|
||||
0xc0471cb8, 0x00000040,
|
||||
0xbf8cc07f, 0xc04b1d38,
|
||||
0x00000048, 0xbf8cc07f,
|
||||
0xc0431e78, 0x00000058,
|
||||
0xbf8cc07f, 0xc0471eb8,
|
||||
0x0000005c, 0xbf8cc07f,
|
||||
0xbef4007e, 0x8675ff7f,
|
||||
0x0000ffff, 0x8775ff75,
|
||||
0x00040000, 0xbef60080,
|
||||
0xbef700ff, 0x00807fac,
|
||||
0x866eff7f, 0x08000000,
|
||||
0x8f6e836e, 0x87776e77,
|
||||
0x866eff7f, 0x70000000,
|
||||
0x8f6e816e, 0x87776e77,
|
||||
0x866eff7f, 0x04000000,
|
||||
0xbf84001e, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8ef4306,
|
||||
0x866fc16f, 0xbf840019,
|
||||
0x8e6f866f, 0x8e6f826f,
|
||||
0xbef6006f, 0xb8f82a05,
|
||||
0x80788178, 0x8e788a78,
|
||||
0xb8ee1605, 0x806e816e,
|
||||
0x8e6e866e, 0x80786e78,
|
||||
0x8078ff78, 0x00000080,
|
||||
0x8670ff7f, 0x08000000,
|
||||
0x8f708370, 0x87777077,
|
||||
0x8670ff7f, 0x70000000,
|
||||
0x8f708170, 0x87777077,
|
||||
0xbefb007c, 0xbefa0080,
|
||||
0xb8fa2a05, 0x807a817a,
|
||||
0x8e7a8a7a, 0xb8f01605,
|
||||
0x80708170, 0x8e708670,
|
||||
0x807a707a, 0xbef60084,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0080, 0xe0510000,
|
||||
0x781d0000, 0xe0510100,
|
||||
0x781d0000, 0x807cff7c,
|
||||
0x00000200, 0x8078ff78,
|
||||
0x00000200, 0xbf0a6f7c,
|
||||
0xbf85fff6, 0xbef80080,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611efa, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611b3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611b7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611bba,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611bfa, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611e3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xb8f1f803, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611c7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611a3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611a7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xb8fbf801, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611efa,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0x8670ff7f, 0x04000000,
|
||||
0xbeef0080, 0x876f6f70,
|
||||
0xb8fa2a05, 0x807a817a,
|
||||
0x8e7a8a7a, 0xb8f11605,
|
||||
0x80718171, 0x8e718471,
|
||||
0x8e768271, 0xbef600ff,
|
||||
0x01000000, 0xbef20174,
|
||||
0x80747a74, 0x82758075,
|
||||
0xbefc0080, 0xbf800000,
|
||||
0xbe802b00, 0xbe822b02,
|
||||
0xbe842b04, 0xbe862b06,
|
||||
0xbe882b08, 0xbe8a2b0a,
|
||||
0xbe8c2b0c, 0xbe8e2b0e,
|
||||
0xc06b003a, 0x00000000,
|
||||
0xbf8cc07f, 0xc06b013a,
|
||||
0x00000010, 0xbf8cc07f,
|
||||
0xc06b023a, 0x00000020,
|
||||
0xbf8cc07f, 0xc06b033a,
|
||||
0x00000030, 0xbf8cc07f,
|
||||
0x8074c074, 0x82758075,
|
||||
0x807c907c, 0xbf0a717c,
|
||||
0xbf85ffe7, 0xbef40172,
|
||||
0xbefa0080, 0xbefe00c1,
|
||||
0xbeff00c1, 0xbee80080,
|
||||
0xbee90080, 0xbef600ff,
|
||||
0x01000000, 0xe0724000,
|
||||
0x7a1d0000, 0xe0724100,
|
||||
0x7a1d0100, 0xe0724200,
|
||||
0x7a1d0200, 0xe0724300,
|
||||
0x7a1d0300, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8f14306,
|
||||
0x8671c171, 0xbf84002c,
|
||||
0xbf8a0000, 0x8670ff6f,
|
||||
0x04000000, 0xbf840028,
|
||||
0x8e718671, 0x8e718271,
|
||||
0xbef60071, 0xb8fa2a05,
|
||||
0x807a817a, 0x8e7a8a7a,
|
||||
0xb8f01605, 0x80708170,
|
||||
0x8e708670, 0x807a707a,
|
||||
0x807aff7a, 0x00000080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0080, 0xd28c0002,
|
||||
0x000100c1, 0xd28d0003,
|
||||
0x000204c1, 0xd1060002,
|
||||
0x00011103, 0x7e0602ff,
|
||||
0x00000200, 0xbefc00ff,
|
||||
0x00010000, 0xbe800077,
|
||||
0x8677ff77, 0xff7fffff,
|
||||
0x8777ff77, 0x00058000,
|
||||
0xd8ec0000, 0x00000002,
|
||||
0xbf8cc07f, 0xe0765000,
|
||||
0x7a1d0002, 0x68040702,
|
||||
0xd0c9006a, 0x0000e302,
|
||||
0xbf87fff7, 0xbef70000,
|
||||
0xbefa00ff, 0x00000400,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8ef2a05, 0x806f816f,
|
||||
0x8e6f826f, 0x8e76886f,
|
||||
0xb8f12a05, 0x80718171,
|
||||
0x8e718271, 0x8e768871,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbeee0078, 0x8078ff78,
|
||||
0x00000400, 0xbefc0084,
|
||||
0xbf11087c, 0x806fff6f,
|
||||
0x00008000, 0xe0524000,
|
||||
0x781d0000, 0xe0524100,
|
||||
0x781d0100, 0xe0524200,
|
||||
0x781d0200, 0xe0524300,
|
||||
0x781d0300, 0xbf8c0f70,
|
||||
0xbefc0084, 0xbf0a717c,
|
||||
0xbf840015, 0xbf11017c,
|
||||
0x8071ff71, 0x00001000,
|
||||
0x7e000300, 0x7e020301,
|
||||
0x7e040302, 0x7e060303,
|
||||
0x807c847c, 0x8078ff78,
|
||||
0x00000400, 0xbf0a6f7c,
|
||||
0xbf85ffee, 0xbf9c0000,
|
||||
0xe0524000, 0x6e1d0000,
|
||||
0xe0524100, 0x6e1d0100,
|
||||
0xe0524200, 0x6e1d0200,
|
||||
0xe0524300, 0x6e1d0300,
|
||||
0xe0724000, 0x7a1d0000,
|
||||
0xe0724100, 0x7a1d0100,
|
||||
0xe0724200, 0x7a1d0200,
|
||||
0xe0724300, 0x7a1d0300,
|
||||
0x807c847c, 0x807aff7a,
|
||||
0x00000400, 0xbf0a717c,
|
||||
0xbf85ffef, 0xbf9c0000,
|
||||
0xbf8200dc, 0xbef4007e,
|
||||
0x8675ff7f, 0x0000ffff,
|
||||
0x8775ff75, 0x00040000,
|
||||
0xbef60080, 0xbef700ff,
|
||||
0x00807fac, 0x866eff7f,
|
||||
0x08000000, 0x8f6e836e,
|
||||
0x87776e77, 0x866eff7f,
|
||||
0x70000000, 0x8f6e816e,
|
||||
0x87776e77, 0x866eff7f,
|
||||
0x04000000, 0xbf84001e,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8ef4306, 0x866fc16f,
|
||||
0xbf840019, 0x8e6f866f,
|
||||
0x8e6f826f, 0xbef6006f,
|
||||
0xb8f82a05, 0x80788178,
|
||||
0x8e788a78, 0xb8ee1605,
|
||||
0x806e816e, 0x8e6e866e,
|
||||
0x80786e78, 0x80f8c078,
|
||||
0xb8ef1605, 0x806f816f,
|
||||
0x8e6f846f, 0x8e76826f,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc006f, 0xc031003a,
|
||||
0x00000078, 0x80f8c078,
|
||||
0xbf8cc07f, 0x80fc907c,
|
||||
0xbf800000, 0xbe802d00,
|
||||
0xbe822d02, 0xbe842d04,
|
||||
0xbe862d06, 0xbe882d08,
|
||||
0xbe8a2d0a, 0xbe8c2d0c,
|
||||
0xbe8e2d0e, 0xbf06807c,
|
||||
0xbf84fff0, 0xb8f82a05,
|
||||
0x80786e78, 0x8078ff78,
|
||||
0x00000080, 0xbef600ff,
|
||||
0x01000000, 0xbefc0080,
|
||||
0xe0510000, 0x781d0000,
|
||||
0xe0510100, 0x781d0000,
|
||||
0x807cff7c, 0x00000200,
|
||||
0x8078ff78, 0x00000200,
|
||||
0xbf0a6f7c, 0xbf85fff6,
|
||||
0xbef80080, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8ef2a05,
|
||||
0x806f816f, 0x8e6f826f,
|
||||
0x8e76886f, 0xbef600ff,
|
||||
0x01000000, 0xbeee0078,
|
||||
0x8078ff78, 0x00000400,
|
||||
0xbefc0084, 0xbf11087c,
|
||||
0x806fff6f, 0x00008000,
|
||||
0xe0524000, 0x781d0000,
|
||||
0xe0524100, 0x781d0100,
|
||||
0xe0524200, 0x781d0200,
|
||||
0xe0524300, 0x781d0300,
|
||||
0xbf8c0f70, 0x7e000300,
|
||||
0x7e020301, 0x7e040302,
|
||||
0x7e060303, 0x807c847c,
|
||||
0x8078ff78, 0x00000400,
|
||||
0xbf0a6f7c, 0xbf85ffee,
|
||||
0xbf9c0000, 0xe0524000,
|
||||
0x6e1d0000, 0xe0524100,
|
||||
0x6e1d0100, 0xe0524200,
|
||||
0x6e1d0200, 0xe0524300,
|
||||
0x6e1d0300, 0xb8f82a05,
|
||||
0x80788178, 0x8e788a78,
|
||||
0xb8ee1605, 0x806e816e,
|
||||
0x8e6e866e, 0x80786e78,
|
||||
0xbef60084, 0xbef600ff,
|
||||
0x01000000, 0xc0211bfa,
|
||||
0x80f8c078, 0xb8ef1605,
|
||||
0x806f816f, 0x8e6f846f,
|
||||
0x8e76826f, 0xbef600ff,
|
||||
0x01000000, 0xbefc006f,
|
||||
0xc031003a, 0x00000078,
|
||||
0x80f8c078, 0xbf8cc07f,
|
||||
0x80fc907c, 0xbf800000,
|
||||
0xbe802d00, 0xbe822d02,
|
||||
0xbe842d04, 0xbe862d06,
|
||||
0xbe882d08, 0xbe8a2d0a,
|
||||
0xbe8c2d0c, 0xbe8e2d0e,
|
||||
0xbf06807c, 0xbf84fff0,
|
||||
0xb8f82a05, 0x80788178,
|
||||
0x8e788a78, 0xb8ee1605,
|
||||
0x806e816e, 0x8e6e866e,
|
||||
0x80786e78, 0xbef60084,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xc0211bfa, 0x00000078,
|
||||
0x80788478, 0xc0211b3a,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211b3a, 0x00000078,
|
||||
0x80788478, 0xc0211b7a,
|
||||
0xc0211b7a, 0x00000078,
|
||||
0x80788478, 0xc0211eba,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211eba, 0x00000078,
|
||||
0x80788478, 0xc0211efa,
|
||||
0xc0211efa, 0x00000078,
|
||||
0x80788478, 0xc0211c3a,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211c3a, 0x00000078,
|
||||
0x80788478, 0xc0211c7a,
|
||||
0xc0211c7a, 0x00000078,
|
||||
0x80788478, 0xc0211a3a,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211a3a, 0x00000078,
|
||||
0x80788478, 0xc0211a7a,
|
||||
0xc0211a7a, 0x00000078,
|
||||
0x80788478, 0xc0211cfa,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211cfa, 0x00000078,
|
||||
0x80788478, 0xbf8cc07f,
|
||||
0xbefc006f, 0xbefe007a,
|
||||
0xbeff007b, 0x866f71ff,
|
||||
0x000003ff, 0xb96f4803,
|
||||
0x866f71ff, 0xfffff800,
|
||||
0x8f6f8b6f, 0xb96fa2c3,
|
||||
0xb973f801, 0xb8ee2a05,
|
||||
0x806e816e, 0x8e6e8a6e,
|
||||
0xb8ef1605, 0x806f816f,
|
||||
0x8e6f866f, 0x806e6f6e,
|
||||
0x806e746e, 0x826f8075,
|
||||
0x866fff6f, 0x0000ffff,
|
||||
0xc0071cb7, 0x00000040,
|
||||
0xc00b1d37, 0x00000048,
|
||||
0xc0031e77, 0x00000058,
|
||||
0xc0071eb7, 0x0000005c,
|
||||
0xbf8cc07f, 0x866fff6d,
|
||||
0xf0000000, 0x8f6f9c6f,
|
||||
0x8e6f906f, 0xbeee0080,
|
||||
0x876e6f6e, 0x866fff6d,
|
||||
0x08000000, 0x8f6f9b6f,
|
||||
0x8e6f8f6f, 0x876e6f6e,
|
||||
0x866fff70, 0x00800000,
|
||||
0x8f6f976f, 0xb96ef807,
|
||||
0x866dff6d, 0x0000ffff,
|
||||
0x86fe7e7e, 0x86ea6a6a,
|
||||
0xb970f802, 0xbf8a0000,
|
||||
0xbf8cc07f, 0xbefc006f,
|
||||
0xbefe007a, 0xbeff007b,
|
||||
0x866f71ff, 0x000003ff,
|
||||
0xb96f4803, 0x866f71ff,
|
||||
0xfffff800, 0x8f6f8b6f,
|
||||
0xb96fa2c3, 0xb973f801,
|
||||
0xb8ee2a05, 0x806e816e,
|
||||
0x8e6e8a6e, 0xb8ef1605,
|
||||
0x806f816f, 0x8e6f866f,
|
||||
0x806e6f6e, 0x806e746e,
|
||||
0x826f8075, 0x866fff6f,
|
||||
0x0000ffff, 0xc0071cb7,
|
||||
0x00000040, 0xc00b1d37,
|
||||
0x00000048, 0xc0031e77,
|
||||
0x00000058, 0xc0071eb7,
|
||||
0x0000005c, 0xbf8cc07f,
|
||||
0x866fff6d, 0xf0000000,
|
||||
0x8f6f9c6f, 0x8e6f906f,
|
||||
0xbeee0080, 0x876e6f6e,
|
||||
0x866fff6d, 0x08000000,
|
||||
0x8f6f9b6f, 0x8e6f8f6f,
|
||||
0x876e6f6e, 0x866fff70,
|
||||
0x00800000, 0x8f6f976f,
|
||||
0xb96ef807, 0x866dff6d,
|
||||
0x0000ffff, 0x86fe7e7e,
|
||||
0x86ea6a6a, 0x8f6e8370,
|
||||
0xb96ee0c2, 0xbf800002,
|
||||
0xb9700002, 0xbf8a0000,
|
||||
0x95806f6c, 0xbf810000,
|
||||
};
|
||||
|
@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
|
||||
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
|
||||
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
|
||||
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
|
||||
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
|
||||
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
|
||||
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
|
||||
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
|
||||
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
|
||||
@ -251,7 +255,7 @@ if (!EMU_RUN_HACK)
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_or_b32 ttmp7, ttmp8, ttmp9
|
||||
s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
|
||||
set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
|
||||
s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler
|
||||
|
||||
L_NO_NEXT_TRAP:
|
||||
@ -262,7 +266,7 @@ L_NO_NEXT_TRAP:
|
||||
s_addc_u32 ttmp1, ttmp1, 0
|
||||
L_EXCP_CASE:
|
||||
s_and_b32 ttmp1, ttmp1, 0xFFFF
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
|
||||
set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
|
||||
s_rfe_b64 [ttmp0, ttmp1]
|
||||
end
|
||||
// ********* End handling of non-CWSR traps *******************
|
||||
@ -1053,7 +1057,7 @@ end
|
||||
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
|
||||
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
|
||||
set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
|
||||
|
||||
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
|
||||
|
||||
@ -1134,3 +1138,11 @@ end
|
||||
function get_hwreg_size_bytes
|
||||
return 128 //HWREG size 128 bytes
|
||||
end
|
||||
|
||||
function set_status_without_spi_prio(status, tmp)
|
||||
// Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
|
||||
s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
|
||||
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
|
||||
end
|
||||
|
@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
|
||||
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
|
||||
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
|
||||
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
|
||||
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
|
||||
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
|
||||
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
|
||||
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
|
||||
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
|
||||
@ -317,7 +321,7 @@ L_EXCP_CASE:
|
||||
// Restore SQ_WAVE_STATUS.
|
||||
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status
|
||||
set_status_without_spi_prio(s_save_status, ttmp2)
|
||||
|
||||
s_rfe_b64 [ttmp0, ttmp1]
|
||||
end
|
||||
@ -1120,7 +1124,7 @@ end
|
||||
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
|
||||
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
|
||||
set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
|
||||
|
||||
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
|
||||
|
||||
@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround
|
||||
s_waitcnt lgkmcnt(0)
|
||||
end
|
||||
end
|
||||
|
||||
function set_status_without_spi_prio(status, tmp)
|
||||
// Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
|
||||
s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
|
||||
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
|
||||
end
|
||||
|
@ -122,6 +122,9 @@ static int kfd_open(struct inode *inode, struct file *filep)
|
||||
if (IS_ERR(process))
|
||||
return PTR_ERR(process);
|
||||
|
||||
if (kfd_is_locked())
|
||||
return -EAGAIN;
|
||||
|
||||
dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
|
||||
process->pasid, process->is_32bit_user_mode);
|
||||
|
||||
@ -389,6 +392,61 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
|
||||
void *data)
|
||||
{
|
||||
int retval;
|
||||
const int max_num_cus = 1024;
|
||||
struct kfd_ioctl_set_cu_mask_args *args = data;
|
||||
struct queue_properties properties;
|
||||
uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
|
||||
size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
|
||||
|
||||
if ((args->num_cu_mask % 32) != 0) {
|
||||
pr_debug("num_cu_mask 0x%x must be a multiple of 32",
|
||||
args->num_cu_mask);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
properties.cu_mask_count = args->num_cu_mask;
|
||||
if (properties.cu_mask_count == 0) {
|
||||
pr_debug("CU mask cannot be 0");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* To prevent an unreasonably large CU mask size, set an arbitrary
|
||||
* limit of max_num_cus bits. We can then just drop any CU mask bits
|
||||
* past max_num_cus bits and just use the first max_num_cus bits.
|
||||
*/
|
||||
if (properties.cu_mask_count > max_num_cus) {
|
||||
pr_debug("CU mask cannot be greater than 1024 bits");
|
||||
properties.cu_mask_count = max_num_cus;
|
||||
cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
|
||||
}
|
||||
|
||||
properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
|
||||
if (!properties.cu_mask)
|
||||
return -ENOMEM;
|
||||
|
||||
retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
|
||||
if (retval) {
|
||||
pr_debug("Could not copy CU mask from userspace");
|
||||
kfree(properties.cu_mask);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
|
||||
|
||||
mutex_unlock(&p->mutex);
|
||||
|
||||
if (retval)
|
||||
kfree(properties.cu_mask);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_set_memory_policy(struct file *filep,
|
||||
struct kfd_process *p, void *data)
|
||||
{
|
||||
@ -754,7 +812,6 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
|
||||
{
|
||||
struct kfd_ioctl_get_clock_counters_args *args = data;
|
||||
struct kfd_dev *dev;
|
||||
struct timespec64 time;
|
||||
|
||||
dev = kfd_device_by_id(args->gpu_id);
|
||||
if (dev)
|
||||
@ -766,11 +823,8 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
|
||||
args->gpu_clock_counter = 0;
|
||||
|
||||
/* No access to rdtsc. Using raw monotonic time */
|
||||
getrawmonotonic64(&time);
|
||||
args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);
|
||||
|
||||
get_monotonic_boottime64(&time);
|
||||
args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);
|
||||
args->cpu_clock_counter = ktime_get_raw_ns();
|
||||
args->system_clock_counter = ktime_get_boot_ns();
|
||||
|
||||
/* Since the counter is in nano-seconds we use 1GHz frequency */
|
||||
args->system_clock_freq = 1000000000;
|
||||
@ -1558,6 +1612,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
|
||||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
|
||||
kfd_ioctl_unmap_memory_from_gpu, 0),
|
||||
|
||||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
|
||||
kfd_ioctl_set_cu_mask, 0),
|
||||
|
||||
};
|
||||
|
||||
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
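As a rough userspace sketch of driving the new AMDKFD_IOC_SET_CU_MASK ioctl registered above (the uapi header name and the surrounding error handling are assumptions; the struct and field names come from the kfd_ioctl_set_cu_mask() hunk):

/* Illustrative sketch only -- restrict a queue to the CUs set in cu_mask. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>	/* assumed location of AMDKFD_IOC_SET_CU_MASK */

static int example_set_cu_mask(int kfd_fd, uint32_t queue_id,
			       const uint32_t *cu_mask, uint32_t num_cu_mask_bits)
{
	struct kfd_ioctl_set_cu_mask_args args;

	memset(&args, 0, sizeof(args));
	args.queue_id = queue_id;
	args.num_cu_mask = num_cu_mask_bits;	/* must be a non-zero multiple of 32 */
	args.cu_mask_ptr = (uintptr_t)cu_mask;	/* bits past 1024 are dropped by the driver */

	return ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
}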
|
||||
|
@ -189,6 +189,21 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct kfd_mem_properties *
|
||||
find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
|
||||
struct kfd_topology_device *dev)
|
||||
{
|
||||
struct kfd_mem_properties *props;
|
||||
|
||||
list_for_each_entry(props, &dev->mem_props, list) {
|
||||
if (props->heap_type == heap_type
|
||||
&& props->flags == flags
|
||||
&& props->width == width)
|
||||
return props;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
|
||||
* topology device present in the device_list
|
||||
*/
|
||||
@ -197,36 +212,56 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
|
||||
{
|
||||
struct kfd_mem_properties *props;
|
||||
struct kfd_topology_device *dev;
|
||||
uint32_t heap_type;
|
||||
uint64_t size_in_bytes;
|
||||
uint32_t flags = 0;
|
||||
uint32_t width;
|
||||
|
||||
pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
|
||||
mem->proximity_domain);
|
||||
list_for_each_entry(dev, device_list, list) {
|
||||
if (mem->proximity_domain == dev->proximity_domain) {
|
||||
props = kfd_alloc_struct(props);
|
||||
if (!props)
|
||||
return -ENOMEM;
|
||||
|
||||
/* We're on GPU node */
|
||||
if (dev->node_props.cpu_cores_count == 0) {
|
||||
/* APU */
|
||||
if (mem->visibility_type == 0)
|
||||
props->heap_type =
|
||||
heap_type =
|
||||
HSA_MEM_HEAP_TYPE_FB_PRIVATE;
|
||||
/* dGPU */
|
||||
else
|
||||
props->heap_type = mem->visibility_type;
|
||||
heap_type = mem->visibility_type;
|
||||
} else
|
||||
props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
|
||||
heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
|
||||
|
||||
if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
|
||||
props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
|
||||
flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
|
||||
if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
|
||||
props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
|
||||
flags |= HSA_MEM_FLAGS_NON_VOLATILE;
|
||||
|
||||
props->size_in_bytes =
|
||||
size_in_bytes =
|
||||
((uint64_t)mem->length_high << 32) +
|
||||
mem->length_low;
|
||||
props->width = mem->width;
|
||||
width = mem->width;
|
||||
|
||||
/* Multiple banks of the same type are aggregated into
|
||||
* one. User mode doesn't care about multiple physical
|
||||
* memory segments. It's managed as a single virtual
|
||||
* heap for user mode.
|
||||
*/
|
||||
props = find_subtype_mem(heap_type, flags, width, dev);
|
||||
if (props) {
|
||||
props->size_in_bytes += size_in_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
props = kfd_alloc_struct(props);
|
||||
if (!props)
|
||||
return -ENOMEM;
|
||||
|
||||
props->heap_type = heap_type;
|
||||
props->flags = flags;
|
||||
props->size_in_bytes = size_in_bytes;
|
||||
props->width = width;
|
||||
|
||||
dev->node_props.mem_banks_count++;
|
||||
list_add_tail(&props->list, &dev->mem_props);
|
||||
|
@ -38,7 +38,6 @@
|
||||
#include "kfd_dbgmgr.h"
|
||||
#include "kfd_dbgdev.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "../../radeon/cik_reg.h"
|
||||
|
||||
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
|
||||
{
|
||||
|
@ -60,6 +60,9 @@ enum {
|
||||
SH_REG_SIZE = SH_REG_END - SH_REG_BASE
|
||||
};
|
||||
|
||||
/* SQ_CMD definitions */
|
||||
#define SQ_CMD 0x8DEC
|
||||
|
||||
enum SQ_IND_CMD_CMD {
|
||||
SQ_IND_CMD_CMD_NULL = 0x00000000,
|
||||
SQ_IND_CMD_CMD_HALT = 0x00000001,
|
||||
@ -190,4 +193,38 @@ union ULARGE_INTEGER {
|
||||
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
|
||||
enum DBGDEV_TYPE type);
|
||||
|
||||
union TCP_WATCH_CNTL_BITS {
|
||||
struct {
|
||||
uint32_t mask:24;
|
||||
uint32_t vmid:4;
|
||||
uint32_t atc:1;
|
||||
uint32_t mode:2;
|
||||
uint32_t valid:1;
|
||||
} bitfields, bits;
|
||||
uint32_t u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
enum {
|
||||
ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
|
||||
ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
|
||||
ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
|
||||
/* extend the mask to 26 bits in order to match the low address field */
|
||||
ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
|
||||
ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
|
||||
};
|
||||
|
||||
enum {
|
||||
MAX_TRAPID = 8, /* 3 bits in the bitfield. */
|
||||
MAX_WATCH_ADDRESSES = 4
|
||||
};
|
||||
|
||||
enum {
|
||||
ADDRESS_WATCH_REG_ADDR_HI = 0,
|
||||
ADDRESS_WATCH_REG_ADDR_LO,
|
||||
ADDRESS_WATCH_REG_CNTL,
|
||||
ADDRESS_WATCH_REG_MAX
|
||||
};
|
||||
|
||||
#endif /* KFD_DBGDEV_H_ */
|
||||
|
@ -21,6 +21,8 @@
|
||||
*/
|
||||
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include "kfd_priv.h"
|
||||
|
||||
static struct dentry *debugfs_root;
|
||||
@ -32,6 +34,38 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file)
|
||||
return single_open(file, show, NULL);
|
||||
}
|
||||
|
||||
static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
	const char __user *user_buf, size_t size, loff_t *ppos)
{
	struct kfd_dev *dev;
	char tmp[16];
	uint32_t gpu_id;
	int ret = -EINVAL;

	memset(tmp, 0, 16);
	if (size >= 16) {
		pr_err("Invalid input for gpu id.\n");
		goto out;
	}
	if (copy_from_user(tmp, user_buf, size)) {
		ret = -EFAULT;
		goto out;
	}
	if (kstrtoint(tmp, 10, &gpu_id)) {
		pr_err("Invalid input for gpu id.\n");
		goto out;
	}
	dev = kfd_device_by_id(gpu_id);
	if (dev) {
		kfd_debugfs_hang_hws(dev);
		ret = size;
	} else
		pr_err("Cannot find device %d.\n", gpu_id);

out:
	return ret;
}
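The new debugfs entry can be exercised from userspace by writing a GPU id to it; a hedged sketch follows (the debugfs mount point and the "kfd" directory name are assumptions based on kfd_debugfs_init() below):

/* Illustrative sketch only -- trigger the HWS-hang test hook for gpu_id. */
#include <stdio.h>

static int example_hang_hws(unsigned int gpu_id)
{
	FILE *f = fopen("/sys/kernel/debug/kfd/hang_hws", "w");

	if (!f)
		return -1;
	fprintf(f, "%u\n", gpu_id);	/* parsed with kstrtoint(), base 10 */
	return fclose(f);
}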
|
||||
|
||||
static const struct file_operations kfd_debugfs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = kfd_debugfs_open,
|
||||
@ -40,6 +74,15 @@ static const struct file_operations kfd_debugfs_fops = {
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static const struct file_operations kfd_debugfs_hang_hws_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = kfd_debugfs_open,
|
||||
.read = seq_read,
|
||||
.write = kfd_debugfs_hang_hws_write,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
void kfd_debugfs_init(void)
|
||||
{
|
||||
struct dentry *ent;
|
||||
@ -65,6 +108,11 @@ void kfd_debugfs_init(void)
|
||||
ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
|
||||
kfd_debugfs_rls_by_device,
|
||||
&kfd_debugfs_fops);
|
||||
|
||||
ent = debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root,
|
||||
NULL,
|
||||
&kfd_debugfs_hang_hws_fops);
|
||||
|
||||
if (!ent)
|
||||
pr_warn("Failed to create rls in kfd debugfs\n");
|
||||
}
|
||||
|
@ -30,7 +30,13 @@
|
||||
#include "kfd_iommu.h"
|
||||
|
||||
#define MQD_SIZE_ALIGNED 768
|
||||
static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
|
||||
|
||||
/*
|
||||
* kfd_locked is used to lock the kfd driver during suspend or reset
|
||||
* once locked, kfd driver will stop any further GPU execution.
|
||||
* create process (open) will return -EAGAIN.
|
||||
*/
|
||||
static atomic_t kfd_locked = ATOMIC_INIT(0);
|
||||
|
||||
#ifdef KFD_SUPPORT_IOMMU_V2
|
||||
static const struct kfd_device_info kaveri_device_info = {
|
||||
@ -46,6 +52,7 @@ static const struct kfd_device_info kaveri_device_info = {
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info carrizo_device_info = {
|
||||
@ -61,6 +68,22 @@ static const struct kfd_device_info carrizo_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info raven_device_info = {
|
||||
.asic_family = CHIP_RAVEN,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.doorbell_size = 8,
|
||||
.ih_ring_entry_size = 8 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_v9,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 1,
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -77,6 +100,7 @@ static const struct kfd_device_info hawaii_device_info = {
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info tonga_device_info = {
|
||||
@ -91,6 +115,7 @@ static const struct kfd_device_info tonga_device_info = {
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info tonga_vf_device_info = {
|
||||
@ -105,6 +130,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
|
||||
.supports_cwsr = false,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info fiji_device_info = {
|
||||
@ -119,6 +145,7 @@ static const struct kfd_device_info fiji_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info fiji_vf_device_info = {
|
||||
@ -133,6 +160,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
|
||||
@ -148,6 +176,7 @@ static const struct kfd_device_info polaris10_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info polaris10_vf_device_info = {
|
||||
@ -162,6 +191,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info polaris11_device_info = {
|
||||
@ -176,6 +206,7 @@ static const struct kfd_device_info polaris11_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info vega10_device_info = {
|
||||
@ -190,6 +221,7 @@ static const struct kfd_device_info vega10_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info vega10_vf_device_info = {
|
||||
@ -204,6 +236,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
};
|
||||
|
||||
|
||||
@ -241,6 +274,7 @@ static const struct kfd_deviceid supported_devices[] = {
|
||||
{ 0x9875, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x9876, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x9877, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x15DD, &raven_device_info }, /* Raven */
|
||||
#endif
|
||||
{ 0x67A0, &hawaii_device_info }, /* Hawaii */
|
||||
{ 0x67A1, &hawaii_device_info }, /* Hawaii */
|
||||
@ -514,13 +548,54 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
	kfree(kfd);
}

int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;
	kgd2kfd_suspend(kfd);

	/* hold dqm->lock to prevent further execution */
	dqm_lock(kfd->dqm);

	kfd_signal_reset_event(kfd);
	return 0;
}

/*
 * Fix me. KFD won't be able to resume existing processes for now.
 * We will keep all existing processes in an evicted state and
 * wait for them to be terminated.
 */

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	int ret, count;

	if (!kfd->init_complete)
		return 0;

	dqm_unlock(kfd->dqm);

	ret = kfd_resume(kfd);
	if (ret)
		return ret;
	count = atomic_dec_return(&kfd_locked);
	WARN_ONCE(count != 0, "KFD reset ref. error");
	return 0;
}
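As an aside, the intended ordering of these two hooks around a full GPU reset is roughly as sketched below. This is illustrative only, under the assumption that the reset itself is sequenced from the amdgpu side; gpu_reset_sketch() is a hypothetical helper, not a function in this series.

/* Illustrative sketch: how a reset sequence is expected to bracket the
 * hardware reset with the KFD hooks added above.
 */
static int gpu_reset_sketch(struct kfd_dev *kfd)
{
	int r;

	/* Suspend user processes, take the DQM lock, signal reset events. */
	r = kgd2kfd_pre_reset(kfd);
	if (r)
		return r;

	/* ... the actual GPU reset is performed here by the amdgpu core ... */

	/* Release the DQM lock, resume KFD and drop the kfd_locked count. */
	return kgd2kfd_post_reset(kfd);
}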
bool kfd_is_locked(void)
{
	return (atomic_read(&kfd_locked) > 0);
}
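The comment above kfd_locked says that process creation returns -EAGAIN while the driver is locked. A minimal sketch of how an open/create path might consult this helper follows; kfd_open_sketch() is a hypothetical name, not the driver's actual entry point.

static int kfd_open_sketch(struct inode *inode, struct file *filep)
{
	/* Refuse to create new KFD processes while a suspend or reset
	 * holds the driver locked; user space is expected to retry.
	 */
	if (kfd_is_locked())
		return -EAGAIN;

	/* ... normal process creation continues here ... */
	return 0;
}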

void kgd2kfd_suspend(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return;

	/* For first KFD device suspend all the KFD processes */
	if (atomic_inc_return(&kfd_device_suspended) == 1)
	if (atomic_inc_return(&kfd_locked) == 1)
		kfd_suspend_all_processes();

	kfd->dqm->ops.stop(kfd->dqm);
|
||||
@ -539,7 +614,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
count = atomic_dec_return(&kfd_device_suspended);
|
||||
count = atomic_dec_return(&kfd_locked);
|
||||
WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
|
||||
if (count == 0)
|
||||
ret = kfd_resume_all_processes();
|
||||
@ -577,14 +652,24 @@ static int kfd_resume(struct kfd_dev *kfd)
|
||||
/* This is called directly from KGD at ISR. */
|
||||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
|
||||
{
|
||||
uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
|
||||
bool is_patched = false;
|
||||
|
||||
if (!kfd->init_complete)
|
||||
return;
|
||||
|
||||
if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
|
||||
dev_err_once(kfd_device, "Ring entry too small\n");
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&kfd->interrupt_lock);
|
||||
|
||||
if (kfd->interrupts_active
|
||||
&& interrupt_is_wanted(kfd, ih_ring_entry)
|
||||
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
|
||||
&& interrupt_is_wanted(kfd, ih_ring_entry,
|
||||
patched_ihre, &is_patched)
|
||||
&& enqueue_ih_ring_entry(kfd,
|
||||
is_patched ? patched_ihre : ih_ring_entry))
|
||||
queue_work(kfd->ih_wq, &kfd->interrupt_work);
|
||||
|
||||
spin_unlock(&kfd->interrupt_lock);
|
||||
@ -739,8 +824,8 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
|
||||
if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
|
||||
return -ENOMEM;
|
||||
|
||||
*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
|
||||
if ((*mem_obj) == NULL)
|
||||
*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!(*mem_obj))
|
||||
return -ENOMEM;
|
||||
|
||||
pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
|
||||
@ -857,3 +942,26 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
|
||||
kfree(mem_obj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
/* This function will send a package to HIQ to hang the HWS
|
||||
* which will trigger a GPU reset and bring the HWS back to normal state
|
||||
*/
|
||||
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
|
||||
pr_err("HWS is not enabled");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = pm_debugfs_hang_hws(&dev->dqm->packets);
|
||||
if (!r)
|
||||
r = dqm_debugfs_execute_queues(dev->dqm);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -61,6 +61,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id);
|
||||
|
||||
static void kfd_process_hw_exception(struct work_struct *work);
|
||||
|
||||
static inline
|
||||
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
|
||||
{
|
||||
@ -99,6 +101,17 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
|
||||
return dqm->dev->shared_resources.num_pipe_per_mec;
|
||||
}
|
||||
|
||||
static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* KFD_SDMA_QUEUES_PER_ENGINE;
}
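Since the SDMA queue bitmap is now sized from get_num_sdma_queues(), allocating a queue id reduces to picking a set bit. The following is only a simplified sketch of that idea; allocate_sdma_queue_sketch() is hypothetical and not the driver's actual allocator. The resulting id is then split into an engine/queue pair with get_num_sdma_engines(), as done in create_sdma_queue_nocpsch() further below.

static int allocate_sdma_queue_sketch(struct device_queue_manager *dqm,
				      unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;	/* lowest free SDMA queue id */
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}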
|
||||
|
||||
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
@ -240,7 +253,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
|
||||
print_queue(q);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
|
||||
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
||||
pr_warn("Can't create new usermode queue because %d queues were already created\n",
|
||||
@ -297,7 +310,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
dqm->total_queue_count);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -346,10 +359,10 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
int retval;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (!mqd)
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
|
||||
retval = allocate_hqd(dqm, q);
|
||||
@ -360,7 +373,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
if (retval)
|
||||
goto out_deallocate_hqd;
|
||||
|
||||
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
|
||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||
&q->gart_mqd_addr, &q->properties);
|
||||
if (retval)
|
||||
goto out_deallocate_doorbell;
|
||||
@ -374,15 +387,15 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
if (!q->properties.is_active)
|
||||
return 0;
|
||||
|
||||
retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
|
||||
q->process->mm);
|
||||
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
|
||||
&q->properties, q->process->mm);
|
||||
if (retval)
|
||||
goto out_uninit_mqd;
|
||||
|
||||
return 0;
|
||||
|
||||
out_uninit_mqd:
|
||||
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
out_deallocate_doorbell:
|
||||
deallocate_doorbell(qpd, q);
|
||||
out_deallocate_hqd:
|
||||
@ -399,11 +412,11 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
|
||||
struct queue *q)
|
||||
{
|
||||
int retval;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd)
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
|
||||
@ -420,14 +433,14 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
|
||||
|
||||
deallocate_doorbell(qpd, q);
|
||||
|
||||
retval = mqd->destroy_mqd(mqd, q->mqd,
|
||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
|
||||
KFD_UNMAP_LATENCY_MS,
|
||||
q->pipe, q->queue);
|
||||
if (retval == -ETIME)
|
||||
qpd->reset_wavefronts = true;
|
||||
|
||||
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
|
||||
list_del(&q->list);
|
||||
if (list_empty(&qpd->queues_list)) {
|
||||
@ -457,9 +470,9 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
{
|
||||
int retval;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
return retval;
|
||||
}
|
||||
@ -467,19 +480,19 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
{
|
||||
int retval;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
struct kfd_process_device *pdd;
|
||||
bool prev_active = false;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
pdd = kfd_get_process_device_data(q->device, q->process);
|
||||
if (!pdd) {
|
||||
retval = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) {
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -506,7 +519,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
} else if (prev_active &&
|
||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
|
||||
retval = mqd->destroy_mqd(mqd, q->mqd,
|
||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
|
||||
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
|
||||
if (retval) {
|
||||
@ -515,7 +528,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
}
|
||||
}
|
||||
|
||||
retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
|
||||
retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
|
||||
|
||||
/*
|
||||
* check active state vs. the previous state and modify
|
||||
@ -533,44 +546,44 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
else if (q->properties.is_active &&
|
||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA))
|
||||
retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
|
||||
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
|
||||
&q->properties, q->process->mm);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static struct mqd_manager *get_mqd_manager(
|
||||
struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
|
||||
{
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
|
||||
return NULL;
|
||||
|
||||
pr_debug("mqd type %d\n", type);
|
||||
|
||||
mqd = dqm->mqds[type];
|
||||
if (!mqd) {
|
||||
mqd = mqd_manager_init(type, dqm->dev);
|
||||
if (!mqd)
|
||||
mqd_mgr = dqm->mqd_mgrs[type];
|
||||
if (!mqd_mgr) {
|
||||
mqd_mgr = mqd_manager_init(type, dqm->dev);
|
||||
if (!mqd_mgr)
|
||||
pr_err("mqd manager is NULL");
|
||||
dqm->mqds[type] = mqd;
|
||||
dqm->mqd_mgrs[type] = mqd_mgr;
|
||||
}
|
||||
|
||||
return mqd;
|
||||
return mqd_mgr;
|
||||
}
|
||||
|
||||
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct queue *q;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
struct kfd_process_device *pdd;
|
||||
int retval = 0;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
if (qpd->evicted++ > 0) /* already evicted, do nothing */
|
||||
goto out;
|
||||
|
||||
@ -582,16 +595,16 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_active)
|
||||
continue;
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) { /* should not be here */
|
||||
if (!mqd_mgr) { /* should not be here */
|
||||
pr_err("Cannot evict queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
q->properties.is_evicted = true;
|
||||
q->properties.is_active = false;
|
||||
retval = mqd->destroy_mqd(mqd, q->mqd,
|
||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
|
||||
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
|
||||
if (retval)
|
||||
@ -600,7 +613,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -611,7 +624,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
struct kfd_process_device *pdd;
|
||||
int retval = 0;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
if (qpd->evicted++ > 0) /* already evicted, do nothing */
|
||||
goto out;
|
||||
|
||||
@ -633,7 +646,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
|
||||
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -641,7 +654,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct queue *q;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
struct kfd_process_device *pdd;
|
||||
uint32_t pd_base;
|
||||
int retval = 0;
|
||||
@ -650,7 +663,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
/* Retrieve PD base */
|
||||
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
|
||||
goto out;
|
||||
if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
|
||||
@ -677,16 +690,16 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_evicted)
|
||||
continue;
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) { /* should not be here */
|
||||
if (!mqd_mgr) { /* should not be here */
|
||||
pr_err("Cannot restore queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
q->properties.is_evicted = false;
|
||||
q->properties.is_active = true;
|
||||
retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
|
||||
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
|
||||
q->queue, &q->properties,
|
||||
q->process->mm);
|
||||
if (retval)
|
||||
@ -695,7 +708,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
}
|
||||
qpd->evicted = 0;
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -711,7 +724,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
/* Retrieve PD base */
|
||||
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
|
||||
goto out;
|
||||
if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
|
||||
@ -739,7 +752,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
|
||||
if (!retval)
|
||||
qpd->evicted = 0;
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -761,7 +774,7 @@ static int register_process(struct device_queue_manager *dqm,
|
||||
/* Retrieve PD base */
|
||||
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
list_add(&n->list, &dqm->queues);
|
||||
|
||||
/* Update PD Base in QPD */
|
||||
@ -769,9 +782,10 @@ static int register_process(struct device_queue_manager *dqm,
|
||||
|
||||
retval = dqm->asic_ops.update_qpd(dqm, qpd);
|
||||
|
||||
dqm->processes_count++;
|
||||
if (dqm->processes_count++ == 0)
|
||||
dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);
|
||||
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
return retval;
|
||||
}
|
||||
@ -786,20 +800,22 @@ static int unregister_process(struct device_queue_manager *dqm,
|
||||
list_empty(&qpd->queues_list) ? "empty" : "not empty");
|
||||
|
||||
retval = 0;
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
|
||||
list_for_each_entry_safe(cur, next, &dqm->queues, list) {
|
||||
if (qpd == cur->qpd) {
|
||||
list_del(&cur->list);
|
||||
kfree(cur);
|
||||
dqm->processes_count--;
|
||||
if (--dqm->processes_count == 0)
|
||||
dqm->dev->kfd2kgd->set_compute_idle(
|
||||
dqm->dev->kgd, true);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
/* qpd not found in dqm list */
|
||||
retval = 1;
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -838,7 +854,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||
if (!dqm->allocated_queues)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_init(&dqm->lock);
|
||||
mutex_init(&dqm->lock_hidden);
|
||||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
@ -853,7 +869,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||
}
|
||||
|
||||
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
|
||||
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
|
||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -866,8 +882,8 @@ static void uninitialize(struct device_queue_manager *dqm)
|
||||
|
||||
kfree(dqm->allocated_queues);
|
||||
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
|
||||
kfree(dqm->mqds[i]);
|
||||
mutex_destroy(&dqm->lock);
|
||||
kfree(dqm->mqd_mgrs[i]);
|
||||
mutex_destroy(&dqm->lock_hidden);
|
||||
kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
|
||||
}
|
||||
|
||||
@ -901,7 +917,7 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id)
|
||||
{
|
||||
if (sdma_queue_id >= CIK_SDMA_QUEUES)
|
||||
if (sdma_queue_id >= get_num_sdma_queues(dqm))
|
||||
return;
|
||||
dqm->sdma_bitmap |= (1 << sdma_queue_id);
|
||||
}
|
||||
@ -910,19 +926,19 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
int retval;
|
||||
|
||||
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
|
||||
if (!mqd)
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
|
||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
|
||||
q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
|
||||
q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
|
||||
|
||||
retval = allocate_doorbell(qpd, q);
|
||||
if (retval)
|
||||
@ -933,19 +949,20 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
|
||||
|
||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
|
||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||
&q->gart_mqd_addr, &q->properties);
|
||||
if (retval)
|
||||
goto out_deallocate_doorbell;
|
||||
|
||||
retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
|
||||
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
|
||||
NULL);
|
||||
if (retval)
|
||||
goto out_uninit_mqd;
|
||||
|
||||
return 0;
|
||||
|
||||
out_uninit_mqd:
|
||||
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
out_deallocate_doorbell:
|
||||
deallocate_doorbell(qpd, q);
|
||||
out_deallocate_sdma_queue:
|
||||
@ -1003,12 +1020,14 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
|
||||
{
|
||||
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
|
||||
|
||||
mutex_init(&dqm->lock);
|
||||
mutex_init(&dqm->lock_hidden);
|
||||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->processes_count = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
dqm->active_runlist = false;
|
||||
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
|
||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
||||
|
||||
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1041,9 +1060,11 @@ static int start_cpsch(struct device_queue_manager *dqm)
|
||||
|
||||
init_interrupts(dqm);
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
/* clear hang status when driver try to start the hw scheduler */
|
||||
dqm->is_hws_hang = false;
|
||||
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
return 0;
|
||||
fail_allocate_vidmem:
|
||||
@ -1055,9 +1076,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
|
||||
|
||||
static int stop_cpsch(struct device_queue_manager *dqm)
|
||||
{
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
|
||||
pm_uninit(&dqm->packets);
|
||||
@ -1069,11 +1090,11 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
|
||||
struct kernel_queue *kq,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
||||
pr_warn("Can't create new kernel queue because %d queues were already created\n",
|
||||
dqm->total_queue_count);
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
@ -1089,7 +1110,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
|
||||
dqm->queue_count++;
|
||||
qpd->is_debug = true;
|
||||
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1098,7 +1119,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
|
||||
struct kernel_queue *kq,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
list_del(&kq->list);
|
||||
dqm->queue_count--;
|
||||
qpd->is_debug = false;
|
||||
@ -1110,18 +1131,18 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
|
||||
dqm->total_queue_count--;
|
||||
pr_debug("Total of %d queues are accountable so far\n",
|
||||
dqm->total_queue_count);
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
}
|
||||
|
||||
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
int retval;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
retval = 0;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
|
||||
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
||||
pr_warn("Can't create new usermode queue because %d queues were already created\n",
|
||||
@ -1135,19 +1156,19 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
if (retval)
|
||||
goto out_unlock;
|
||||
q->properties.sdma_queue_id =
|
||||
q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
|
||||
q->sdma_id / get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_engine_id =
|
||||
q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
|
||||
q->sdma_id % get_num_sdma_engines(dqm);
|
||||
}
|
||||
|
||||
retval = allocate_doorbell(qpd, q);
|
||||
if (retval)
|
||||
goto out_deallocate_sdma_queue;
|
||||
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
|
||||
if (!mqd) {
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out_deallocate_doorbell;
|
||||
}
|
||||
@ -1164,7 +1185,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
|
||||
q->properties.tba_addr = qpd->tba_addr;
|
||||
q->properties.tma_addr = qpd->tma_addr;
|
||||
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
|
||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||
&q->gart_mqd_addr, &q->properties);
|
||||
if (retval)
|
||||
goto out_deallocate_doorbell;
|
||||
@ -1188,7 +1209,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
pr_debug("Total of %d queues are accountable so far\n",
|
||||
dqm->total_queue_count);
|
||||
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
|
||||
out_deallocate_doorbell:
|
||||
@ -1197,7 +1218,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
out_unlock:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -1210,6 +1232,13 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
|
||||
while (*fence_addr != fence_value) {
|
||||
if (time_after(jiffies, end_jiffies)) {
|
||||
pr_err("qcm fence wait loop timeout expired\n");
|
||||
/* In HWS case, this is used to halt the driver thread
|
||||
* in order not to mess up CP states before doing
|
||||
* scandumps for FW debugging.
|
||||
*/
|
||||
while (halt_if_hws_hang)
|
||||
schedule();
|
||||
|
||||
return -ETIME;
|
||||
}
|
||||
schedule();
|
||||
@ -1254,6 +1283,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
|
||||
{
|
||||
int retval = 0;
|
||||
|
||||
if (dqm->is_hws_hang)
|
||||
return -EIO;
|
||||
if (!dqm->active_runlist)
|
||||
return retval;
|
||||
|
||||
@ -1292,9 +1323,13 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
|
||||
{
|
||||
int retval;
|
||||
|
||||
if (dqm->is_hws_hang)
|
||||
return -EIO;
|
||||
retval = unmap_queues_cpsch(dqm, filter, filter_param);
|
||||
if (retval) {
|
||||
pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
|
||||
dqm->is_hws_hang = true;
|
||||
schedule_work(&dqm->hw_exception_work);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -1306,7 +1341,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
struct queue *q)
|
||||
{
|
||||
int retval;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
bool preempt_all_queues;
|
||||
|
||||
preempt_all_queues = false;
|
||||
@ -1314,7 +1349,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
retval = 0;
|
||||
|
||||
/* remove queue from list to prevent rescheduling after preemption */
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
|
||||
if (qpd->is_debug) {
|
||||
/*
|
||||
@ -1326,9 +1361,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
|
||||
}
|
||||
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) {
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
@ -1350,7 +1385,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
qpd->reset_wavefronts = true;
|
||||
}
|
||||
|
||||
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
|
||||
/*
|
||||
* Unconditionally decrement this counter, regardless of the queue's
|
||||
@ -1360,14 +1395,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
pr_debug("Total of %d queues are accountable so far\n",
|
||||
dqm->total_queue_count);
|
||||
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
return retval;
|
||||
|
||||
failed:
|
||||
failed_try_destroy_debugged_queue:
|
||||
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -1391,7 +1426,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
|
||||
if (!dqm->asic_ops.set_cache_memory_policy)
|
||||
return retval;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
|
||||
if (alternate_aperture_size == 0) {
|
||||
/* base > limit disables APE1 */
|
||||
@ -1437,7 +1472,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
|
||||
qpd->sh_mem_ape1_limit);
|
||||
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -1468,7 +1503,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
||||
struct device_process_node *cur, *next_dpn;
|
||||
int retval = 0;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
|
||||
/* Clear all user mode queues */
|
||||
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
|
||||
@ -1489,7 +1524,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -1500,14 +1535,14 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||
int retval;
|
||||
struct queue *q, *next;
|
||||
struct kernel_queue *kq, *kq_next;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
struct device_process_node *cur, *next_dpn;
|
||||
enum kfd_unmap_queues_filter filter =
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
|
||||
|
||||
retval = 0;
|
||||
|
||||
mutex_lock(&dqm->lock);
|
||||
dqm_lock(dqm);
|
||||
|
||||
/* Clean all kernel queues */
|
||||
list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
|
||||
@ -1542,7 +1577,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||
}
|
||||
|
||||
retval = execute_queues_cpsch(dqm, filter, 0);
|
||||
if (retval || qpd->reset_wavefronts) {
|
||||
if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
|
||||
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
|
||||
dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
|
||||
qpd->reset_wavefronts = false;
|
||||
@ -1550,19 +1585,19 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||
|
||||
/* lastly, free mqd resources */
|
||||
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
|
||||
mqd = dqm->ops.get_mqd_manager(dqm,
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd) {
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
list_del(&q->list);
|
||||
qpd->queue_count--;
|
||||
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&dqm->lock);
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -1683,6 +1718,30 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
|
||||
kfree(dqm);
|
||||
}
|
||||
|
||||
int kfd_process_vm_fault(struct device_queue_manager *dqm,
|
||||
unsigned int pasid)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
|
||||
int ret = 0;
|
||||
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
pdd = kfd_get_process_device_data(dqm->dev, p);
|
||||
if (pdd)
|
||||
ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
|
||||
kfd_unref_process(p);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kfd_process_hw_exception(struct work_struct *work)
|
||||
{
|
||||
struct device_queue_manager *dqm = container_of(work,
|
||||
struct device_queue_manager, hw_exception_work);
|
||||
dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
static void seq_reg_dump(struct seq_file *m,
|
||||
@ -1746,8 +1805,8 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
|
||||
for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
|
||||
for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
|
||||
for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) {
|
||||
r = dqm->dev->kfd2kgd->hqd_sdma_dump(
|
||||
dqm->dev->kgd, pipe, queue, &dump, &n_regs);
|
||||
if (r)
|
||||
@ -1764,4 +1823,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
|
||||
return r;
|
||||
}
|
||||
|
||||
int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
dqm_lock(dqm);
|
||||
dqm->active_runlist = true;
|
||||
r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
|
||||
dqm_unlock(dqm);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -26,15 +26,14 @@
|
||||
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_mqd_manager.h"
|
||||
|
||||
#define KFD_UNMAP_LATENCY_MS (4000)
|
||||
#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000)
|
||||
|
||||
#define CIK_SDMA_QUEUES (4)
|
||||
#define CIK_SDMA_QUEUES_PER_ENGINE (2)
|
||||
#define CIK_SDMA_ENGINE_NUM (2)
|
||||
#define KFD_SDMA_QUEUES_PER_ENGINE (2)
|
||||
|
||||
struct device_process_node {
|
||||
struct qcm_process_device *qpd;
|
||||
@ -170,11 +169,12 @@ struct device_queue_manager {
|
||||
struct device_queue_manager_ops ops;
|
||||
struct device_queue_manager_asic_ops asic_ops;
|
||||
|
||||
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
|
||||
struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX];
|
||||
struct packet_manager packets;
|
||||
struct kfd_dev *dev;
|
||||
struct mutex lock;
|
||||
struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */
|
||||
struct list_head queues;
|
||||
unsigned int saved_flags;
|
||||
unsigned int processes_count;
|
||||
unsigned int queue_count;
|
||||
unsigned int sdma_queue_count;
|
||||
@ -190,6 +190,10 @@ struct device_queue_manager {
|
||||
struct kfd_mem_obj *fence_mem;
|
||||
bool active_runlist;
|
||||
int sched_policy;
|
||||
|
||||
/* hw exception */
|
||||
bool is_hws_hang;
|
||||
struct work_struct hw_exception_work;
|
||||
};
|
||||
|
||||
void device_queue_manager_init_cik(
|
||||
@ -207,6 +211,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
unsigned int get_queues_num(struct device_queue_manager *dqm);
|
||||
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
|
||||
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
|
||||
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
|
||||
|
||||
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
|
||||
{
|
||||
@ -219,4 +224,19 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
|
||||
return (pdd->lds_base >> 60) & 0x0E;
|
||||
}
|
||||
|
||||
/* The DQM lock can be taken in MMU notifiers. Make sure no reclaim-FS
 * happens while holding this lock anywhere to prevent deadlocks when
 * an MMU notifier runs in reclaim-FS context.
 */
static inline void dqm_lock(struct device_queue_manager *dqm)
{
	mutex_lock(&dqm->lock_hidden);
	dqm->saved_flags = memalloc_nofs_save();
}
static inline void dqm_unlock(struct device_queue_manager *dqm)
{
	memalloc_nofs_restore(dqm->saved_flags);
	mutex_unlock(&dqm->lock_hidden);
}

#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
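dqm_lock() combines the mutex with memalloc_nofs_save(), so any allocation made while the lock is held implicitly avoids filesystem reclaim, which is presumably why explicit GFP_NOIO allocations elsewhere in the series could be relaxed back to GFP_KERNEL. A small illustrative example follows; the function name is hypothetical.

static int example_alloc_under_dqm_lock(struct device_queue_manager *dqm)
{
	void *buf;

	dqm_lock(dqm);
	/* Behaves as if GFP_NOFS were passed, because of the saved task flags. */
	buf = kzalloc(64, GFP_KERNEL);
	dqm_unlock(dqm);

	if (!buf)
		return -ENOMEM;
	kfree(buf);
	return 0;
}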
|
||||
|
@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
|
||||
qpd->sh_mem_config =
|
||||
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
|
||||
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
|
||||
if (vega10_noretry &&
|
||||
if (noretry &&
|
||||
!dqm->dev->device_info->needs_iommu_device)
|
||||
qpd->sh_mem_config |=
|
||||
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
|
||||
|
@ -188,9 +188,9 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
||||
*doorbell_off = kfd->doorbell_id_offset + inx;
|
||||
|
||||
pr_debug("Get kernel queue doorbell\n"
|
||||
" doorbell offset == 0x%08X\n"
|
||||
" kernel address == %p\n",
|
||||
*doorbell_off, (kfd->doorbell_kernel_ptr + inx));
|
||||
" doorbell offset == 0x%08X\n"
|
||||
" doorbell index == 0x%x\n",
|
||||
*doorbell_off, inx);
|
||||
|
||||
return kfd->doorbell_kernel_ptr + inx;
|
||||
}
|
||||
@ -199,7 +199,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
	unsigned int inx;

	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
		* sizeof(u32) / kfd->device_info->doorbell_size;

	mutex_lock(&kfd->doorbell_mutex);
	__clear_bit(inx, kfd->doorbell_available_index);
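Worked example of the corrected index calculation, assuming the 8-byte doorbells used by GFXv9 parts (doorbell_size = 8): the kernel pointer for doorbell index 1 sits two u32 slots past doorbell_kernel_ptr, so (2 * 4) / 8 recovers index 1, whereas the old pointer-difference formula would have cleared bit 2 and left the bit that was actually allocated set.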
|
||||
|
@ -850,6 +850,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
|
||||
ev->memory_exception_data = *ev_data;
|
||||
}
|
||||
|
||||
if (type == KFD_EVENT_TYPE_MEMORY) {
|
||||
dev_warn(kfd_device,
|
||||
"Sending SIGSEGV to HSA Process with PID %d ",
|
||||
p->lead_thread->pid);
|
||||
send_sig(SIGSEGV, p->lead_thread, 0);
|
||||
}
|
||||
|
||||
/* Send SIGTERM no event of type "type" has been found*/
|
||||
if (send_signal) {
|
||||
if (send_sigterm) {
|
||||
@ -904,34 +911,41 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
|
||||
memory_exception_data.failure.NotPresent = 1;
|
||||
memory_exception_data.failure.NoExecute = 0;
|
||||
memory_exception_data.failure.ReadOnly = 0;
|
||||
if (vma) {
|
||||
if (vma->vm_start > address) {
|
||||
memory_exception_data.failure.NotPresent = 1;
|
||||
memory_exception_data.failure.NoExecute = 0;
|
||||
if (vma && address >= vma->vm_start) {
|
||||
memory_exception_data.failure.NotPresent = 0;
|
||||
|
||||
if (is_write_requested && !(vma->vm_flags & VM_WRITE))
|
||||
memory_exception_data.failure.ReadOnly = 1;
|
||||
else
|
||||
memory_exception_data.failure.ReadOnly = 0;
|
||||
} else {
|
||||
memory_exception_data.failure.NotPresent = 0;
|
||||
if (is_write_requested && !(vma->vm_flags & VM_WRITE))
|
||||
memory_exception_data.failure.ReadOnly = 1;
|
||||
else
|
||||
memory_exception_data.failure.ReadOnly = 0;
|
||||
if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
|
||||
memory_exception_data.failure.NoExecute = 1;
|
||||
else
|
||||
memory_exception_data.failure.NoExecute = 0;
|
||||
}
|
||||
|
||||
if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
|
||||
memory_exception_data.failure.NoExecute = 1;
|
||||
else
|
||||
memory_exception_data.failure.NoExecute = 0;
|
||||
}
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
mmput(mm);
|
||||
|
||||
mutex_lock(&p->event_mutex);
|
||||
pr_debug("notpresent %d, noexecute %d, readonly %d\n",
|
||||
memory_exception_data.failure.NotPresent,
|
||||
memory_exception_data.failure.NoExecute,
|
||||
memory_exception_data.failure.ReadOnly);
|
||||
|
||||
/* Lookup events by type and signal them */
|
||||
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
|
||||
&memory_exception_data);
|
||||
/* Workaround on Raven to not kill the process when memory is freed
|
||||
* before IOMMU is able to finish processing all the excessive PPRs
|
||||
*/
|
||||
if (dev->device_info->asic_family != CHIP_RAVEN) {
|
||||
mutex_lock(&p->event_mutex);
|
||||
|
||||
/* Lookup events by type and signal them */
|
||||
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
|
||||
&memory_exception_data);
|
||||
|
||||
mutex_unlock(&p->event_mutex);
|
||||
}
|
||||
|
||||
mutex_unlock(&p->event_mutex);
|
||||
kfd_unref_process(p);
|
||||
}
|
||||
#endif /* KFD_SUPPORT_IOMMU_V2 */
|
||||
@ -956,3 +970,67 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
|
||||
mutex_unlock(&p->event_mutex);
|
||||
kfd_unref_process(p);
|
||||
}
|
||||
|
||||
void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
|
||||
struct kfd_vm_fault_info *info)
|
||||
{
|
||||
struct kfd_event *ev;
|
||||
uint32_t id;
|
||||
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
|
||||
struct kfd_hsa_memory_exception_data memory_exception_data;
|
||||
|
||||
if (!p)
|
||||
return; /* Presumably process exited. */
|
||||
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
|
||||
memory_exception_data.gpu_id = dev->id;
|
||||
memory_exception_data.failure.imprecise = 1;
|
||||
/* Set failure reason */
|
||||
if (info) {
|
||||
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
|
||||
memory_exception_data.failure.NotPresent =
|
||||
info->prot_valid ? 1 : 0;
|
||||
memory_exception_data.failure.NoExecute =
|
||||
info->prot_exec ? 1 : 0;
|
||||
memory_exception_data.failure.ReadOnly =
|
||||
info->prot_write ? 1 : 0;
|
||||
memory_exception_data.failure.imprecise = 0;
|
||||
}
|
||||
mutex_lock(&p->event_mutex);
|
||||
|
||||
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
|
||||
idr_for_each_entry_continue(&p->event_idr, ev, id)
|
||||
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
|
||||
ev->memory_exception_data = memory_exception_data;
|
||||
set_event(ev);
|
||||
}
|
||||
|
||||
mutex_unlock(&p->event_mutex);
|
||||
kfd_unref_process(p);
|
||||
}
|
||||
|
||||
void kfd_signal_reset_event(struct kfd_dev *dev)
|
||||
{
|
||||
struct kfd_hsa_hw_exception_data hw_exception_data;
|
||||
struct kfd_process *p;
|
||||
struct kfd_event *ev;
|
||||
unsigned int temp;
|
||||
uint32_t id, idx;
|
||||
|
||||
/* Whole gpu reset caused by GPU hang and memory is lost */
|
||||
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
|
||||
hw_exception_data.gpu_id = dev->id;
|
||||
hw_exception_data.memory_lost = 1;
|
||||
|
||||
idx = srcu_read_lock(&kfd_processes_srcu);
|
||||
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
|
||||
mutex_lock(&p->event_mutex);
|
||||
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
|
||||
idr_for_each_entry_continue(&p->event_idr, ev, id)
|
||||
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
|
||||
ev->hw_exception_data = hw_exception_data;
|
||||
set_event(ev);
|
||||
}
|
||||
mutex_unlock(&p->event_mutex);
|
||||
}
|
||||
srcu_read_unlock(&kfd_processes_srcu, idx);
|
||||
}
|
||||
|
@ -66,6 +66,7 @@ struct kfd_event {
|
||||
/* type specific data */
|
||||
union {
|
||||
struct kfd_hsa_memory_exception_data memory_exception_data;
|
||||
struct kfd_hsa_hw_exception_data hw_exception_data;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -26,7 +26,9 @@
|
||||
|
||||
|
||||
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
|
||||
const uint32_t *ih_ring_entry)
|
||||
const uint32_t *ih_ring_entry,
|
||||
uint32_t *patched_ihre,
|
||||
bool *patched_flag)
|
||||
{
|
||||
uint16_t source_id, client_id, pasid, vmid;
|
||||
const uint32_t *data = ih_ring_entry;
|
||||
@ -57,7 +59,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
|
||||
return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
|
||||
source_id == SOC15_INTSRC_SDMA_TRAP ||
|
||||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
|
||||
source_id == SOC15_INTSRC_CP_BAD_OPCODE;
|
||||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
|
||||
client_id == SOC15_IH_CLIENTID_VMC ||
|
||||
client_id == SOC15_IH_CLIENTID_UTCL2;
|
||||
}
|
||||
|
||||
static void event_interrupt_wq_v9(struct kfd_dev *dev,
|
||||
@ -82,7 +86,19 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
		kfd_signal_hw_exception_event(pasid);
	else if (client_id == SOC15_IH_CLIENTID_VMC ||
		 client_id == SOC15_IH_CLIENTID_UTCL2) {
		/* TODO */
		struct kfd_vm_fault_info info = {0};
		uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);

		info.vmid = vmid;
		info.mc_id = client_id;
		info.page_addr = ih_ring_entry[4] |
			(uint64_t)(ih_ring_entry[5] & 0xf) << 32;
		info.prot_valid = ring_id & 0x08;
		info.prot_read = ring_id & 0x10;
		info.prot_write = ring_id & 0x20;

		kfd_process_vm_fault(dev->dqm, pasid);
		kfd_signal_vm_fault_event(dev, pasid, &info);
	}
}
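For reference, the fault address is assembled from two ring-entry dwords: if, say, ih_ring_entry[4] is 0x00ABC123 and the low nibble of ih_ring_entry[5] is 0x1, then page_addr is 0x100ABC123 in page units, and the faulting virtual address reported to user space is page_addr << PAGE_SHIFT, which is how kfd_signal_vm_fault_event() fills memory_exception_data.va.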
|
||||
|
||||
|
@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work)
|
||||
ih_ring_entry);
|
||||
}
|
||||
|
||||
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
|
||||
bool interrupt_is_wanted(struct kfd_dev *dev,
|
||||
const uint32_t *ih_ring_entry,
|
||||
uint32_t *patched_ihre, bool *flag)
|
||||
{
|
||||
/* integer and bitwise OR so there is no boolean short-circuiting */
|
||||
unsigned int wanted = 0;
|
||||
|
||||
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
|
||||
ih_ring_entry);
|
||||
ih_ring_entry, patched_ihre, flag);
|
||||
|
||||
return wanted != 0;
|
||||
}
|
||||
|
@ -190,7 +190,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
|
||||
{
|
||||
struct kfd_dev *dev;
|
||||
|
||||
dev_warn(kfd_device,
|
||||
dev_warn_ratelimited(kfd_device,
|
||||
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
|
||||
PCI_BUS_NUM(pdev->devfn),
|
||||
PCI_SLOT(pdev->devfn),
|
||||
|
@ -59,7 +59,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
switch (type) {
|
||||
case KFD_QUEUE_TYPE_DIQ:
|
||||
case KFD_QUEUE_TYPE_HIQ:
|
||||
kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm,
|
||||
kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
|
||||
KFD_MQD_TYPE_HIQ);
|
||||
break;
|
||||
default:
|
||||
@ -67,7 +67,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!kq->mqd)
|
||||
if (!kq->mqd_mgr)
|
||||
return false;
|
||||
|
||||
prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off);
|
||||
@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
|
||||
prop.eop_ring_buffer_address = kq->eop_gpu_addr;
|
||||
prop.eop_ring_buffer_size = PAGE_SIZE;
|
||||
prop.cu_mask = NULL;
|
||||
|
||||
if (init_queue(&kq->queue, &prop) != 0)
|
||||
goto err_init_queue;
|
||||
@ -130,7 +131,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
kq->queue->device = dev;
|
||||
kq->queue->process = kfd_get_process(current);
|
||||
|
||||
retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd,
|
||||
retval = kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd,
|
||||
&kq->queue->mqd_mem_obj,
|
||||
&kq->queue->gart_mqd_addr,
|
||||
&kq->queue->properties);
|
||||
@ -142,9 +143,9 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
pr_debug("Assigning hiq to hqd\n");
|
||||
kq->queue->pipe = KFD_CIK_HIQ_PIPE;
|
||||
kq->queue->queue = KFD_CIK_HIQ_QUEUE;
|
||||
kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe,
|
||||
kq->queue->queue, &kq->queue->properties,
|
||||
NULL);
|
||||
kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd,
|
||||
kq->queue->pipe, kq->queue->queue,
|
||||
&kq->queue->properties, NULL);
|
||||
} else {
|
||||
/* allocate fence for DIQ */
|
||||
|
||||
@ -182,7 +183,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
static void uninitialize(struct kernel_queue *kq)
|
||||
{
|
||||
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
|
||||
kq->mqd->destroy_mqd(kq->mqd,
|
||||
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
|
||||
kq->queue->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
|
||||
KFD_UNMAP_LATENCY_MS,
|
||||
@ -191,7 +192,8 @@ static void uninitialize(struct kernel_queue *kq)
|
||||
else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
|
||||
kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
|
||||
|
||||
kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj);
|
||||
kq->mqd_mgr->uninit_mqd(kq->mqd_mgr, kq->queue->mqd,
|
||||
kq->queue->mqd_mem_obj);
|
||||
|
||||
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
|
||||
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
|
||||
|
@ -70,7 +70,7 @@ struct kernel_queue {
|
||||
|
||||
/* data */
|
||||
struct kfd_dev *dev;
|
||||
struct mqd_manager *mqd;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
struct queue *queue;
|
||||
uint64_t pending_wptr64;
|
||||
uint32_t pending_wptr;
|
||||
|
@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
|
||||
.resume_mm = kgd2kfd_resume_mm,
|
||||
.schedule_evict_and_restore_process =
|
||||
kgd2kfd_schedule_evict_and_restore_process,
|
||||
.pre_reset = kgd2kfd_pre_reset,
|
||||
.post_reset = kgd2kfd_post_reset,
|
||||
};
|
||||
|
||||
int sched_policy = KFD_SCHED_POLICY_HWS;
|
||||
@ -61,7 +63,7 @@ MODULE_PARM_DESC(hws_max_conc_proc,
|
||||
|
||||
int cwsr_enable = 1;
|
||||
module_param(cwsr_enable, int, 0444);
|
||||
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
|
||||
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = off, 1 = on (default))");
|
||||
|
||||
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
|
||||
module_param(max_num_of_queues_per_device, int, 0444);
|
||||
@ -83,13 +85,19 @@ module_param(ignore_crat, int, 0444);
|
||||
MODULE_PARM_DESC(ignore_crat,
|
||||
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
|
||||
|
||||
int vega10_noretry;
|
||||
module_param_named(noretry, vega10_noretry, int, 0644);
|
||||
int noretry;
|
||||
module_param(noretry, int, 0644);
|
||||
MODULE_PARM_DESC(noretry,
|
||||
"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
|
||||
"Set sh_mem_config.retry_disable on GFXv9+ dGPUs (0 = retry enabled (default), 1 = retry disabled)");
|
||||
|
||||
int halt_if_hws_hang;
|
||||
module_param(halt_if_hws_hang, int, 0644);
|
||||
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
|
||||
|
||||
|
||||
static int amdkfd_init_completed;
|
||||
|
||||
|
||||
int kgd2kfd_init(unsigned int interface_version,
|
||||
const struct kgd2kfd_calls **g2f)
|
||||
{
|
||||
|
@ -21,7 +21,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_mqd_manager.h"
|
||||
|
||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
@ -48,3 +48,42 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
		const uint32_t *cu_mask, uint32_t cu_mask_count,
		uint32_t *se_mask)
{
	struct kfd_cu_info cu_info;
	uint32_t cu_per_sh[4] = {0};
	int i, se, cu = 0;

	mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);

	if (cu_mask_count > cu_info.cu_active_number)
		cu_mask_count = cu_info.cu_active_number;

	for (se = 0; se < cu_info.num_shader_engines; se++)
		for (i = 0; i < 4; i++)
			cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);

	/* Symmetrically map cu_mask to all SEs:
	 * cu_mask[0] bit0 -> se_mask[0] bit0;
	 * cu_mask[0] bit1 -> se_mask[1] bit0;
	 * ... (if # SE is 4)
	 * cu_mask[0] bit4 -> se_mask[0] bit1;
	 * ...
	 */
	se = 0;
	for (i = 0; i < cu_mask_count; i++) {
		if (cu_mask[i / 32] & (1 << (i % 32)))
			se_mask[se] |= 1 << cu;

		do {
			se++;
			if (se == cu_info.num_shader_engines) {
				se = 0;
				cu++;
			}
		} while (cu >= cu_per_sh[se] && cu < 32);
	}
}
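A short worked example of the mapping above: on a part with four shader engines, each reporting at least two active CUs, a cu_mask whose low eight bits are set (0xff) yields se_mask[0] through se_mask[3] all equal to 0x3, i.e. CU0 and CU1 enabled on every SE, which is exactly the symmetric distribution the comment describes.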
|
||||
|
@ -93,4 +93,8 @@ struct mqd_manager {
|
||||
struct kfd_dev *dev;
|
||||
};
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
const uint32_t *cu_mask, uint32_t cu_mask_count,
|
||||
uint32_t *se_mask);
|
||||
|
||||
#endif /* KFD_MQD_MANAGER_H_ */
|
||||
|
@ -41,6 +41,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
|
||||
return (struct cik_sdma_rlc_registers *)mqd;
|
||||
}
|
||||
|
||||
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct cik_mqd *m;
|
||||
uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
|
||||
|
||||
if (q->cu_mask_count == 0)
|
||||
return;
|
||||
|
||||
mqd_symmetrically_map_cu_mask(mm,
|
||||
q->cu_mask, q->cu_mask_count, se_mask);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->compute_static_thread_mgmt_se0 = se_mask[0];
|
||||
m->compute_static_thread_mgmt_se1 = se_mask[1];
|
||||
m->compute_static_thread_mgmt_se2 = se_mask[2];
|
||||
m->compute_static_thread_mgmt_se3 = se_mask[3];
|
||||
|
||||
pr_debug("Update cu mask to %#x %#x %#x %#x\n",
|
||||
m->compute_static_thread_mgmt_se0,
|
||||
m->compute_static_thread_mgmt_se1,
|
||||
m->compute_static_thread_mgmt_se2,
|
||||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
if (q->format == KFD_QUEUE_FORMAT_AQL)
|
||||
m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
|
||||
|
||||
update_cu_mask(mm, mqd, q);
|
||||
|
||||
q->is_active = (q->queue_size > 0 &&
|
||||
q->queue_address != 0 &&
|
||||
q->queue_percent > 0 &&
|
||||
@ -408,7 +435,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
|
||||
return NULL;
|
||||
|
||||
mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
|
||||
mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
|
||||
if (!mqd)
|
||||
return NULL;
|
||||
|
||||
|
@ -41,6 +41,31 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
	return (struct v9_sdma_mqd *)mqd;
}

static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */

	if (q->cu_mask_count == 0)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		q->cu_mask, q->cu_mask_count, se_mask);

	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];

	pr_debug("update cu mask to %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3);
}

static int init_mqd(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
@ -55,7 +80,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
	 * instead of sub-allocation function.
	 */
	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
		*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
		*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
		if (!*mqd_mem_obj)
			return -ENOMEM;
		retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
@ -198,6 +223,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
		m->cp_hqd_ctx_save_control = 0;

	update_cu_mask(mm, mqd, q);

	q->is_active = (q->queue_size > 0 &&
			q->queue_address != 0 &&
			q->queue_percent > 0 &&
@ -393,7 +420,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

@ -43,6 +43,31 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
	return (struct vi_sdma_mqd *)mqd;
}

static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct vi_mqd *m;
	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */

	if (q->cu_mask_count == 0)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		q->cu_mask, q->cu_mask_count, se_mask);

	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];

	pr_debug("Update cu mask to %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3);
}

static int init_mqd(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;

	update_cu_mask(mm, mqd, q);

	q->is_active = (q->queue_size > 0 &&
			q->queue_address != 0 &&
			q->queue_percent > 0 &&
@ -394,7 +421,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

@ -418,4 +418,30 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)
	return 0;
}

int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	uint32_t *buffer, size;
	int r = 0;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	pm->priv_queue->ops.submit_packet(pm->priv_queue);

	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		buffer[0], buffer[1], buffer[2], buffer[3],
		buffer[4], buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
}

#endif

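pm_debugfs_hang_hws() deliberately submits a packet filled with 0x55 to the HIQ so the hardware scheduler hangs and the reset path can be exercised; it is reachable only through debugfs via kfd_debugfs_hang_hws(), declared in the kfd_priv.h hunk below. As a rough illustration of how such a debugfs hook is typically wired up, the sketch that follows is hypothetical: apart from kfd_debugfs_hang_hws() and kfd_device_by_id(), the names, file location, and permissions are assumptions and are not taken from this patch.

/* Hypothetical sketch only -- not part of this patch. */
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include "kfd_priv.h"

static ssize_t hang_hws_write(struct file *file, const char __user *user_buf,
			      size_t count, loff_t *ppos)
{
	struct kfd_dev *dev;
	uint32_t gpu_id;
	int ret;

	/* Parse a topology gpu_id written by the user. */
	ret = kstrtouint_from_user(user_buf, count, 0, &gpu_id);
	if (ret)
		return ret;

	dev = kfd_device_by_id(gpu_id);
	if (!dev)
		return -EINVAL;

	ret = kfd_debugfs_hang_hws(dev);
	return ret ? ret : count;
}

static const struct file_operations hang_hws_fops = {
	.owner = THIS_MODULE,
	.write = hang_hws_write,
};

/* A write-only file using hang_hws_fops would be registered during debugfs
 * init with debugfs_create_file(); the exact name and parent are assumed.
 */
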
@ -73,7 +73,7 @@

/*
 * When working with the cp scheduler we should assign the HIQ manually or via
 * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot
 * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
 * definitions for Kaveri. In Kaveri only the first ME's queues participate
 * in cp scheduling; with that in mind we set the HIQ slot in the
 * second ME.
@ -142,7 +142,12 @@ extern int ignore_crat;
/*
 * Set sh_mem_config.retry_disable on Vega10
 */
extern int vega10_noretry;
extern int noretry;

/*
 * Halt if HWS hang is detected
 */
extern int halt_if_hws_hang;

/**
 * enum kfd_sched_policy
@ -180,9 +185,10 @@ enum cache_policy {

struct kfd_event_interrupt_class {
	bool (*interrupt_isr)(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry);
			const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
			bool *patched_flag);
	void (*interrupt_wq)(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry);
			const uint32_t *ih_ring_entry);
};

struct kfd_device_info {
@ -197,6 +203,7 @@ struct kfd_device_info {
	bool supports_cwsr;
	bool needs_iommu_device;
	bool needs_pci_atomics;
	unsigned int num_sdma_engines;
};

struct kfd_mem_obj {
@ -415,6 +422,9 @@ struct queue_properties {
	uint32_t ctl_stack_size;
	uint64_t tba_addr;
	uint64_t tma_addr;
	/* Relevant for CU */
	uint32_t cu_mask_count; /* Must be a multiple of 32 */
	uint32_t *cu_mask;
};

/**
@ -806,12 +816,18 @@ int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev,
		const uint32_t *ih_ring_entry,
		uint32_t *patched_ihre, bool *flag);

/* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd);
int kgd2kfd_resume(struct kfd_dev *kfd);

/* GPU reset */
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);

/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);

@ -838,6 +854,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
		enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);

/* Process Queue Manager */
struct process_queue_node {
@ -858,6 +875,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
		struct queue_properties *p);
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
		struct queue_properties *p);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
		unsigned int qid);

@ -964,10 +983,17 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);

void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
		struct kfd_vm_fault_info *info);

void kfd_signal_reset_event(struct kfd_dev *dev);

void kfd_flush_tlb(struct kfd_process_device *pdd);

int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);

bool kfd_is_locked(void);

/* Debugfs */
#if defined(CONFIG_DEBUG_FS)

@ -980,6 +1006,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);

int kfd_debugfs_hang_hws(struct kfd_dev *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_execute_queues(struct device_queue_manager *dqm);

#else

static inline void kfd_debugfs_init(void) {}

@ -244,6 +244,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

@ -186,8 +186,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
		if (dev->dqm->queue_count >=
			CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) {
		if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) {
			pr_err("Over-subscription is not allowed for SDMA.\n");
			retval = -EPERM;
			goto err_create_queue;
@ -209,7 +208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
			KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
			((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
			(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
			pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}
@ -326,6 +325,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
			if (retval != -ETIME)
				goto err_destroy_queue;
		}
		kfree(pqn->q->properties.cu_mask);
		pqn->q->properties.cu_mask = NULL;
		uninit_queue(pqn->q);
	}

@ -366,6 +367,34 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
	return 0;
}

int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
			struct queue_properties *p)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	/* Free the old CU mask memory if it is already allocated, then
	 * allocate memory for the new CU mask.
	 */
	kfree(pqn->q->properties.cu_mask);

	pqn->q->properties.cu_mask_count = p->cu_mask_count;
	pqn->q->properties.cu_mask = p->cu_mask;

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q);
	if (retval != 0)
		return retval;

	return 0;
}

struct kernel_queue *pqm_get_kernel_queue(
					struct process_queue_manager *pqm,
					unsigned int qid)
@ -387,7 +416,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
	struct process_queue_node *pqn;
	struct queue *q;
	enum KFD_MQD_TYPE mqd_type;
	struct mqd_manager *mqd_manager;
	struct mqd_manager *mqd_mgr;
	int r = 0;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
@ -410,11 +439,11 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
					q->properties.type, q->device->id);
				continue;
			}
			mqd_manager = q->device->dqm->ops.get_mqd_manager(
			mqd_mgr = q->device->dqm->ops.get_mqd_manager(
				q->device->dqm, mqd_type);
		} else if (pqn->kq) {
			q = pqn->kq->queue;
			mqd_manager = pqn->kq->mqd;
			mqd_mgr = pqn->kq->mqd_mgr;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_DIQ:
				seq_printf(m, "  DIQ on device %x\n",
@ -434,7 +463,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
				continue;
			}

			r = mqd_manager->debugfs_show_mqd(m, q->mqd);
			r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
			if (r != 0)
				break;
		}

@ -47,6 +47,17 @@ enum kfd_preempt_type {
	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
};

struct kfd_vm_fault_info {
	uint64_t page_addr;
	uint32_t vmid;
	uint32_t mc_id;
	uint32_t status;
	bool prot_valid;
	bool prot_read;
	bool prot_write;
	bool prot_exec;
};

struct kfd_cu_info {
	uint32_t num_shader_engines;
	uint32_t num_shader_arrays_per_engine;
@ -259,6 +270,21 @@ struct tile_config {
 * IB to the corresponding ring (ring type). The IB is executed with the
 * specified VMID in a user mode context.
 *
 * @get_vm_fault_info: Return information about a recent VM fault on
 * GFXv7 and v8. If multiple VM faults occurred since the last call of
 * this function, it will return information about the first of those
 * faults. On GFXv9 VM fault information is fully contained in the IH
 * packet and this function is not needed.
 *
 * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
 * IH ring entry. This function allows the KFD ISR to get the VMID
 * from the fault status register as early as possible.
 *
 * @gpu_recover: Lets kgd reset the GPU after kfd detects a CPC hang
 *
 * @set_compute_idle: Indicates that compute is idle on a device. This
 * can be used to change power profiles depending on compute activity.
 *
 * This structure contains function pointers to services that the kgd driver
 * provides to the amdkfd driver.
 *
@ -374,6 +400,14 @@ struct kfd2kgd_calls {
	int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
			uint32_t vmid, uint64_t gpu_addr,
			uint32_t *ib_cmd, uint32_t ib_len);

	int (*get_vm_fault_info)(struct kgd_dev *kgd,
			struct kfd_vm_fault_info *info);
	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);

	void (*gpu_recover)(struct kgd_dev *kgd);

	void (*set_compute_idle)(struct kgd_dev *kgd, bool idle);
};

/**
@ -399,6 +433,10 @@ struct kfd2kgd_calls {
 * @schedule_evict_and_restore_process: Schedules work queue that will prepare
 * for safe eviction of KFD BOs that belong to the specified process.
 *
 * @pre_reset: Notifies amdkfd that amdgpu is about to reset the GPU
 *
 * @post_reset: Notifies amdkfd that amdgpu has successfully reset the GPU
 *
 * This structure contains function callback pointers that the kgd driver
 * uses to notify amdkfd about certain status changes.
 *
@ -417,6 +455,8 @@ struct kgd2kfd_calls {
	int (*resume_mm)(struct mm_struct *mm);
	int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
			struct dma_fence *fence);
	int (*pre_reset)(struct kfd_dev *kfd);
	int (*post_reset)(struct kfd_dev *kfd);
};

int kgd2kfd_init(unsigned interface_version,

@ -76,6 +76,12 @@ struct kfd_ioctl_update_queue_args {
	__u32 queue_priority;	/* to KFD */
};

struct kfd_ioctl_set_cu_mask_args {
	__u32 queue_id;		/* to KFD */
	__u32 num_cu_mask;	/* to KFD */
	__u64 cu_mask_ptr;	/* to KFD */
};

/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@ -189,6 +195,15 @@ struct kfd_ioctl_dbg_wave_control_args {

#define KFD_SIGNAL_EVENT_LIMIT 4096

/* For kfd_event_data.hw_exception_data.reset_type. */
#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET  0
#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1

/* For kfd_event_data.hw_exception_data.reset_cause. */
#define KFD_HW_EXCEPTION_GPU_HANG 0
#define KFD_HW_EXCEPTION_ECC      1

struct kfd_ioctl_create_event_args {
	__u64 event_page_offset;	/* from KFD */
	__u32 event_trigger_data;	/* from KFD - signal events only */
@ -219,7 +234,7 @@ struct kfd_memory_exception_failure {
	__u32 NotPresent;	/* Page not present or supervisor privilege */
	__u32 ReadOnly;		/* Write access to a read-only page */
	__u32 NoExecute;	/* Execute access to a page marked NX */
	__u32 pad;
	__u32 imprecise;	/* Can't determine the exact fault address */
};

/* memory exception data */
@ -230,10 +245,19 @@ struct kfd_hsa_memory_exception_data {
	__u32 pad;
};

/* Event data*/
/* hw exception data */
struct kfd_hsa_hw_exception_data {
	uint32_t reset_type;
	uint32_t reset_cause;
	uint32_t memory_lost;
	uint32_t gpu_id;
};

/* Event data */
struct kfd_event_data {
	union {
		struct kfd_hsa_memory_exception_data memory_exception_data;
		struct kfd_hsa_hw_exception_data hw_exception_data;
	};				/* From KFD */
	__u64 kfd_event_data_ext;	/* pointer to an extension structure
					   for future exception types */

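With struct kfd_hsa_hw_exception_data folded into the kfd_event_data union, a process that waits on a hardware-exception event can learn whether the GPU went through a whole-GPU or per-engine reset, why it happened, and whether its memory survived. The helper below is a small userspace sketch of decoding that payload; it assumes the caller already knows the event it received is a hardware exception, and it uses only the fields and defines introduced in this header.

#include <stdio.h>
#include <linux/kfd_ioctl.h>

/* Sketch: pretty-print the hw exception payload of a KFD event. */
static void report_hw_exception(const struct kfd_event_data *ev)
{
	const struct kfd_hsa_hw_exception_data *hw = &ev->hw_exception_data;

	printf("GPU %u: %s reset (cause: %s), local memory %s\n",
	       hw->gpu_id,
	       hw->reset_type == KFD_HW_EXCEPTION_WHOLE_GPU_RESET ?
			"whole-GPU" : "per-engine",
	       hw->reset_cause == KFD_HW_EXCEPTION_ECC ? "ECC error" : "GPU hang",
	       hw->memory_lost ? "lost" : "preserved");
}
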
@ -448,7 +472,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU	\
		AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)

#define AMDKFD_IOC_SET_CU_MASK		\
		AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)

#define AMDKFD_COMMAND_START		0x01
#define AMDKFD_COMMAND_END		0x1A
#define AMDKFD_COMMAND_END		0x1B

#endif

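Finally, the new AMDKFD_IOC_SET_CU_MASK ioctl gives user space a way to restrict an existing queue to a subset of CUs. A hypothetical caller might look like the sketch below; it assumes /dev/kfd is already open and that queue_id came from a prior AMDKFD_IOC_CREATE_QUEUE call, and it passes a CU count that is a multiple of 32, matching the cu_mask_count comment in kfd_priv.h above.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Sketch: restrict an existing queue to the CUs named in 'mask'.
 * 'kfd_fd' is an open file descriptor for /dev/kfd and 'queue_id'
 * comes from a prior AMDKFD_IOC_CREATE_QUEUE call (both assumed).
 */
static int set_queue_cu_mask(int kfd_fd, uint32_t queue_id,
			     const uint32_t *mask, uint32_t num_bits)
{
	struct kfd_ioctl_set_cu_mask_args args;

	memset(&args, 0, sizeof(args));
	args.queue_id = queue_id;
	args.num_cu_mask = num_bits;		/* must be a multiple of 32 */
	args.cu_mask_ptr = (uintptr_t)mask;	/* array of num_bits/32 words */

	return ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
}

On the kernel side the mask is stored in queue_properties and applied to the per-SE compute_static_thread_mgmt registers the next time the MQD is updated, as shown in the mqd_manager hunks above.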