mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
drm/amdgpu: remove distinction between explicit and implicit sync (v2)
According to Marek a pipeline sync should be inserted for implicit syncs well. v2: bump the driver version Signed-off-by: Christian König <christian.koenig@amd.com> Tested-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
614c56111c
commit
174b328bc8
@ -395,7 +395,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return amdgpu_sync_fence(sync, vm->last_update, false);
|
||||
return amdgpu_sync_fence(sync, vm->last_update);
|
||||
}
|
||||
|
||||
static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
|
||||
@ -785,7 +785,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
|
||||
|
||||
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
|
||||
|
||||
amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
|
||||
amdgpu_sync_fence(sync, bo_va->last_pt_update);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -804,7 +804,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
|
||||
return amdgpu_sync_fence(sync, bo_va->last_pt_update);
|
||||
}
|
||||
|
||||
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
|
||||
@ -2102,7 +2102,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
||||
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
|
||||
goto validate_map_fail;
|
||||
}
|
||||
ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
|
||||
ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
|
||||
if (ret) {
|
||||
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
|
||||
goto validate_map_fail;
|
||||
|
@ -992,7 +992,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
|
||||
dma_fence_put(old);
|
||||
}
|
||||
|
||||
r = amdgpu_sync_fence(&p->job->sync, fence, true);
|
||||
r = amdgpu_sync_fence(&p->job->sync, fence);
|
||||
dma_fence_put(fence);
|
||||
if (r)
|
||||
return r;
|
||||
@ -1014,7 +1014,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_sync_fence(&p->job->sync, fence, true);
|
||||
r = amdgpu_sync_fence(&p->job->sync, fence);
|
||||
dma_fence_put(fence);
|
||||
|
||||
return r;
|
||||
|
@ -87,9 +87,10 @@
|
||||
* - 3.36.0 - Allow reading more status registers on si/cik
|
||||
* - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
|
||||
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
|
||||
* - 3.39.0 - DMABUF implicit sync does a full pipeline sync
|
||||
*/
|
||||
#define KMS_DRIVER_MAJOR 3
|
||||
#define KMS_DRIVER_MINOR 38
|
||||
#define KMS_DRIVER_MINOR 39
|
||||
#define KMS_DRIVER_PATCHLEVEL 0
|
||||
|
||||
int amdgpu_vram_limit = 0;
|
||||
|
@ -178,7 +178,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
||||
|
||||
need_ctx_switch = ring->current_ctx != fence_ctx;
|
||||
if (ring->funcs->emit_pipeline_sync && job &&
|
||||
((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
|
||||
((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
|
||||
(amdgpu_sriov_vf(adev) && need_ctx_switch) ||
|
||||
amdgpu_vm_need_pipeline_sync(ring, job))) {
|
||||
need_pipe_sync = true;
|
||||
|
@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
|
||||
int r;
|
||||
|
||||
if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
|
||||
return amdgpu_sync_fence(sync, ring->vmid_wait, false);
|
||||
return amdgpu_sync_fence(sync, ring->vmid_wait);
|
||||
|
||||
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
|
||||
if (!fences)
|
||||
@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r = amdgpu_sync_fence(sync, &array->base, false);
|
||||
r = amdgpu_sync_fence(sync, &array->base);
|
||||
dma_fence_put(ring->vmid_wait);
|
||||
ring->vmid_wait = &array->base;
|
||||
return r;
|
||||
@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
|
||||
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
|
||||
if (tmp) {
|
||||
*id = NULL;
|
||||
r = amdgpu_sync_fence(sync, tmp, false);
|
||||
r = amdgpu_sync_fence(sync, tmp);
|
||||
return r;
|
||||
}
|
||||
needs_flush = true;
|
||||
@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
|
||||
/* Good we can use this VMID. Remember this submission as
|
||||
* user of the VMID.
|
||||
*/
|
||||
r = amdgpu_sync_fence(&(*id)->active, fence, false);
|
||||
r = amdgpu_sync_fence(&(*id)->active, fence);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
|
||||
/* Good, we can use this VMID. Remember this submission as
|
||||
* user of the VMID.
|
||||
*/
|
||||
r = amdgpu_sync_fence(&(*id)->active, fence, false);
|
||||
r = amdgpu_sync_fence(&(*id)->active, fence);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
@ -435,7 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||
id = idle;
|
||||
|
||||
/* Remember this submission as user of the VMID */
|
||||
r = amdgpu_sync_fence(&id->active, fence, false);
|
||||
r = amdgpu_sync_fence(&id->active, fence);
|
||||
if (r)
|
||||
goto error;
|
||||
|
||||
|
@ -183,16 +183,13 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
|
||||
struct amdgpu_job *job = to_amdgpu_job(sched_job);
|
||||
struct amdgpu_vm *vm = job->vm;
|
||||
struct dma_fence *fence;
|
||||
bool explicit = false;
|
||||
int r;
|
||||
|
||||
fence = amdgpu_sync_get_fence(&job->sync, &explicit);
|
||||
if (fence && explicit) {
|
||||
if (drm_sched_dependency_optimized(fence, s_entity)) {
|
||||
r = amdgpu_sync_fence(&job->sched_sync, fence, false);
|
||||
if (r)
|
||||
DRM_ERROR("Error adding fence (%d)\n", r);
|
||||
}
|
||||
fence = amdgpu_sync_get_fence(&job->sync);
|
||||
if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
|
||||
r = amdgpu_sync_fence(&job->sched_sync, fence);
|
||||
if (r)
|
||||
DRM_ERROR("Error adding fence (%d)\n", r);
|
||||
}
|
||||
|
||||
while (fence == NULL && vm && !job->vmid) {
|
||||
@ -202,7 +199,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
|
||||
if (r)
|
||||
DRM_ERROR("Error getting VM ID (%d)\n", r);
|
||||
|
||||
fence = amdgpu_sync_get_fence(&job->sync, NULL);
|
||||
fence = amdgpu_sync_get_fence(&job->sync);
|
||||
}
|
||||
|
||||
return fence;
|
||||
|
@ -35,7 +35,6 @@
|
||||
struct amdgpu_sync_entry {
|
||||
struct hlist_node node;
|
||||
struct dma_fence *fence;
|
||||
bool explicit;
|
||||
};
|
||||
|
||||
static struct kmem_cache *amdgpu_sync_slab;
|
||||
@ -129,8 +128,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
|
||||
* Tries to add the fence to an existing hash entry. Returns true when an entry
|
||||
* was found, false otherwise.
|
||||
*/
|
||||
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
|
||||
bool explicit)
|
||||
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_sync_entry *e;
|
||||
|
||||
@ -139,10 +137,6 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
|
||||
continue;
|
||||
|
||||
amdgpu_sync_keep_later(&e->fence, f);
|
||||
|
||||
/* Preserve eplicit flag to not loose pipe line sync */
|
||||
e->explicit |= explicit;
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -153,27 +147,23 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
|
||||
*
|
||||
* @sync: sync object to add fence to
|
||||
* @f: fence to sync to
|
||||
* @explicit: if this is an explicit dependency
|
||||
*
|
||||
* Add the fence to the sync object.
|
||||
*/
|
||||
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
|
||||
bool explicit)
|
||||
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_sync_entry *e;
|
||||
|
||||
if (!f)
|
||||
return 0;
|
||||
|
||||
if (amdgpu_sync_add_later(sync, f, explicit))
|
||||
if (amdgpu_sync_add_later(sync, f))
|
||||
return 0;
|
||||
|
||||
e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
|
||||
if (!e)
|
||||
return -ENOMEM;
|
||||
|
||||
e->explicit = explicit;
|
||||
|
||||
hash_add(sync->fences, &e->node, f->context);
|
||||
e->fence = dma_fence_get(f);
|
||||
return 0;
|
||||
@ -194,7 +184,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
|
||||
return 0;
|
||||
|
||||
amdgpu_sync_keep_later(&sync->last_vm_update, fence);
|
||||
return amdgpu_sync_fence(sync, fence, false);
|
||||
return amdgpu_sync_fence(sync, fence);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -221,7 +211,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
|
||||
/* always sync to the exclusive fence */
|
||||
f = dma_resv_get_excl(resv);
|
||||
r = amdgpu_sync_fence(sync, f, false);
|
||||
r = amdgpu_sync_fence(sync, f);
|
||||
|
||||
flist = dma_resv_get_list(resv);
|
||||
if (!flist || r)
|
||||
@ -237,7 +227,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
|
||||
/* Always sync to moves, no matter what */
|
||||
if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
|
||||
r = amdgpu_sync_fence(sync, f, false);
|
||||
r = amdgpu_sync_fence(sync, f);
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
@ -277,7 +267,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
|
||||
WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
|
||||
"Adding eviction fence to sync obj");
|
||||
r = amdgpu_sync_fence(sync, f, false);
|
||||
r = amdgpu_sync_fence(sync, f);
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
@ -332,11 +322,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
|
||||
* amdgpu_sync_get_fence - get the next fence from the sync object
|
||||
*
|
||||
* @sync: sync object to use
|
||||
* @explicit: true if the next fence is explicit
|
||||
*
|
||||
* Get and removes the next fence from the sync object not signaled yet.
|
||||
*/
|
||||
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
|
||||
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
|
||||
{
|
||||
struct amdgpu_sync_entry *e;
|
||||
struct hlist_node *tmp;
|
||||
@ -345,8 +334,6 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
|
||||
hash_for_each_safe(sync->fences, i, tmp, e, node) {
|
||||
|
||||
f = e->fence;
|
||||
if (explicit)
|
||||
*explicit = e->explicit;
|
||||
|
||||
hash_del(&e->node);
|
||||
kmem_cache_free(amdgpu_sync_slab, e);
|
||||
@ -378,7 +365,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
|
||||
hash_for_each_safe(source->fences, i, tmp, e, node) {
|
||||
f = e->fence;
|
||||
if (!dma_fence_is_signaled(f)) {
|
||||
r = amdgpu_sync_fence(clone, f, e->explicit);
|
||||
r = amdgpu_sync_fence(clone, f);
|
||||
if (r)
|
||||
return r;
|
||||
} else {
|
||||
|
@ -47,16 +47,14 @@ struct amdgpu_sync {
|
||||
};
|
||||
|
||||
void amdgpu_sync_create(struct amdgpu_sync *sync);
|
||||
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
|
||||
bool explicit);
|
||||
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
|
||||
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
|
||||
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
struct dma_resv *resv, enum amdgpu_sync_mode mode,
|
||||
void *owner);
|
||||
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
|
||||
struct amdgpu_ring *ring);
|
||||
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
|
||||
bool *explicit);
|
||||
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
|
||||
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
|
||||
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
|
||||
void amdgpu_sync_free(struct amdgpu_sync *sync);
|
||||
|
@ -208,7 +208,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
|
||||
int r;
|
||||
|
||||
/* Wait for PD/PT moves to be completed */
|
||||
r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false);
|
||||
r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user