mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-25 23:49:36 +07:00
effd924d2f
Add amdgpu_queue_mgr, a mechanism that allows disjointing usermode's ring ids from the kernel's ring ids. The queue manager maintains a per-file descriptor map of user ring ids to amdgpu_ring pointers. Once a map is created it is permanent (this is required to maintain FIFO execution guarantees for a context's ring). Different queue map policies can be configured for each HW IP. Currently all HW IPs use the identity mapper, i.e. kernel ring id is equal to the user ring id. The purpose of this mechanism is to distribute the load across multiple queues more effectively for HW IPs that support multiple rings. Userspace clients are unable to check whether a specific resource is in use by a different client. Therefore, it is up to the kernel driver to make the optimal choice. v2: remove amdgpu_queue_mapper_funcs v3: made amdgpu_queue_mgr per context instead of per-fd v4: add context_put on error paths v5: rebase and include new IPs UVD_ENC & VCN_* v6: drop unused amdgpu_ring_is_valid_index (Alex) Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Andres Rodriguez <andresx7@gmail.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
328 lines
7.6 KiB
C
328 lines
7.6 KiB
C
/*
|
|
* Copyright 2015 Advanced Micro Devices, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* Authors: monk liu <monk.liu@amd.com>
|
|
*/
|
|
|
|
#include <drm/drmP.h>
|
|
#include "amdgpu.h"
|
|
|
|
static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
|
|
{
|
|
unsigned i, j;
|
|
int r;
|
|
|
|
memset(ctx, 0, sizeof(*ctx));
|
|
ctx->adev = adev;
|
|
kref_init(&ctx->refcount);
|
|
spin_lock_init(&ctx->ring_lock);
|
|
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
|
|
sizeof(struct dma_fence*), GFP_KERNEL);
|
|
if (!ctx->fences)
|
|
return -ENOMEM;
|
|
|
|
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
|
ctx->rings[i].sequence = 1;
|
|
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
|
|
}
|
|
|
|
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
|
|
|
|
/* create context entity for each ring */
|
|
for (i = 0; i < adev->num_rings; i++) {
|
|
struct amdgpu_ring *ring = adev->rings[i];
|
|
struct amd_sched_rq *rq;
|
|
|
|
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
|
|
|
|
if (ring == &adev->gfx.kiq.ring)
|
|
continue;
|
|
|
|
r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
|
|
rq, amdgpu_sched_jobs);
|
|
if (r)
|
|
goto failed;
|
|
}
|
|
|
|
r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
|
|
if (r)
|
|
goto failed;
|
|
|
|
return 0;
|
|
|
|
failed:
|
|
for (j = 0; j < i; j++)
|
|
amd_sched_entity_fini(&adev->rings[j]->sched,
|
|
&ctx->rings[j].entity);
|
|
kfree(ctx->fences);
|
|
ctx->fences = NULL;
|
|
return r;
|
|
}
|
|
|
|
static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
|
|
{
|
|
struct amdgpu_device *adev = ctx->adev;
|
|
unsigned i, j;
|
|
|
|
if (!adev)
|
|
return;
|
|
|
|
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
|
|
for (j = 0; j < amdgpu_sched_jobs; ++j)
|
|
dma_fence_put(ctx->rings[i].fences[j]);
|
|
kfree(ctx->fences);
|
|
ctx->fences = NULL;
|
|
|
|
for (i = 0; i < adev->num_rings; i++)
|
|
amd_sched_entity_fini(&adev->rings[i]->sched,
|
|
&ctx->rings[i].entity);
|
|
|
|
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
|
|
}
|
|
|
|
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
|
|
struct amdgpu_fpriv *fpriv,
|
|
uint32_t *id)
|
|
{
|
|
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
|
|
struct amdgpu_ctx *ctx;
|
|
int r;
|
|
|
|
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
|
|
if (!ctx)
|
|
return -ENOMEM;
|
|
|
|
mutex_lock(&mgr->lock);
|
|
r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
|
|
if (r < 0) {
|
|
mutex_unlock(&mgr->lock);
|
|
kfree(ctx);
|
|
return r;
|
|
}
|
|
*id = (uint32_t)r;
|
|
r = amdgpu_ctx_init(adev, ctx);
|
|
if (r) {
|
|
idr_remove(&mgr->ctx_handles, *id);
|
|
*id = 0;
|
|
kfree(ctx);
|
|
}
|
|
mutex_unlock(&mgr->lock);
|
|
return r;
|
|
}
|
|
|
|
static void amdgpu_ctx_do_release(struct kref *ref)
|
|
{
|
|
struct amdgpu_ctx *ctx;
|
|
|
|
ctx = container_of(ref, struct amdgpu_ctx, refcount);
|
|
|
|
amdgpu_ctx_fini(ctx);
|
|
|
|
kfree(ctx);
|
|
}
|
|
|
|
/**
 * amdgpu_ctx_free - remove a context handle and drop its reference
 * @fpriv: per-file private data owning the context manager
 * @id: handle of the context to free
 *
 * Removes @id from the handle table under the manager lock and drops one
 * reference on the context that was stored there.
 *
 * Returns 0 if a context was found for @id, -EINVAL otherwise.
 */
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int ret = -EINVAL;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx) {
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
		ret = 0;
	}
	mutex_unlock(&mgr->lock);

	return ret;
}
|
|
|
|
/**
 * amdgpu_ctx_query - report the reset state of a context
 * @adev: device whose GPU reset counter is sampled
 * @fpriv: per-file private data owning the context manager (may be NULL)
 * @id: handle of the context to query
 * @out: result structure; the state member is filled in
 *
 * Compares the reset counter stored in the context with the device's
 * current one, reports whether they differ, and then stores the current
 * value in the context so the next query starts from it.
 *
 * Returns 0 on success, -EINVAL if @fpriv is NULL or @id is unknown.
 */
static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx_mgr *mgr;
	struct amdgpu_ctx *ctx;
	unsigned current_resets;
	int ret = -EINVAL;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);

	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx)
		goto unlock;

	/* TODO: flags and hangs are always reported as zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine whether a GPU reset has occurred since the last call */
	current_resets = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	out->state.reset_status = (ctx->reset_counter == current_resets) ?
		AMDGPU_CTX_NO_RESET : AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter = current_resets;

	ret = 0;

unlock:
	mutex_unlock(&mgr->lock);
	return ret;
}
|
|
|
|
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
|
|
struct drm_file *filp)
|
|
{
|
|
int r;
|
|
uint32_t id;
|
|
|
|
union drm_amdgpu_ctx *args = data;
|
|
struct amdgpu_device *adev = dev->dev_private;
|
|
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
|
|
|
r = 0;
|
|
id = args->in.ctx_id;
|
|
|
|
switch (args->in.op) {
|
|
case AMDGPU_CTX_OP_ALLOC_CTX:
|
|
r = amdgpu_ctx_alloc(adev, fpriv, &id);
|
|
args->out.alloc.ctx_id = id;
|
|
break;
|
|
case AMDGPU_CTX_OP_FREE_CTX:
|
|
r = amdgpu_ctx_free(fpriv, id);
|
|
break;
|
|
case AMDGPU_CTX_OP_QUERY_STATE:
|
|
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/**
 * amdgpu_ctx_get - look up a context by handle and take a reference
 * @fpriv: per-file private data owning the context manager (may be NULL)
 * @id: handle to look up
 *
 * The lookup and the reference grab both happen under the manager lock.
 *
 * Returns the context with its refcount raised by one, or NULL if @fpriv
 * is NULL or no context is registered under @id.
 */
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr;
	struct amdgpu_ctx *found;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	found = idr_find(&mgr->ctx_handles, id);
	if (found != NULL)
		kref_get(&found->refcount);
	mutex_unlock(&mgr->lock);

	return found;
}
|
|
|
|
int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
|
|
{
|
|
if (ctx == NULL)
|
|
return -EINVAL;
|
|
|
|
kref_put(&ctx->refcount, amdgpu_ctx_do_release);
|
|
return 0;
|
|
}
|
|
|
|
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
|
|
struct dma_fence *fence)
|
|
{
|
|
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
|
|
uint64_t seq = cring->sequence;
|
|
unsigned idx = 0;
|
|
struct dma_fence *other = NULL;
|
|
|
|
idx = seq & (amdgpu_sched_jobs - 1);
|
|
other = cring->fences[idx];
|
|
if (other) {
|
|
signed long r;
|
|
r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
|
|
if (r < 0)
|
|
DRM_ERROR("Error (%ld) waiting for fence!\n", r);
|
|
}
|
|
|
|
dma_fence_get(fence);
|
|
|
|
spin_lock(&ctx->ring_lock);
|
|
cring->fences[idx] = fence;
|
|
cring->sequence++;
|
|
spin_unlock(&ctx->ring_lock);
|
|
|
|
dma_fence_put(other);
|
|
|
|
return seq;
|
|
}
|
|
|
|
/**
 * amdgpu_ctx_get_fence - fetch the fence stored for a sequence number
 * @ctx: context to look in
 * @ring: ring whose index selects the per-ring bookkeeping
 * @seq: sequence number to look up; ~0ull selects the most recent one
 *
 * Runs entirely under ctx->ring_lock.
 *
 * Returns ERR_PTR(-EINVAL) if @seq has not been handed out yet, NULL if
 * @seq is more than amdgpu_sched_jobs behind the ring's next sequence
 * number, and otherwise the result of dma_fence_get() on the slot
 * contents.
 */
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct amdgpu_ring *ring, uint64_t seq)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	/* ~0 is shorthand for "the last sequence number handed out" */
	if (seq == ~0ull)
		seq = cring->sequence - 1;

	if (seq >= cring->sequence)
		fence = ERR_PTR(-EINVAL);
	else if (seq + amdgpu_sched_jobs < cring->sequence)
		fence = NULL;
	else
		fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);

	spin_unlock(&ctx->ring_lock);

	return fence;
}
|
|
|
|
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
|
|
{
|
|
mutex_init(&mgr->lock);
|
|
idr_init(&mgr->ctx_handles);
|
|
}
|
|
|
|
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
|
|
{
|
|
struct amdgpu_ctx *ctx;
|
|
struct idr *idp;
|
|
uint32_t id;
|
|
|
|
idp = &mgr->ctx_handles;
|
|
|
|
idr_for_each_entry(idp, ctx, id) {
|
|
if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
|
|
DRM_ERROR("ctx %p is still alive\n", ctx);
|
|
}
|
|
|
|
idr_destroy(&mgr->ctx_handles);
|
|
mutex_destroy(&mgr->lock);
|
|
}
|