mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
b75f22505a
Add the following two operations to the CS IOCTL: Signal: The signal operation is basically a command submission that is created by the driver upon user request. It will be implemented using a dedicated PQE that will increment a specific SOB. There will be a new flag: HL_CS_FLAGS_SIGNAL. When the user sets this flag in the CS IOCTL structure, the driver will execute a dedicated code path that will prepare this special PQE and submit it. The user only needs to provide a queue index on which to put the signal. Wait: The wait operation is also a command submission that is created by the driver upon user request. It will be implemented using a dedicated PQE that will contain packets of "ARM a monitor" + FENCE packet. There will be a new flag: HL_CS_FLAGS_WAIT. When the user sets this flag in the CS structure, the driver will execute a dedicated code path that will prepare this special PQE and submit it. The user needs to provide the following parameters: 1. queue ID 2. an array of signal_seq numbers and the number of signals to wait on (the length of signal_seq_arr). The IOCTL will return the CS sequence number of the wait it put on the queue ID. Currently, the code supports signal_seq_nr==1. But this API definition will allow us to put a single PQE that waits on multiple signals. To correctly configure the monitor and fence, the driver will need to retrieve the specified signal CS object that contains the relevant SOB and its expected value. In case the signal CS has already been completed, there is no point in adding a wait operation. In this case, the driver will return to the user *without* putting anything on the PQ. The return code should reflect to the user that the signal was completed, as we won't return a CS sequence number for this wait. Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
231 lines
4.8 KiB
C
231 lines
4.8 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
* Copyright 2016-2019 HabanaLabs, Ltd.
|
|
* All Rights Reserved.
|
|
*/
|
|
|
|
#include "habanalabs.h"
|
|
|
|
#include <linux/slab.h>
|
|
|
|
static void hl_ctx_fini(struct hl_ctx *ctx)
|
|
{
|
|
struct hl_device *hdev = ctx->hdev;
|
|
int i;
|
|
|
|
/*
|
|
* If we arrived here, there are no jobs waiting for this context
|
|
* on its queues so we can safely remove it.
|
|
* This is because for each CS, we increment the ref count and for
|
|
* every CS that was finished we decrement it and we won't arrive
|
|
* to this function unless the ref count is 0
|
|
*/
|
|
|
|
for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
|
|
dma_fence_put(ctx->cs_pending[i]);
|
|
|
|
if (ctx->asid != HL_KERNEL_ASID_ID) {
|
|
/* The engines are stopped as there is no executing CS, but the
|
|
* Coresight might be still working by accessing addresses
|
|
* related to the stopped engines. Hence stop it explicitly.
|
|
* Stop only if this is the compute context, as there can be
|
|
* only one compute context
|
|
*/
|
|
if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
|
|
hl_device_set_debug_mode(hdev, false);
|
|
|
|
hl_vm_ctx_fini(ctx);
|
|
hl_asid_free(hdev, ctx->asid);
|
|
} else {
|
|
hl_mmu_ctx_fini(ctx);
|
|
}
|
|
}
|
|
|
|
void hl_ctx_do_release(struct kref *ref)
|
|
{
|
|
struct hl_ctx *ctx;
|
|
|
|
ctx = container_of(ref, struct hl_ctx, refcount);
|
|
|
|
hl_ctx_fini(ctx);
|
|
|
|
if (ctx->hpriv)
|
|
hl_hpriv_put(ctx->hpriv);
|
|
|
|
kfree(ctx);
|
|
}
|
|
|
|
int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
|
|
{
|
|
struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr;
|
|
struct hl_ctx *ctx;
|
|
int rc;
|
|
|
|
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
|
|
if (!ctx) {
|
|
rc = -ENOMEM;
|
|
goto out_err;
|
|
}
|
|
|
|
mutex_lock(&mgr->ctx_lock);
|
|
rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
|
|
mutex_unlock(&mgr->ctx_lock);
|
|
|
|
if (rc < 0) {
|
|
dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
|
|
goto free_ctx;
|
|
}
|
|
|
|
ctx->handle = rc;
|
|
|
|
rc = hl_ctx_init(hdev, ctx, false);
|
|
if (rc)
|
|
goto remove_from_idr;
|
|
|
|
hl_hpriv_get(hpriv);
|
|
ctx->hpriv = hpriv;
|
|
|
|
/* TODO: remove for multiple contexts per process */
|
|
hpriv->ctx = ctx;
|
|
|
|
/* TODO: remove the following line for multiple process support */
|
|
hdev->compute_ctx = ctx;
|
|
|
|
return 0;
|
|
|
|
remove_from_idr:
|
|
mutex_lock(&mgr->ctx_lock);
|
|
idr_remove(&mgr->ctx_handles, ctx->handle);
|
|
mutex_unlock(&mgr->ctx_lock);
|
|
free_ctx:
|
|
kfree(ctx);
|
|
out_err:
|
|
return rc;
|
|
}
|
|
|
|
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
|
|
{
|
|
if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
|
|
return;
|
|
|
|
dev_warn(hdev->dev,
|
|
"Context %d closed or terminated but its CS are executing\n",
|
|
ctx->asid);
|
|
}
|
|
|
|
/*
 * hl_ctx_init - initialize the fields of a context
 *
 * @hdev: pointer to device structure
 * @ctx: pointer to the context structure to initialize
 * @is_kernel_ctx: true to set up the kernel context, false otherwise
 *
 * Sets the refcount, CS sequence, lock and context-switch tokens. The kernel
 * context is assigned HL_KERNEL_ASID_ID and gets an MMU context; any other
 * context gets an ASID from hl_asid_alloc() and a VM context.
 *
 * Returns 0 on success, the error returned by hl_mmu_ctx_init() for the
 * kernel context, or -ENOMEM when no ASID is free or hl_vm_ctx_init() fails.
 */
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
	int rc;

	ctx->hdev = hdev;
	kref_init(&ctx->refcount);

	ctx->cs_sequence = 1;
	spin_lock_init(&ctx->cs_lock);
	atomic_set(&ctx->thread_ctx_switch_token, 1);
	ctx->thread_ctx_switch_wait_token = 0;

	if (is_kernel_ctx) {
		/* Kernel driver gets ASID 0 */
		ctx->asid = HL_KERNEL_ASID_ID;

		rc = hl_mmu_ctx_init(ctx);
		if (rc) {
			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
			goto err_release_asid;
		}

		return 0;
	}

	ctx->asid = hl_asid_alloc(hdev);
	if (!ctx->asid) {
		dev_err(hdev->dev, "No free ASID, failed to create context\n");
		return -ENOMEM;
	}

	rc = hl_vm_ctx_init(ctx);
	if (rc) {
		dev_err(hdev->dev, "Failed to init mem ctx module\n");
		/* The failure is always reported to the caller as -ENOMEM */
		rc = -ENOMEM;
		goto err_release_asid;
	}

	return 0;

err_release_asid:
	/*
	 * The kernel ASID was assigned directly rather than obtained from
	 * hl_asid_alloc(), so only other ASIDs are handed back
	 */
	if (ctx->asid != HL_KERNEL_ASID_ID)
		hl_asid_free(hdev, ctx->asid);

	return rc;
}
|
|
|
|
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
|
|
{
|
|
kref_get(&ctx->refcount);
|
|
}
|
|
|
|
int hl_ctx_put(struct hl_ctx *ctx)
|
|
{
|
|
return kref_put(&ctx->refcount, hl_ctx_do_release);
|
|
}
|
|
|
|
/*
 * hl_ctx_get_fence - look up the fence stored for a CS sequence number
 *
 * @ctx: pointer to the context structure
 * @seq: CS sequence number to look up
 *
 * The lookup is done under the context's cs_lock.
 *
 * Returns ERR_PTR(-EINVAL) when seq is not below the context's current
 * cs_sequence, NULL when seq is more than HL_MAX_PENDING_CS behind it, and
 * otherwise the result of dma_fence_get() on the matching cs_pending slot.
 */
struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
	struct dma_fence *fence;

	spin_lock(&ctx->cs_lock);

	if (seq >= ctx->cs_sequence) {
		fence = ERR_PTR(-EINVAL);
	} else if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
		fence = NULL;
	} else {
		/*
		 * NOTE(review): masking with (HL_MAX_PENDING_CS - 1) presumably
		 * relies on HL_MAX_PENDING_CS being a power of 2 - confirm
		 */
		fence = dma_fence_get(
				ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
	}

	spin_unlock(&ctx->cs_lock);

	return fence;
}
|
|
|
|
/*
|
|
* hl_ctx_mgr_init - initialize the context manager
|
|
*
|
|
* @mgr: pointer to context manager structure
|
|
*
|
|
* This manager is an object inside the hpriv object of the user process.
|
|
* The function is called when a user process opens the FD.
|
|
*/
|
|
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr)
|
|
{
|
|
mutex_init(&mgr->ctx_lock);
|
|
idr_init(&mgr->ctx_handles);
|
|
}
|
|
|
|
/*
|
|
* hl_ctx_mgr_fini - finalize the context manager
|
|
*
|
|
* @hdev: pointer to device structure
|
|
* @mgr: pointer to context manager structure
|
|
*
|
|
* This function goes over all the contexts in the manager and frees them.
|
|
* It is called when a process closes the FD.
|
|
*/
|
|
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
|
|
{
|
|
struct hl_ctx *ctx;
|
|
struct idr *idp;
|
|
u32 id;
|
|
|
|
idp = &mgr->ctx_handles;
|
|
|
|
idr_for_each_entry(idp, ctx, id)
|
|
hl_ctx_free(hdev, ctx);
|
|
|
|
idr_destroy(&mgr->ctx_handles);
|
|
mutex_destroy(&mgr->ctx_lock);
|
|
}
|