mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 10:20:49 +07:00
This tag contains the following changes for kernel 5.10-rc1:
- Stop using the DRM's dma-fence module and instead use kernel completions. - Support PCIe AER - Use dma_mmap_coherent for memory allocated using dma_alloc_coherent - Use smallest possible alignment when allocating virtual addresses in our MMU driver. - Refactor MMU driver code to be device-oriented - Allow user to check CS status without any sleep - Add an option to map a Command Buffer to the Device's MMU - Expose sync manager resource allocation to user through INFO IOCTL - Convert code to use standard BIT(), GENMASK() and FIELD_PREP() - Many small fixes (casting, better error messages, remove unused defines, h/w configuration fixes, etc.) -----BEGIN PGP SIGNATURE----- iQFKBAABCgA0FiEE7TEboABC71LctBLFZR1NuKta54AFAl9qIHgWHG9kZWQuZ2Fi YmF5QGdtYWlsLmNvbQAKCRBlHU24q1rngKKmB/9EzD+xOFKFM7NOWdbkmJQ8di7P eLHlqSh8EpIKyvVtiNp5oMMUwBM7bBK4gsvHqPK4cQNUN6wvqvDYt9lRhjgSua1A 8heVtWpot0+d8PVT64PDWYxRfkI4UiOhQDpyj2vhrBkepW9cQlxs/DTlJHfDAQRd ihY3H94tX5DO5/wy7W9XC5BChvgMMoj9KIP5+wYdReaPbgQyIx9x7L8GbqE1aS9C daHmFGXxSfJffxDGkIot3XczrmoBIx+9qtL7EZo2HkC6s1IyBnX1KgxvAJR5urt8 FFd0ma3Md+8PLkEeNX3VJrDAnQPskCvmrU2B61PftnI0EC3eW7mRufM0+kiM =zQ26 -----END PGP SIGNATURE----- Merge tag 'misc-habanalabs-next-2020-09-22' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next Oded writes: This tag contains the following changes for kernel 5.10-rc1: - Stop using the DRM's dma-fence module and instead use kernel completions. - Support PCIe AER - Use dma_mmap_coherent for memory allocated using dma_alloc_coherent - Use smallest possible alignment when allocating virtual addresses in our MMU driver. - Refactor MMU driver code to be device-oriented - Allow user to check CS status without any sleep - Add an option to map a Command Buffer to the Device's MMU - Expose sync manager resource allocation to user through INFO IOCTL - Convert code to use standard BIT(), GENMASK() and FIELD_PREP() - Many small fixes (casting, better error messages, remove unused defines, h/w configuration fixes, etc.) * tag 'misc-habanalabs-next-2020-09-22' of git://people.freedesktop.org/~gabbayo/linux: (46 commits) habanalabs: update scratchpad register map habanalabs: add indication of security-enabled F/W habanalabs/gaudi: fix DMA completions max outstanding to 15 habanalabs/gaudi: remove axi drain support habanalabs: update firmware interface file habanalabs: Add an option to map CB to device MMU habanalabs: Save context in a command buffer object habanalabs: no need for DMA_SHARED_BUFFER habanalabs: allow to wait on CS without sleep habanalabs/gaudi: increase timeout for boot fit load habanalabs: add debugfs support for MMU with 6 HOPs habanalabs: add num_hops to hl_mmu_properties habanalabs: refactor MMU as device-oriented habanalabs: rename mmu.c to mmu_v1.c habanalabs: use smallest possible alignment for virtual addresses habanalabs: check flag before reset because of f/w event habanalabs: increase PQ COMP_OFFSET by one nibble habanalabs: Fix alignment issue in cpucp_info structure habanalabs: remove unused define habanalabs: remove unused ASIC function pointer ...
This commit is contained in:
commit
9e07279310
@ -2,13 +2,17 @@ What: /sys/class/habanalabs/hl<n>/armcp_kernel_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the Linux kernel running on the device's CPU
|
||||
Description: Version of the Linux kernel running on the device's CPU.
|
||||
Will be DEPRECATED in Linux kernel version 5.10, and be
|
||||
replaced with cpucp_kernel_ver
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/armcp_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the application running on the device's CPU
|
||||
Will be DEPRECATED in Linux kernel version 5.10, and be
|
||||
replaced with cpucp_ver
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
|
||||
Date: Jun 2019
|
||||
@ -33,6 +37,18 @@ KernelVersion: 5.1
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the Device's CPLD F/W
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
|
||||
Date: Oct 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the Linux kernel running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/cpucp_ver
|
||||
Date: Oct 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: oded.gabbay@gmail.com
|
||||
Description: Version of the application running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/device_type
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
|
@ -7,7 +7,6 @@ config HABANA_AI
|
||||
tristate "HabanaAI accelerators (habanalabs)"
|
||||
depends on PCI && HAS_IOMEM
|
||||
select FRAME_VECTOR
|
||||
select DMA_SHARED_BUFFER
|
||||
select GENERIC_ALLOCATOR
|
||||
select HWMON
|
||||
help
|
||||
|
@ -3,5 +3,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
|
||||
common/asid.o common/habanalabs_ioctl.o \
|
||||
common/command_buffer.o common/hw_queue.o common/irq.o \
|
||||
common/sysfs.o common/hwmon.o common/memory.o \
|
||||
common/command_submission.o common/mmu.o common/firmware_if.o \
|
||||
common/pci.o
|
||||
common/command_submission.o common/mmu.o common/mmu_v1.o \
|
||||
common/firmware_if.o common/pci.o
|
||||
|
@ -13,6 +13,131 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/genalloc.h>
|
||||
|
||||
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct hl_vm_va_block *va_block, *tmp;
|
||||
dma_addr_t bus_addr;
|
||||
u64 virt_addr;
|
||||
u32 page_size = prop->pmmu.page_size;
|
||||
s32 offset;
|
||||
int rc;
|
||||
|
||||
if (!hdev->supports_cb_mapping) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"Cannot map CB because no VA range is allocated for CB mapping\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!hdev->mmu_enable) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"Cannot map CB because MMU is disabled\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&cb->va_block_list);
|
||||
|
||||
for (bus_addr = cb->bus_address;
|
||||
bus_addr < cb->bus_address + cb->size;
|
||||
bus_addr += page_size) {
|
||||
|
||||
virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
|
||||
if (!virt_addr) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate device virtual address for CB\n");
|
||||
rc = -ENOMEM;
|
||||
goto err_va_pool_free;
|
||||
}
|
||||
|
||||
va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
|
||||
if (!va_block) {
|
||||
rc = -ENOMEM;
|
||||
gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
|
||||
goto err_va_pool_free;
|
||||
}
|
||||
|
||||
va_block->start = virt_addr;
|
||||
va_block->end = virt_addr + page_size;
|
||||
va_block->size = page_size;
|
||||
list_add_tail(&va_block->node, &cb->va_block_list);
|
||||
}
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
|
||||
bus_addr = cb->bus_address;
|
||||
offset = 0;
|
||||
list_for_each_entry(va_block, &cb->va_block_list, node) {
|
||||
rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
|
||||
list_is_last(&va_block->node,
|
||||
&cb->va_block_list));
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
|
||||
va_block->start);
|
||||
goto err_va_umap;
|
||||
}
|
||||
|
||||
bus_addr += va_block->size;
|
||||
offset += va_block->size;
|
||||
}
|
||||
|
||||
hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
|
||||
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
|
||||
cb->is_mmu_mapped = true;
|
||||
|
||||
return 0;
|
||||
|
||||
err_va_umap:
|
||||
list_for_each_entry(va_block, &cb->va_block_list, node) {
|
||||
if (offset <= 0)
|
||||
break;
|
||||
hl_mmu_unmap(ctx, va_block->start, va_block->size,
|
||||
offset <= va_block->size);
|
||||
offset -= va_block->size;
|
||||
}
|
||||
|
||||
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
|
||||
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
|
||||
err_va_pool_free:
|
||||
list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
|
||||
gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
|
||||
list_del(&va_block->node);
|
||||
kfree(va_block);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct hl_vm_va_block *va_block, *tmp;
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
|
||||
list_for_each_entry(va_block, &cb->va_block_list, node)
|
||||
if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
|
||||
list_is_last(&va_block->node,
|
||||
&cb->va_block_list)))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"Failed to unmap CB's va 0x%llx\n",
|
||||
va_block->start);
|
||||
|
||||
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
|
||||
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
|
||||
list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
|
||||
gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
|
||||
list_del(&va_block->node);
|
||||
kfree(va_block);
|
||||
}
|
||||
}
|
||||
|
||||
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
|
||||
{
|
||||
if (cb->is_internal)
|
||||
@ -47,6 +172,11 @@ static void cb_release(struct kref *ref)
|
||||
|
||||
hl_debugfs_remove_cb(cb);
|
||||
|
||||
if (cb->is_mmu_mapped)
|
||||
cb_unmap_mem(cb->ctx, cb);
|
||||
|
||||
hl_ctx_put(cb->ctx);
|
||||
|
||||
cb_do_release(hdev, cb);
|
||||
}
|
||||
|
||||
@ -107,11 +237,12 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
|
||||
}
|
||||
|
||||
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
|
||||
u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
|
||||
struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
|
||||
bool map_cb, u64 *handle)
|
||||
{
|
||||
struct hl_cb *cb;
|
||||
bool alloc_new_cb = true;
|
||||
int rc;
|
||||
int rc, ctx_id = ctx->asid;
|
||||
|
||||
/*
|
||||
* Can't use generic function to check this because of special case
|
||||
@ -163,7 +294,21 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
|
||||
}
|
||||
|
||||
cb->hdev = hdev;
|
||||
cb->ctx_id = ctx_id;
|
||||
cb->ctx = ctx;
|
||||
hl_ctx_get(hdev, cb->ctx);
|
||||
|
||||
if (map_cb) {
|
||||
if (ctx_id == HL_KERNEL_ASID_ID) {
|
||||
dev_err(hdev->dev,
|
||||
"CB mapping is not supported for kernel context\n");
|
||||
rc = -EINVAL;
|
||||
goto release_cb;
|
||||
}
|
||||
|
||||
rc = cb_map_mem(ctx, cb);
|
||||
if (rc)
|
||||
goto release_cb;
|
||||
}
|
||||
|
||||
spin_lock(&mgr->cb_lock);
|
||||
rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
|
||||
@ -171,10 +316,10 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
|
||||
|
||||
if (rc < 0) {
|
||||
dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
|
||||
goto release_cb;
|
||||
goto unmap_mem;
|
||||
}
|
||||
|
||||
cb->id = rc;
|
||||
cb->id = (u64) rc;
|
||||
|
||||
kref_init(&cb->refcount);
|
||||
spin_lock_init(&cb->lock);
|
||||
@ -183,14 +328,18 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
|
||||
* idr is 32-bit so we can safely OR it with a mask that is above
|
||||
* 32 bit
|
||||
*/
|
||||
*handle = cb->id | HL_MMAP_CB_MASK;
|
||||
*handle = cb->id | HL_MMAP_TYPE_CB;
|
||||
*handle <<= PAGE_SHIFT;
|
||||
|
||||
hl_debugfs_add_cb(cb);
|
||||
|
||||
return 0;
|
||||
|
||||
unmap_mem:
|
||||
if (cb->is_mmu_mapped)
|
||||
cb_unmap_mem(cb->ctx, cb);
|
||||
release_cb:
|
||||
hl_ctx_put(cb->ctx);
|
||||
cb_do_release(hdev, cb);
|
||||
out_err:
|
||||
*handle = 0;
|
||||
@ -250,9 +399,10 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
args->in.cb_size, HL_MAX_CB_SIZE);
|
||||
rc = -EINVAL;
|
||||
} else {
|
||||
rc = hl_cb_create(hdev, &hpriv->cb_mgr,
|
||||
args->in.cb_size, &handle,
|
||||
hpriv->ctx->asid, false);
|
||||
rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx,
|
||||
args->in.cb_size, false,
|
||||
!!(args->in.flags & HL_CB_FLAGS_MAP),
|
||||
&handle);
|
||||
}
|
||||
|
||||
memset(args, 0, sizeof(*args));
|
||||
@ -300,11 +450,14 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_cb *cb;
|
||||
phys_addr_t address;
|
||||
u32 handle, user_cb_size;
|
||||
int rc;
|
||||
|
||||
/* We use the page offset to hold the idr and thus we need to clear
|
||||
* it before doing the mmap itself
|
||||
*/
|
||||
handle = vma->vm_pgoff;
|
||||
vma->vm_pgoff = 0;
|
||||
|
||||
/* reference was taken here */
|
||||
cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
|
||||
@ -356,12 +509,8 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
|
||||
|
||||
vma->vm_private_data = cb;
|
||||
|
||||
/* Calculate address for CB */
|
||||
address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
|
||||
|
||||
rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
|
||||
address, cb->size);
|
||||
|
||||
rc = hdev->asic_funcs->cb_mmap(hdev, vma, (void *) cb->kernel_address,
|
||||
cb->bus_address, cb->size);
|
||||
if (rc) {
|
||||
spin_lock(&cb->lock);
|
||||
cb->mmap = false;
|
||||
@ -425,7 +574,7 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
|
||||
if (kref_put(&cb->refcount, cb_release) != 1)
|
||||
dev_err(hdev->dev,
|
||||
"CB %d for CTX ID %d is still alive\n",
|
||||
id, cb->ctx_id);
|
||||
id, cb->ctx->asid);
|
||||
}
|
||||
|
||||
idr_destroy(&mgr->cb_handles);
|
||||
@ -438,8 +587,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
|
||||
struct hl_cb *cb;
|
||||
int rc;
|
||||
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
|
||||
HL_KERNEL_ASID_ID, internal_cb);
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size,
|
||||
internal_cb, false, &cb_handle);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate CB for the kernel driver %d\n", rc);
|
||||
@ -495,3 +644,45 @@ int hl_cb_pool_fini(struct hl_device *hdev)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hl_cb_va_pool_init(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
int rc;
|
||||
|
||||
if (!hdev->supports_cb_mapping)
|
||||
return 0;
|
||||
|
||||
ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
|
||||
if (!ctx->cb_va_pool) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to create VA gen pool for CB mapping\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
|
||||
prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to add memory to VA gen pool for CB mapping\n");
|
||||
goto err_pool_destroy;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_pool_destroy:
|
||||
gen_pool_destroy(ctx->cb_va_pool);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
void hl_cb_va_pool_fini(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
||||
if (!hdev->supports_cb_mapping)
|
||||
return;
|
||||
|
||||
gen_pool_destroy(ctx->cb_va_pool);
|
||||
}
|
||||
|
@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref)
|
||||
hw_sob->q_idx, hw_sob->sob_id);
|
||||
}
|
||||
|
||||
static const char *hl_fence_get_driver_name(struct dma_fence *fence)
|
||||
{
|
||||
return "HabanaLabs";
|
||||
}
|
||||
|
||||
static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
|
||||
{
|
||||
struct hl_cs_compl *hl_cs_compl =
|
||||
container_of(fence, struct hl_cs_compl, base_fence);
|
||||
|
||||
return dev_name(hl_cs_compl->hdev->dev);
|
||||
}
|
||||
|
||||
static bool hl_fence_enable_signaling(struct dma_fence *fence)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void hl_fence_release(struct dma_fence *fence)
|
||||
static void hl_fence_release(struct kref *kref)
|
||||
{
|
||||
struct hl_fence *fence =
|
||||
container_of(kref, struct hl_fence, refcount);
|
||||
struct hl_cs_compl *hl_cs_cmpl =
|
||||
container_of(fence, struct hl_cs_compl, base_fence);
|
||||
struct hl_device *hdev = hl_cs_cmpl->hdev;
|
||||
@ -99,15 +83,27 @@ static void hl_fence_release(struct dma_fence *fence)
|
||||
}
|
||||
|
||||
free:
|
||||
kfree_rcu(hl_cs_cmpl, base_fence.rcu);
|
||||
kfree(hl_cs_cmpl);
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops hl_fence_ops = {
|
||||
.get_driver_name = hl_fence_get_driver_name,
|
||||
.get_timeline_name = hl_fence_get_timeline_name,
|
||||
.enable_signaling = hl_fence_enable_signaling,
|
||||
.release = hl_fence_release
|
||||
};
|
||||
void hl_fence_put(struct hl_fence *fence)
|
||||
{
|
||||
if (fence)
|
||||
kref_put(&fence->refcount, hl_fence_release);
|
||||
}
|
||||
|
||||
void hl_fence_get(struct hl_fence *fence)
|
||||
{
|
||||
if (fence)
|
||||
kref_get(&fence->refcount);
|
||||
}
|
||||
|
||||
static void hl_fence_init(struct hl_fence *fence)
|
||||
{
|
||||
kref_init(&fence->refcount);
|
||||
fence->error = 0;
|
||||
init_completion(&fence->completion);
|
||||
}
|
||||
|
||||
static void cs_get(struct hl_cs *cs)
|
||||
{
|
||||
@ -256,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
|
||||
ctx->cs_counters.parsing_drop_cnt;
|
||||
hdev->aggregated_cs_counters.queue_full_drop_cnt +=
|
||||
ctx->cs_counters.queue_full_drop_cnt;
|
||||
hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
|
||||
ctx->cs_counters.max_cs_in_flight_drop_cnt;
|
||||
}
|
||||
|
||||
static void cs_do_release(struct kref *ref)
|
||||
@ -336,7 +334,7 @@ static void cs_do_release(struct kref *ref)
|
||||
* In case the wait for signal CS was submitted, the put occurs
|
||||
* in init_signal_wait_cs() right before hanging on the PQ.
|
||||
*/
|
||||
dma_fence_put(cs->signal_fence);
|
||||
hl_fence_put(cs->signal_fence);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -348,19 +346,18 @@ static void cs_do_release(struct kref *ref)
|
||||
hl_ctx_put(cs->ctx);
|
||||
|
||||
/* We need to mark an error for not submitted because in that case
|
||||
* the dma fence release flow is different. Mainly, we don't need
|
||||
* the hl fence release flow is different. Mainly, we don't need
|
||||
* to handle hw_sob for signal/wait
|
||||
*/
|
||||
if (cs->timedout)
|
||||
dma_fence_set_error(cs->fence, -ETIMEDOUT);
|
||||
cs->fence->error = -ETIMEDOUT;
|
||||
else if (cs->aborted)
|
||||
dma_fence_set_error(cs->fence, -EIO);
|
||||
cs->fence->error = -EIO;
|
||||
else if (!cs->submitted)
|
||||
dma_fence_set_error(cs->fence, -EBUSY);
|
||||
|
||||
dma_fence_signal(cs->fence);
|
||||
dma_fence_put(cs->fence);
|
||||
cs->fence->error = -EBUSY;
|
||||
|
||||
complete_all(&cs->fence->completion);
|
||||
hl_fence_put(cs->fence);
|
||||
cs_counters_aggregate(hdev, cs->ctx);
|
||||
|
||||
kfree(cs->jobs_in_queue_cnt);
|
||||
@ -401,7 +398,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
enum hl_cs_type cs_type, struct hl_cs **cs_new)
|
||||
{
|
||||
struct hl_cs_compl *cs_cmpl;
|
||||
struct dma_fence *other = NULL;
|
||||
struct hl_fence *other = NULL;
|
||||
struct hl_cs *cs;
|
||||
int rc;
|
||||
|
||||
@ -434,9 +431,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
cs_cmpl->cs_seq = ctx->cs_sequence;
|
||||
other = ctx->cs_pending[cs_cmpl->cs_seq &
|
||||
(hdev->asic_prop.max_pending_cs - 1)];
|
||||
if ((other) && (!dma_fence_is_signaled(other))) {
|
||||
dev_dbg(hdev->dev,
|
||||
|
||||
if (other && !completion_done(&other->completion)) {
|
||||
dev_dbg_ratelimited(hdev->dev,
|
||||
"Rejecting CS because of too many in-flights CS\n");
|
||||
ctx->cs_counters.max_cs_in_flight_drop_cnt++;
|
||||
rc = -EAGAIN;
|
||||
goto free_fence;
|
||||
}
|
||||
@ -448,8 +447,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
goto free_fence;
|
||||
}
|
||||
|
||||
dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
|
||||
ctx->asid, ctx->cs_sequence);
|
||||
/* init hl_fence */
|
||||
hl_fence_init(&cs_cmpl->base_fence);
|
||||
|
||||
cs->sequence = cs_cmpl->cs_seq;
|
||||
|
||||
@ -458,9 +457,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
&cs_cmpl->base_fence;
|
||||
ctx->cs_sequence++;
|
||||
|
||||
dma_fence_get(&cs_cmpl->base_fence);
|
||||
hl_fence_get(&cs_cmpl->base_fence);
|
||||
|
||||
dma_fence_put(other);
|
||||
hl_fence_put(other);
|
||||
|
||||
spin_unlock(&ctx->cs_lock);
|
||||
|
||||
@ -690,8 +689,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
||||
rc = -ENOMEM;
|
||||
if (is_kernel_allocated_cb)
|
||||
goto release_cb;
|
||||
else
|
||||
goto free_cs_object;
|
||||
|
||||
goto free_cs_object;
|
||||
}
|
||||
|
||||
job->id = i + 1;
|
||||
@ -773,7 +772,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
||||
struct hl_ctx *ctx = hpriv->ctx;
|
||||
struct hl_cs_chunk *cs_chunk_array, *chunk;
|
||||
struct hw_queue_properties *hw_queue_prop;
|
||||
struct dma_fence *sig_fence = NULL;
|
||||
struct hl_fence *sig_fence = NULL;
|
||||
struct hl_cs_job *job;
|
||||
struct hl_cs *cs;
|
||||
struct hl_cb *cb;
|
||||
@ -883,14 +882,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
||||
dev_err(hdev->dev,
|
||||
"CS seq 0x%llx is not of a signal CS\n",
|
||||
signal_seq);
|
||||
dma_fence_put(sig_fence);
|
||||
hl_fence_put(sig_fence);
|
||||
rc = -EINVAL;
|
||||
goto free_signal_seq_array;
|
||||
}
|
||||
|
||||
if (dma_fence_is_signaled(sig_fence)) {
|
||||
if (completion_done(&sig_fence->completion)) {
|
||||
/* signal CS already finished */
|
||||
dma_fence_put(sig_fence);
|
||||
hl_fence_put(sig_fence);
|
||||
rc = 0;
|
||||
goto free_signal_seq_array;
|
||||
}
|
||||
@ -902,7 +901,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
||||
rc = allocate_cs(hdev, ctx, cs_type, &cs);
|
||||
if (rc) {
|
||||
if (cs_type == CS_TYPE_WAIT)
|
||||
dma_fence_put(sig_fence);
|
||||
hl_fence_put(sig_fence);
|
||||
hl_ctx_put(ctx);
|
||||
goto free_signal_seq_array;
|
||||
}
|
||||
@ -1162,7 +1161,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
|
||||
struct hl_ctx *ctx, u64 timeout_us, u64 seq)
|
||||
{
|
||||
struct dma_fence *fence;
|
||||
struct hl_fence *fence;
|
||||
unsigned long timeout;
|
||||
long rc;
|
||||
|
||||
@ -1181,12 +1180,18 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
|
||||
"Can't wait on CS %llu because current CS is at seq %llu\n",
|
||||
seq, ctx->cs_sequence);
|
||||
} else if (fence) {
|
||||
rc = dma_fence_wait_timeout(fence, true, timeout);
|
||||
if (!timeout_us)
|
||||
rc = completion_done(&fence->completion);
|
||||
else
|
||||
rc = wait_for_completion_interruptible_timeout(
|
||||
&fence->completion, timeout);
|
||||
|
||||
if (fence->error == -ETIMEDOUT)
|
||||
rc = -ETIMEDOUT;
|
||||
else if (fence->error == -EIO)
|
||||
rc = -EIO;
|
||||
dma_fence_put(fence);
|
||||
|
||||
hl_fence_put(fence);
|
||||
} else {
|
||||
dev_dbg(hdev->dev,
|
||||
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
|
||||
|
@ -23,7 +23,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
|
||||
*/
|
||||
|
||||
for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
|
||||
dma_fence_put(ctx->cs_pending[i]);
|
||||
hl_fence_put(ctx->cs_pending[i]);
|
||||
|
||||
kfree(ctx->cs_pending);
|
||||
|
||||
@ -37,6 +37,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
|
||||
if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
|
||||
hl_device_set_debug_mode(hdev, false);
|
||||
|
||||
hl_cb_va_pool_fini(ctx);
|
||||
hl_vm_ctx_fini(ctx);
|
||||
hl_asid_free(hdev, ctx->asid);
|
||||
} else {
|
||||
@ -128,7 +129,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
|
||||
atomic_set(&ctx->thread_ctx_switch_token, 1);
|
||||
ctx->thread_ctx_switch_wait_token = 0;
|
||||
ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
|
||||
sizeof(struct dma_fence *),
|
||||
sizeof(struct hl_fence *),
|
||||
GFP_KERNEL);
|
||||
if (!ctx->cs_pending)
|
||||
return -ENOMEM;
|
||||
@ -155,15 +156,24 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
|
||||
goto err_asid_free;
|
||||
}
|
||||
|
||||
rc = hl_cb_va_pool_init(ctx);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to init VA pool for mapped CB\n");
|
||||
goto err_vm_ctx_fini;
|
||||
}
|
||||
|
||||
rc = hdev->asic_funcs->ctx_init(ctx);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "ctx_init failed\n");
|
||||
goto err_vm_ctx_fini;
|
||||
goto err_cb_va_pool_fini;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_cb_va_pool_fini:
|
||||
hl_cb_va_pool_fini(ctx);
|
||||
err_vm_ctx_fini:
|
||||
hl_vm_ctx_fini(ctx);
|
||||
err_asid_free:
|
||||
@ -184,10 +194,10 @@ int hl_ctx_put(struct hl_ctx *ctx)
|
||||
return kref_put(&ctx->refcount, hl_ctx_do_release);
|
||||
}
|
||||
|
||||
struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
|
||||
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
|
||||
{
|
||||
struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
|
||||
struct dma_fence *fence;
|
||||
struct hl_fence *fence;
|
||||
|
||||
spin_lock(&ctx->cs_lock);
|
||||
|
||||
@ -201,8 +211,9 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fence = dma_fence_get(
|
||||
ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
|
||||
fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
|
||||
hl_fence_get(fence);
|
||||
|
||||
spin_unlock(&ctx->cs_lock);
|
||||
|
||||
return fence;
|
||||
|
@ -21,7 +21,7 @@ static struct dentry *hl_debug_root;
|
||||
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
|
||||
u8 i2c_reg, long *val)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
if (hl_device_disabled_or_in_reset(hdev))
|
||||
@ -29,8 +29,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.i2c_bus = i2c_bus;
|
||||
pkt.i2c_addr = i2c_addr;
|
||||
pkt.i2c_reg = i2c_reg;
|
||||
@ -47,7 +47,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
|
||||
static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
|
||||
u8 i2c_reg, u32 val)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
if (hl_device_disabled_or_in_reset(hdev))
|
||||
@ -55,8 +55,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.i2c_bus = i2c_bus;
|
||||
pkt.i2c_addr = i2c_addr;
|
||||
pkt.i2c_reg = i2c_reg;
|
||||
@ -73,7 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
|
||||
|
||||
static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
if (hl_device_disabled_or_in_reset(hdev))
|
||||
@ -81,8 +81,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.led_index = cpu_to_le32(led);
|
||||
pkt.value = cpu_to_le64(state);
|
||||
|
||||
@ -110,8 +110,8 @@ static int command_buffers_show(struct seq_file *s, void *data)
|
||||
seq_puts(s, "---------------------------------------------------------------\n");
|
||||
}
|
||||
seq_printf(s,
|
||||
" %03d %d 0x%08x %d %d %d\n",
|
||||
cb->id, cb->ctx_id, cb->size,
|
||||
" %03llu %d 0x%08x %d %d %d\n",
|
||||
cb->id, cb->ctx->asid, cb->size,
|
||||
kref_read(&cb->refcount),
|
||||
cb->mmap, cb->cs_cnt);
|
||||
}
|
||||
@ -354,6 +354,14 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
|
||||
mmu_specs->hop4_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop5_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_specs,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop5_mask,
|
||||
mmu_specs->hop5_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_next_hop_addr(u64 curr_pte)
|
||||
{
|
||||
if (curr_pte & PAGE_PRESENT_MASK)
|
||||
@ -377,6 +385,7 @@ static int mmu_show(struct seq_file *s, void *data)
|
||||
hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
|
||||
hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
|
||||
hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
|
||||
hop5_addr = 0, hop5_pte_addr = 0, hop5_pte = 0,
|
||||
virt_addr = dev_entry->mmu_addr;
|
||||
|
||||
if (!hdev->mmu_enable)
|
||||
@ -428,20 +437,49 @@ static int mmu_show(struct seq_file *s, void *data)
|
||||
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
|
||||
hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
|
||||
|
||||
if (!(hop3_pte & LAST_MASK)) {
|
||||
if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
|
||||
if (!(hop3_pte & LAST_MASK)) {
|
||||
hop4_addr = get_next_hop_addr(hop3_pte);
|
||||
|
||||
if (hop4_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
|
||||
hop4_addr, virt_addr);
|
||||
hop4_pte = hdev->asic_funcs->read_pte(hdev,
|
||||
hop4_pte_addr);
|
||||
if (!(hop4_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
} else {
|
||||
if (!(hop3_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
}
|
||||
} else {
|
||||
hop4_addr = get_next_hop_addr(hop3_pte);
|
||||
|
||||
if (hop4_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
|
||||
virt_addr);
|
||||
hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
|
||||
if (!(hop4_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
} else {
|
||||
if (!(hop3_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
|
||||
hop4_addr, virt_addr);
|
||||
hop4_pte = hdev->asic_funcs->read_pte(hdev,
|
||||
hop4_pte_addr);
|
||||
if (!(hop4_pte & LAST_MASK)) {
|
||||
hop5_addr = get_next_hop_addr(hop4_pte);
|
||||
|
||||
if (hop5_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop5_pte_addr = get_hop5_pte_addr(ctx, mmu_prop,
|
||||
hop5_addr, virt_addr);
|
||||
hop5_pte = hdev->asic_funcs->read_pte(hdev,
|
||||
hop5_pte_addr);
|
||||
if (!(hop5_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
} else {
|
||||
if (!(hop4_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
}
|
||||
}
|
||||
|
||||
seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
|
||||
@ -463,10 +501,22 @@ static int mmu_show(struct seq_file *s, void *data)
|
||||
seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
|
||||
seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
|
||||
|
||||
if (!(hop3_pte & LAST_MASK)) {
|
||||
if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
|
||||
if (!(hop3_pte & LAST_MASK)) {
|
||||
seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
|
||||
seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
|
||||
seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
|
||||
}
|
||||
} else {
|
||||
seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
|
||||
seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
|
||||
seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
|
||||
|
||||
if (!(hop4_pte & LAST_MASK)) {
|
||||
seq_printf(s, "hop5_addr: 0x%llx\n", hop5_addr);
|
||||
seq_printf(s, "hop5_pte_addr: 0x%llx\n", hop5_pte_addr);
|
||||
seq_printf(s, "hop5_pte: 0x%llx\n", hop5_pte);
|
||||
}
|
||||
}
|
||||
|
||||
goto out;
|
||||
|
@ -123,9 +123,13 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
|
||||
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
struct hl_fpriv *hpriv = filp->private_data;
|
||||
unsigned long vm_pgoff;
|
||||
|
||||
if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
|
||||
vma->vm_pgoff ^= HL_MMAP_CB_MASK;
|
||||
vm_pgoff = vma->vm_pgoff;
|
||||
vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
|
||||
|
||||
switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
|
||||
case HL_MMAP_TYPE_CB:
|
||||
return hl_cb_mmap(hpriv, vma);
|
||||
}
|
||||
|
||||
@ -286,7 +290,7 @@ static int device_early_init(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
|
||||
snprintf(workq_name, 32, "hl-free-jobs-%u", i);
|
||||
snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
|
||||
hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
|
||||
if (hdev->cq_wq[i] == NULL) {
|
||||
dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
|
||||
@ -317,6 +321,10 @@ static int device_early_init(struct hl_device *hdev)
|
||||
goto free_chip_info;
|
||||
}
|
||||
|
||||
rc = hl_mmu_if_set_funcs(hdev);
|
||||
if (rc)
|
||||
goto free_idle_busy_ts_arr;
|
||||
|
||||
hl_cb_mgr_init(&hdev->kernel_cb_mgr);
|
||||
|
||||
mutex_init(&hdev->send_cpu_message_lock);
|
||||
@ -330,6 +338,8 @@ static int device_early_init(struct hl_device *hdev)
|
||||
|
||||
return 0;
|
||||
|
||||
free_idle_busy_ts_arr:
|
||||
kfree(hdev->idle_busy_ts_arr);
|
||||
free_chip_info:
|
||||
kfree(hdev->hl_chip_info);
|
||||
free_eq_wq:
|
||||
@ -871,7 +881,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
|
||||
* so this message won't be sent
|
||||
*/
|
||||
if (hl_fw_send_pci_access_msg(hdev,
|
||||
ARMCP_PACKET_DISABLE_PCI_ACCESS))
|
||||
CPUCP_PACKET_DISABLE_PCI_ACCESS))
|
||||
dev_warn(hdev->dev,
|
||||
"Failed to disable PCI access by F/W\n");
|
||||
}
|
||||
|
@ -68,9 +68,9 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
|
||||
|
||||
int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
|
||||
{
|
||||
struct armcp_packet pkt = {};
|
||||
struct cpucp_packet pkt = {};
|
||||
|
||||
pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
|
||||
sizeof(pkt), 0, NULL);
|
||||
@ -79,7 +79,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
|
||||
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
|
||||
u16 len, u32 timeout, long *result)
|
||||
{
|
||||
struct armcp_packet *pkt;
|
||||
struct cpucp_packet *pkt;
|
||||
dma_addr_t pkt_dma_addr;
|
||||
u32 tmp;
|
||||
int rc = 0;
|
||||
@ -111,7 +111,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
|
||||
}
|
||||
|
||||
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
|
||||
(tmp == ARMCP_PACKET_FENCE_VAL), 1000,
|
||||
(tmp == CPUCP_PACKET_FENCE_VAL), 1000,
|
||||
timeout, true);
|
||||
|
||||
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
|
||||
@ -124,12 +124,12 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
|
||||
|
||||
tmp = le32_to_cpu(pkt->ctl);
|
||||
|
||||
rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
|
||||
rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
|
||||
rc,
|
||||
(tmp & ARMCP_PKT_CTL_OPCODE_MASK)
|
||||
>> ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
(tmp & CPUCP_PKT_CTL_OPCODE_MASK)
|
||||
>> CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
rc = -EIO;
|
||||
} else if (result) {
|
||||
*result = (long) le64_to_cpu(pkt->result);
|
||||
@ -145,14 +145,14 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
|
||||
|
||||
int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.value = cpu_to_le64(event_type);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
@ -167,15 +167,15 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
|
||||
int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
|
||||
size_t irq_arr_size)
|
||||
{
|
||||
struct armcp_unmask_irq_arr_packet *pkt;
|
||||
struct cpucp_unmask_irq_arr_packet *pkt;
|
||||
size_t total_pkt_size;
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
|
||||
total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
|
||||
irq_arr_size;
|
||||
|
||||
/* data should be aligned to 8 bytes in order to ArmCP to copy it */
|
||||
/* data should be aligned to 8 bytes in order to CPU-CP to copy it */
|
||||
total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
|
||||
|
||||
/* total_pkt_size is casted to u16 later on */
|
||||
@ -191,8 +191,8 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
|
||||
pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
|
||||
memcpy(&pkt->irqs, irq_arr, irq_arr_size);
|
||||
|
||||
pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
|
||||
total_pkt_size, 0, &result);
|
||||
@ -207,19 +207,19 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
|
||||
|
||||
int hl_fw_test_cpu_queue(struct hl_device *hdev)
|
||||
{
|
||||
struct armcp_packet test_pkt = {};
|
||||
struct cpucp_packet test_pkt = {};
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
|
||||
test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
|
||||
sizeof(test_pkt), 0, &result);
|
||||
|
||||
if (!rc) {
|
||||
if (result != ARMCP_PACKET_FENCE_VAL)
|
||||
if (result != CPUCP_PACKET_FENCE_VAL)
|
||||
dev_err(hdev->dev,
|
||||
"CPU queue test failed (0x%08lX)\n", result);
|
||||
} else {
|
||||
@ -251,61 +251,61 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
|
||||
|
||||
int hl_fw_send_heartbeat(struct hl_device *hdev)
|
||||
{
|
||||
struct armcp_packet hb_pkt = {};
|
||||
struct cpucp_packet hb_pkt = {};
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
|
||||
hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
|
||||
sizeof(hb_pkt), 0, &result);
|
||||
|
||||
if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
|
||||
if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
|
||||
rc = -EIO;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int hl_fw_armcp_info_get(struct hl_device *hdev)
|
||||
int hl_fw_cpucp_info_get(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct armcp_packet pkt = {};
|
||||
void *armcp_info_cpu_addr;
|
||||
dma_addr_t armcp_info_dma_addr;
|
||||
struct cpucp_packet pkt = {};
|
||||
void *cpucp_info_cpu_addr;
|
||||
dma_addr_t cpucp_info_dma_addr;
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
armcp_info_cpu_addr =
|
||||
cpucp_info_cpu_addr =
|
||||
hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
|
||||
sizeof(struct armcp_info),
|
||||
&armcp_info_dma_addr);
|
||||
if (!armcp_info_cpu_addr) {
|
||||
sizeof(struct cpucp_info),
|
||||
&cpucp_info_dma_addr);
|
||||
if (!cpucp_info_cpu_addr) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate DMA memory for ArmCP info packet\n");
|
||||
"Failed to allocate DMA memory for CPU-CP info packet\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
|
||||
memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.addr = cpu_to_le64(armcp_info_dma_addr);
|
||||
pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
|
||||
pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
HL_ARMCP_INFO_TIMEOUT_USEC, &result);
|
||||
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to handle ArmCP info pkt, error %d\n", rc);
|
||||
"Failed to handle CPU-CP info pkt, error %d\n", rc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
memcpy(&prop->armcp_info, armcp_info_cpu_addr,
|
||||
sizeof(prop->armcp_info));
|
||||
memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
|
||||
sizeof(prop->cpucp_info));
|
||||
|
||||
rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
|
||||
rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to build hwmon channel info, error %d\n", rc);
|
||||
@ -315,14 +315,14 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
|
||||
|
||||
out:
|
||||
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
|
||||
sizeof(struct armcp_info), armcp_info_cpu_addr);
|
||||
sizeof(struct cpucp_info), cpucp_info_cpu_addr);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
|
||||
{
|
||||
struct armcp_packet pkt = {};
|
||||
struct cpucp_packet pkt = {};
|
||||
void *eeprom_info_cpu_addr;
|
||||
dma_addr_t eeprom_info_dma_addr;
|
||||
long result;
|
||||
@ -333,23 +333,24 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
|
||||
max_size, &eeprom_info_dma_addr);
|
||||
if (!eeprom_info_cpu_addr) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate DMA memory for ArmCP EEPROM packet\n");
|
||||
"Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
memset(eeprom_info_cpu_addr, 0, max_size);
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
|
||||
pkt.data_max_size = cpu_to_le32(max_size);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
|
||||
HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to handle ArmCP EEPROM packet, error %d\n", rc);
|
||||
"Failed to handle CPU-CP EEPROM packet, error %d\n",
|
||||
rc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -363,6 +364,77 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
|
||||
return rc;
|
||||
}
|
||||
|
||||
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
|
||||
struct hl_info_pci_counters *counters)
|
||||
{
|
||||
struct cpucp_packet pkt = {};
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
/* Fetch PCI rx counter */
|
||||
pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
counters->rx_throughput = result;
|
||||
|
||||
/* Fetch PCI tx counter */
|
||||
pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
counters->tx_throughput = result;
|
||||
|
||||
/* Fetch PCI replay counter */
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
counters->replay_cnt = (u32) result;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
|
||||
{
|
||||
struct cpucp_packet pkt = {};
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to handle CpuCP total energy pkt, error %d\n",
|
||||
rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
*total_energy = result;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
|
||||
{
|
||||
u32 err_val;
|
||||
@ -402,8 +474,11 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
|
||||
"Device boot error - NIC F/W initialization failed\n");
|
||||
}
|
||||
|
||||
static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
|
||||
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
|
||||
{
|
||||
/* Some of the status codes below are deprecated in newer f/w
|
||||
* versions but we keep them here for backward compatibility
|
||||
*/
|
||||
switch (status) {
|
||||
case CPU_BOOT_STATUS_NA:
|
||||
dev_err(hdev->dev,
|
||||
@ -449,6 +524,48 @@ static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
|
||||
}
|
||||
}
|
||||
|
||||
int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 boot_err0_reg, u32 timeout)
|
||||
{
|
||||
u32 status;
|
||||
int rc;
|
||||
|
||||
if (!hdev->cpu_enable)
|
||||
return 0;
|
||||
|
||||
/* Need to check two possible scenarios:
|
||||
*
|
||||
* CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
|
||||
* the preboot is waiting for the boot fit
|
||||
*
|
||||
* All other status values - for older firmwares where the uboot was
|
||||
* loaded from the FLASH
|
||||
*/
|
||||
rc = hl_poll_timeout(
|
||||
hdev,
|
||||
cpu_boot_status_reg,
|
||||
status,
|
||||
(status == CPU_BOOT_STATUS_IN_UBOOT) ||
|
||||
(status == CPU_BOOT_STATUS_DRAM_RDY) ||
|
||||
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
|
||||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
|
||||
(status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
|
||||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
|
||||
10000,
|
||||
timeout);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to read preboot version\n");
|
||||
detect_cpu_boot_status(hdev, status);
|
||||
fw_read_errors(hdev, boot_err0_reg);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
|
||||
u32 boot_err0_reg, bool skip_bmc,
|
||||
@ -514,15 +631,11 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
10000,
|
||||
cpu_timeout);
|
||||
|
||||
/* Read U-Boot, preboot versions now in case we will later fail */
|
||||
/* Read U-Boot version now in case we will later fail */
|
||||
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
|
||||
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
|
||||
|
||||
/* Some of the status codes below are deprecated in newer f/w
|
||||
* versions but we keep them here for backward compatibility
|
||||
*/
|
||||
if (rc) {
|
||||
hl_detect_cpu_boot_status(hdev, status);
|
||||
detect_cpu_boot_status(hdev, status);
|
||||
rc = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
@ -8,21 +8,33 @@
|
||||
#ifndef HABANALABSP_H_
|
||||
#define HABANALABSP_H_
|
||||
|
||||
#include "../include/common/armcp_if.h"
|
||||
#include "../include/common/cpucp_if.h"
|
||||
#include "../include/common/qman_if.h"
|
||||
#include <uapi/misc/habanalabs.h>
|
||||
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/iopoll.h>
|
||||
#include <linux/irqreturn.h>
|
||||
#include <linux/dma-fence.h>
|
||||
#include <linux/dma-direction.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/hashtable.h>
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
#define HL_NAME "habanalabs"
|
||||
|
||||
#define HL_MMAP_CB_MASK (0x8000000000000000ull >> PAGE_SHIFT)
|
||||
/* Use upper bits of mmap offset to store habana driver specific information.
|
||||
* bits[63:62] - Encode mmap type
|
||||
* bits[45:0] - mmap offset value
|
||||
*
|
||||
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
|
||||
* defines are w.r.t to PAGE_SIZE
|
||||
*/
|
||||
#define HL_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
|
||||
#define HL_MMAP_TYPE_MASK (0x3ull << HL_MMAP_TYPE_SHIFT)
|
||||
#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)
|
||||
|
||||
#define HL_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFull >> PAGE_SHIFT)
|
||||
#define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK)
|
||||
|
||||
#define HL_PENDING_RESET_PER_SEC 30
|
||||
|
||||
@ -34,8 +46,8 @@
|
||||
|
||||
#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */
|
||||
|
||||
#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
|
||||
#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
|
||||
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
|
||||
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
|
||||
|
||||
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
|
||||
|
||||
@ -66,6 +78,8 @@
|
||||
|
||||
#define HL_PCI_NUM_BARS 6
|
||||
|
||||
#define HL_MAX_DCORES 4
|
||||
|
||||
/**
|
||||
* struct pgt_info - MMU hop page info.
|
||||
* @node: hash linked-list node for the pgts shadow hash of pgts.
|
||||
@ -222,12 +236,15 @@ enum hl_device_hw_state {
|
||||
* @hop2_shift: shift of hop 2 mask.
|
||||
* @hop3_shift: shift of hop 3 mask.
|
||||
* @hop4_shift: shift of hop 4 mask.
|
||||
* @hop5_shift: shift of hop 5 mask.
|
||||
* @hop0_mask: mask to get the PTE address in hop 0.
|
||||
* @hop1_mask: mask to get the PTE address in hop 1.
|
||||
* @hop2_mask: mask to get the PTE address in hop 2.
|
||||
* @hop3_mask: mask to get the PTE address in hop 3.
|
||||
* @hop4_mask: mask to get the PTE address in hop 4.
|
||||
* @hop5_mask: mask to get the PTE address in hop 5.
|
||||
* @page_size: default page size used to allocate memory.
|
||||
* @num_hops: The amount of hops supported by the translation table.
|
||||
*/
|
||||
struct hl_mmu_properties {
|
||||
u64 start_addr;
|
||||
@ -237,18 +254,21 @@ struct hl_mmu_properties {
|
||||
u64 hop2_shift;
|
||||
u64 hop3_shift;
|
||||
u64 hop4_shift;
|
||||
u64 hop5_shift;
|
||||
u64 hop0_mask;
|
||||
u64 hop1_mask;
|
||||
u64 hop2_mask;
|
||||
u64 hop3_mask;
|
||||
u64 hop4_mask;
|
||||
u64 hop5_mask;
|
||||
u32 page_size;
|
||||
u32 num_hops;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct asic_fixed_properties - ASIC specific immutable properties.
|
||||
* @hw_queues_props: H/W queues properties.
|
||||
* @armcp_info: received various information from ArmCP regarding the H/W, e.g.
|
||||
* @cpucp_info: received various information from CPU-CP regarding the H/W, e.g.
|
||||
* available sensors.
|
||||
* @uboot_ver: F/W U-boot version.
|
||||
* @preboot_ver: F/W Preboot version.
|
||||
@ -271,6 +291,10 @@ struct hl_mmu_properties {
|
||||
* @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
|
||||
* @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
|
||||
* @mmu_dram_default_page_addr: DRAM default page physical address.
|
||||
* @cb_va_start_addr: virtual start address of command buffers which are mapped
|
||||
* to the device's MMU.
|
||||
* @cb_va_end_addr: virtual end address of command buffers which are mapped to
|
||||
* the device's MMU.
|
||||
* @mmu_pgt_size: MMU page tables total size.
|
||||
* @mmu_pte_size: PTE size in MMU page tables.
|
||||
* @mmu_hop_table_size: MMU hop table size.
|
||||
@ -292,12 +316,16 @@ struct hl_mmu_properties {
|
||||
* @max_queues: maximum amount of queues in the system
|
||||
* @sync_stream_first_sob: first sync object available for sync stream use
|
||||
* @sync_stream_first_mon: first monitor available for sync stream use
|
||||
* @first_available_user_sob: first sob available for the user
|
||||
* @first_available_user_mon: first monitor available for the user
|
||||
* @tpc_enabled_mask: which TPCs are enabled.
|
||||
* @completion_queues_count: number of completion queues.
|
||||
* @fw_security_disabled: true if security measures are disabled in firmware,
|
||||
* false otherwise
|
||||
*/
|
||||
struct asic_fixed_properties {
|
||||
struct hw_queue_properties *hw_queues_props;
|
||||
struct armcp_info armcp_info;
|
||||
struct cpucp_info cpucp_info;
|
||||
char uboot_ver[VERSION_MAX_LEN];
|
||||
char preboot_ver[VERSION_MAX_LEN];
|
||||
struct hl_mmu_properties dmmu;
|
||||
@ -317,6 +345,8 @@ struct asic_fixed_properties {
|
||||
u64 pcie_aux_dbi_reg_addr;
|
||||
u64 mmu_pgt_addr;
|
||||
u64 mmu_dram_default_page_addr;
|
||||
u64 cb_va_start_addr;
|
||||
u64 cb_va_end_addr;
|
||||
u32 mmu_pgt_size;
|
||||
u32 mmu_pte_size;
|
||||
u32 mmu_hop_table_size;
|
||||
@ -338,13 +368,29 @@ struct asic_fixed_properties {
|
||||
u32 max_queues;
|
||||
u16 sync_stream_first_sob;
|
||||
u16 sync_stream_first_mon;
|
||||
u16 first_available_user_sob[HL_MAX_DCORES];
|
||||
u16 first_available_user_mon[HL_MAX_DCORES];
|
||||
u8 tpc_enabled_mask;
|
||||
u8 completion_queues_count;
|
||||
u8 fw_security_disabled;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_fence - software synchronization primitive
|
||||
* @completion: fence is implemented using completion
|
||||
* @refcount: refcount for this fence
|
||||
* @error: mark this fence with error
|
||||
*
|
||||
*/
|
||||
struct hl_fence {
|
||||
struct completion completion;
|
||||
struct kref refcount;
|
||||
int error;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_cs_compl - command submission completion object.
|
||||
* @base_fence: kernel fence object.
|
||||
* @base_fence: hl fence object.
|
||||
* @lock: spinlock to protect fence.
|
||||
* @hdev: habanalabs device structure.
|
||||
* @hw_sob: the H/W SOB used in this signal/wait CS.
|
||||
@ -353,7 +399,7 @@ struct asic_fixed_properties {
|
||||
* @sob_val: the SOB value that is used in this signal/wait CS.
|
||||
*/
|
||||
struct hl_cs_compl {
|
||||
struct dma_fence base_fence;
|
||||
struct hl_fence base_fence;
|
||||
spinlock_t lock;
|
||||
struct hl_device *hdev;
|
||||
struct hl_hw_sob *hw_sob;
|
||||
@ -380,36 +426,41 @@ struct hl_cb_mgr {
|
||||
* struct hl_cb - describes a Command Buffer.
|
||||
* @refcount: reference counter for usage of the CB.
|
||||
* @hdev: pointer to device this CB belongs to.
|
||||
* @ctx: pointer to the CB owner's context.
|
||||
* @lock: spinlock to protect mmap/cs flows.
|
||||
* @debugfs_list: node in debugfs list of command buffers.
|
||||
* @pool_list: node in pool list of command buffers.
|
||||
* @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
|
||||
* the device's MMU.
|
||||
* @id: the CB's ID.
|
||||
* @kernel_address: Holds the CB's kernel virtual address.
|
||||
* @bus_address: Holds the CB's DMA address.
|
||||
* @mmap_size: Holds the CB's size that was mmaped.
|
||||
* @size: holds the CB's size.
|
||||
* @id: the CB's ID.
|
||||
* @cs_cnt: holds number of CS that this CB participates in.
|
||||
* @ctx_id: holds the ID of the owner's context.
|
||||
* @mmap: true if the CB is currently mmaped to user.
|
||||
* @is_pool: true if CB was acquired from the pool, false otherwise.
|
||||
* @is_internal: internally allocated
|
||||
* @is_mmu_mapped: true if the CB is mapped to the device's MMU.
|
||||
*/
|
||||
struct hl_cb {
|
||||
struct kref refcount;
|
||||
struct hl_device *hdev;
|
||||
struct hl_ctx *ctx;
|
||||
spinlock_t lock;
|
||||
struct list_head debugfs_list;
|
||||
struct list_head pool_list;
|
||||
struct list_head va_block_list;
|
||||
u64 id;
|
||||
u64 kernel_address;
|
||||
dma_addr_t bus_address;
|
||||
u32 mmap_size;
|
||||
u32 size;
|
||||
u32 id;
|
||||
u32 cs_cnt;
|
||||
u32 ctx_id;
|
||||
u8 mmap;
|
||||
u8 is_pool;
|
||||
u8 is_internal;
|
||||
u8 is_mmu_mapped;
|
||||
};
|
||||
|
||||
|
||||
@ -435,7 +486,7 @@ struct hl_cs_job;
|
||||
#define HL_EQ_LENGTH 64
|
||||
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
|
||||
|
||||
/* Host <-> ArmCP shared memory size */
|
||||
/* Host <-> CPU-CP shared memory size */
|
||||
#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
|
||||
|
||||
/**
|
||||
@ -617,7 +668,7 @@ enum div_select_defs {
|
||||
* @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
|
||||
* @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
|
||||
* @add_device_attr: add ASIC specific device attributes.
|
||||
* @handle_eqe: handle event queue entry (IRQ) from ArmCP.
|
||||
* @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
|
||||
* @set_pll_profile: change PLL profile (manual/automatic).
|
||||
* @get_events_stat: retrieve event queue entries histogram.
|
||||
* @read_pte: read MMU page table entry from DRAM.
|
||||
@ -626,7 +677,7 @@ enum div_select_defs {
|
||||
* (L1 only) or hard (L0 & L1) flush.
|
||||
* @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
|
||||
* ASID-VA-size mask.
|
||||
* @send_heartbeat: send is-alive packet to ArmCP and verify response.
|
||||
* @send_heartbeat: send is-alive packet to CPU-CP and verify response.
|
||||
* @set_clock_gating: enable/disable clock gating per engine according to
|
||||
* clock gating mask in hdev
|
||||
* @disable_clock_gating: disable clock gating completely
|
||||
@ -644,8 +695,6 @@ enum div_select_defs {
|
||||
* ASIC
|
||||
* @get_hw_state: retrieve the H/W state
|
||||
* @pci_bars_map: Map PCI BARs.
|
||||
* @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
|
||||
* old address the bar pointed to or U64_MAX for failure
|
||||
* @init_iatu: Initialize the iATU unit inside the PCI controller.
|
||||
* @rreg: Read a register. Needed for simulator support.
|
||||
* @wreg: Write a register. Needed for simulator support.
|
||||
@ -679,7 +728,7 @@ struct hl_asic_funcs {
|
||||
int (*suspend)(struct hl_device *hdev);
|
||||
int (*resume)(struct hl_device *hdev);
|
||||
int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
u64 kaddress, phys_addr_t paddress, u32 size);
|
||||
void *cpu_addr, dma_addr_t dma_addr, size_t size);
|
||||
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
|
||||
void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
|
||||
struct hl_bd *bd);
|
||||
@ -736,7 +785,7 @@ struct hl_asic_funcs {
|
||||
void (*set_clock_gating)(struct hl_device *hdev);
|
||||
void (*disable_clock_gating)(struct hl_device *hdev);
|
||||
int (*debug_coresight)(struct hl_device *hdev, void *data);
|
||||
bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
|
||||
bool (*is_device_idle)(struct hl_device *hdev, u64 *mask,
|
||||
struct seq_file *s);
|
||||
int (*soft_reset_late_init)(struct hl_device *hdev);
|
||||
void (*hw_queues_lock)(struct hl_device *hdev);
|
||||
@ -748,7 +797,6 @@ struct hl_asic_funcs {
|
||||
u16 len, u32 timeout, long *result);
|
||||
enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
|
||||
int (*pci_bars_map)(struct hl_device *hdev);
|
||||
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
|
||||
int (*init_iatu)(struct hl_device *hdev);
|
||||
u32 (*rreg)(struct hl_device *hdev, u32 reg);
|
||||
void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
|
||||
@ -800,7 +848,7 @@ struct hl_va_range {
|
||||
* @hdev: pointer to the device structure.
|
||||
* @refcount: reference counter for the context. Context is released only when
|
||||
* this hits 0. It is incremented on CS and CS_WAIT.
|
||||
* @cs_pending: array of DMA fence objects representing pending CS.
|
||||
* @cs_pending: array of hl fence objects representing pending CS.
|
||||
* @host_va_range: holds available virtual addresses for host mappings.
|
||||
* @host_huge_va_range: holds available virtual addresses for host mappings
|
||||
* with huge pages.
|
||||
@ -809,6 +857,8 @@ struct hl_va_range {
|
||||
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
|
||||
* MMU hash or walking the PGT requires taking this lock.
|
||||
* @debugfs_list: node in debugfs list of contexts.
|
||||
* @cb_va_pool: device VA pool for command buffers which are mapped to the
|
||||
* device's MMU.
|
||||
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
|
||||
* to user so user could inquire about CS. It is used as
|
||||
* index to cs_pending array.
|
||||
@ -832,7 +882,7 @@ struct hl_ctx {
|
||||
struct hl_fpriv *hpriv;
|
||||
struct hl_device *hdev;
|
||||
struct kref refcount;
|
||||
struct dma_fence **cs_pending;
|
||||
struct hl_fence **cs_pending;
|
||||
struct hl_va_range *host_va_range;
|
||||
struct hl_va_range *host_huge_va_range;
|
||||
struct hl_va_range *dram_va_range;
|
||||
@ -840,6 +890,7 @@ struct hl_ctx {
|
||||
struct mutex mmu_lock;
|
||||
struct list_head debugfs_list;
|
||||
struct hl_cs_counters cs_counters;
|
||||
struct gen_pool *cb_va_pool;
|
||||
u64 cs_sequence;
|
||||
u64 *dram_default_hops;
|
||||
spinlock_t cs_lock;
|
||||
@ -919,8 +970,8 @@ struct hl_cs {
|
||||
struct list_head job_list;
|
||||
spinlock_t job_lock;
|
||||
struct kref refcount;
|
||||
struct dma_fence *fence;
|
||||
struct dma_fence *signal_fence;
|
||||
struct hl_fence *fence;
|
||||
struct hl_fence *signal_fence;
|
||||
struct work_struct finish_work;
|
||||
struct delayed_work work_tdr;
|
||||
struct list_head mirror_node;
|
||||
@ -1395,6 +1446,44 @@ struct hl_device_idle_busy_ts {
|
||||
ktime_t busy_to_idle_ts;
|
||||
};
|
||||
|
||||
|
||||
/**
 * struct hl_mmu_priv - used for holding per-device mmu internal information.
 * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
 * @mmu_shadow_hop0: shadow array of hop0 tables.
 */
struct hl_mmu_priv {
	struct gen_pool *mmu_pgt_pool;
	void *mmu_shadow_hop0;
};

/**
 * struct hl_mmu_funcs - Device related MMU functions.
 * @init: initialize the MMU module.
 * @fini: release the MMU module.
 * @ctx_init: Initialize a context for using the MMU module.
 * @ctx_fini: disable a ctx from using the mmu module.
 * @map: maps a virtual address to physical address for a context.
 * @unmap: unmap a virtual address of a context.
 * @flush: flush all writes from all cores to reach device MMU.
 * @swap_out: marks all mapping of the given context as swapped out.
 * @swap_in: marks all mapping of the given context as swapped in.
 */
struct hl_mmu_funcs {
	int (*init)(struct hl_device *hdev);
	void (*fini)(struct hl_device *hdev);
	int (*ctx_init)(struct hl_ctx *ctx);
	void (*ctx_fini)(struct hl_ctx *ctx);
	int (*map)(struct hl_ctx *ctx,
			u64 virt_addr, u64 phys_addr, u32 page_size,
			bool is_dram_addr);
	int (*unmap)(struct hl_ctx *ctx,
			u64 virt_addr, bool is_dram_addr);
	void (*flush)(struct hl_ctx *ctx);
	void (*swap_out)(struct hl_ctx *ctx);
	void (*swap_in)(struct hl_ctx *ctx);
};
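With the MMU code refactored to be device-oriented, each implementation publishes its callbacks through this table. The sketch below shows how the v1 code (now in mmu_v1.c) could register itself; the hl_mmu_v1_* helper names are assumed for illustration and are not taken from the patch:

void hl_mmu_v1_set_funcs(struct hl_device *hdev)
{
	struct hl_mmu_funcs *mmu = &hdev->mmu_func;

	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = hl_mmu_v1_map;
	mmu->unmap = hl_mmu_v1_unmap;
	mmu->flush = hl_mmu_v1_flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
}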
|
||||
|
||||
/**
|
||||
* struct hl_device - habanalabs device structure.
|
||||
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
|
||||
@ -1407,8 +1496,8 @@ struct hl_device_idle_busy_ts {
|
||||
* @dev: related kernel basic device structure.
|
||||
* @dev_ctrl: related kernel device structure for the control device
|
||||
* @work_freq: delayed work to lower device frequency if possible.
|
||||
* @work_heartbeat: delayed work for ArmCP is-alive check.
|
||||
* @asic_name: ASIC specific nmae.
|
||||
* @work_heartbeat: delayed work for CPU-CP is-alive check.
|
||||
* @asic_name: ASIC specific name.
|
||||
* @asic_type: ASIC specific type.
|
||||
* @completion_queue: array of hl_cq.
|
||||
* @cq_wq: work queues of completion queues for executing work in process
|
||||
@ -1419,22 +1508,20 @@ struct hl_device_idle_busy_ts {
|
||||
* @hw_queues_mirror_list: CS mirror list for TDR.
|
||||
* @hw_queues_mirror_lock: protects hw_queues_mirror_list.
|
||||
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
|
||||
* @event_queue: event queue for IRQ from ArmCP.
|
||||
* @event_queue: event queue for IRQ from CPU-CP.
|
||||
* @dma_pool: DMA pool for small allocations.
|
||||
* @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
|
||||
* @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
|
||||
* @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
|
||||
* @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address.
|
||||
* @cpu_accessible_dma_address: Host <-> CPU-CP shared memory DMA address.
|
||||
* @cpu_accessible_dma_pool: Host <-> CPU-CP shared memory pool.
|
||||
* @asid_bitmap: holds used/available ASIDs.
|
||||
* @asid_mutex: protects asid_bitmap.
|
||||
* @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
|
||||
* @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue.
|
||||
* @debug_lock: protects critical section of setting debug mode for device
|
||||
* @asic_prop: ASIC specific immutable properties.
|
||||
* @asic_funcs: ASIC specific functions.
|
||||
* @asic_specific: ASIC specific information to use only from ASIC files.
|
||||
* @mmu_pgt_pool: pool of available MMU hops.
|
||||
* @vm: virtual memory manager for MMU.
|
||||
* @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
|
||||
* @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
|
||||
* @hwmon_dev: H/W monitor device.
|
||||
* @pm_mng_profile: current power management profile.
|
||||
* @hl_chip_info: ASIC's sensors information.
|
||||
@ -1452,6 +1539,8 @@ struct hl_device_idle_busy_ts {
|
||||
* @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
|
||||
* and vice-versa
|
||||
* @aggregated_cs_counters: aggregated cs counters among all contexts
|
||||
* @mmu_priv: device-specific MMU data.
|
||||
* @mmu_func: device-related MMU functions.
|
||||
* @dram_used_mem: current DRAM memory consumption.
|
||||
* @timeout_jiffies: device CS timeout value.
|
||||
* @max_power: the max power of the device, as configured by the sysadmin. This
|
||||
@ -1471,6 +1560,7 @@ struct hl_device_idle_busy_ts {
|
||||
* @soft_reset_cnt: number of soft reset since the driver was loaded.
|
||||
* @hard_reset_cnt: number of hard reset since the driver was loaded.
|
||||
* @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
|
||||
* @clk_throttling_reason: bitmask represents the current clk throttling reasons
|
||||
* @id: device minor.
|
||||
* @id_control: minor of the control device
|
||||
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
|
||||
@ -1479,7 +1569,7 @@ struct hl_device_idle_busy_ts {
|
||||
* @late_init_done: is late init stage was done during initialization.
|
||||
* @hwmon_initialized: is H/W monitor sensors was initialized.
|
||||
* @hard_reset_pending: is there a hard reset work pending.
|
||||
* @heartbeat: is heartbeat sanity check towards ArmCP enabled.
|
||||
* @heartbeat: is heartbeat sanity check towards CPU-CP enabled.
|
||||
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
|
||||
* otherwise.
|
||||
* @dram_supports_virtual_memory: is MMU enabled towards DRAM.
|
||||
@ -1501,6 +1591,7 @@ struct hl_device_idle_busy_ts {
|
||||
* @sync_stream_queue_idx: helper index for sync stream queues initialization.
|
||||
* @supports_coresight: is CoreSight supported.
|
||||
* @supports_soft_reset: is soft reset supported.
|
||||
* @supports_cb_mapping: is mapping a CB to the device's MMU supported.
|
||||
*/
|
||||
struct hl_device {
|
||||
struct pci_dev *pdev;
|
||||
@ -1513,7 +1604,7 @@ struct hl_device {
|
||||
struct device *dev_ctrl;
|
||||
struct delayed_work work_freq;
|
||||
struct delayed_work work_heartbeat;
|
||||
char asic_name[16];
|
||||
char asic_name[32];
|
||||
enum hl_asic_type asic_type;
|
||||
struct hl_cq *completion_queue;
|
||||
struct workqueue_struct **cq_wq;
|
||||
@ -1535,10 +1626,8 @@ struct hl_device {
|
||||
struct asic_fixed_properties asic_prop;
|
||||
const struct hl_asic_funcs *asic_funcs;
|
||||
void *asic_specific;
|
||||
struct gen_pool *mmu_pgt_pool;
|
||||
struct hl_vm vm;
|
||||
struct mutex mmu_cache_lock;
|
||||
void *mmu_shadow_hop0;
|
||||
struct device *hwmon_dev;
|
||||
enum hl_pm_mng_profile pm_mng_profile;
|
||||
struct hwmon_chip_info *hl_chip_info;
|
||||
@ -1562,19 +1651,23 @@ struct hl_device {
|
||||
|
||||
struct hl_cs_counters aggregated_cs_counters;
|
||||
|
||||
struct hl_mmu_priv mmu_priv;
|
||||
struct hl_mmu_funcs mmu_func;
|
||||
|
||||
atomic64_t dram_used_mem;
|
||||
u64 timeout_jiffies;
|
||||
u64 max_power;
|
||||
u64 clock_gating_mask;
|
||||
atomic_t in_reset;
|
||||
enum hl_pll_frequency curr_pll_profile;
|
||||
enum armcp_card_types card_type;
|
||||
enum cpucp_card_types card_type;
|
||||
int cs_active_cnt;
|
||||
u32 major;
|
||||
u32 high_pll;
|
||||
u32 soft_reset_cnt;
|
||||
u32 hard_reset_cnt;
|
||||
u32 idle_busy_ts_idx;
|
||||
u32 clk_throttling_reason;
|
||||
u16 id;
|
||||
u16 id_control;
|
||||
u16 cpu_pci_msb_addr;
|
||||
@ -1598,6 +1691,7 @@ struct hl_device {
|
||||
u8 sync_stream_queue_idx;
|
||||
u8 supports_coresight;
|
||||
u8 supports_soft_reset;
|
||||
u8 supports_cb_mapping;
|
||||
|
||||
/* Parameters for bring-up */
|
||||
u8 mmu_enable;
|
||||
@ -1739,7 +1833,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
|
||||
void hl_ctx_do_release(struct kref *ref);
|
||||
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
|
||||
int hl_ctx_put(struct hl_ctx *ctx);
|
||||
struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
|
||||
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
|
||||
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
|
||||
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
|
||||
|
||||
@ -1755,7 +1849,7 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
|
||||
uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
|
||||
|
||||
int hl_build_hwmon_channel_info(struct hl_device *hdev,
|
||||
struct armcp_sensor *sensors_arr);
|
||||
struct cpucp_sensor *sensors_arr);
|
||||
|
||||
int hl_sysfs_init(struct hl_device *hdev);
|
||||
void hl_sysfs_fini(struct hl_device *hdev);
|
||||
@ -1763,8 +1857,9 @@ void hl_sysfs_fini(struct hl_device *hdev);
|
||||
int hl_hwmon_init(struct hl_device *hdev);
|
||||
void hl_hwmon_fini(struct hl_device *hdev);
|
||||
|
||||
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
|
||||
u64 *handle, int ctx_id, bool internal_cb);
|
||||
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
|
||||
struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
|
||||
bool map_cb, u64 *handle);
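An illustrative call site for the extended prototype, creating a CB that is also mapped to the device's MMU; kernel_cb_mgr and kernel_ctx are assumed fields of hl_device and the snippet is not part of the patch:

	u64 cb_handle;
	int rc;

	/* 4 KB command buffer, not internal, mapped to the device MMU */
	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				SZ_4K, false, true, &cb_handle);
	if (rc)
		dev_err(hdev->dev, "failed to create mapped CB, %d\n", rc);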
|
||||
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
|
||||
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
|
||||
struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
|
||||
@ -1776,11 +1871,15 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
|
||||
bool internal_cb);
|
||||
int hl_cb_pool_init(struct hl_device *hdev);
|
||||
int hl_cb_pool_fini(struct hl_device *hdev);
|
||||
int hl_cb_va_pool_init(struct hl_ctx *ctx);
|
||||
void hl_cb_va_pool_fini(struct hl_ctx *ctx);
|
||||
|
||||
void hl_cs_rollback_all(struct hl_device *hdev);
|
||||
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
|
||||
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
|
||||
void hl_sob_reset_error(struct kref *ref);
|
||||
void hl_fence_put(struct hl_fence *fence);
|
||||
void hl_fence_get(struct hl_fence *fence);
|
||||
|
||||
void goya_set_asic_funcs(struct hl_device *hdev);
|
||||
void gaudi_set_asic_funcs(struct hl_device *hdev);
|
||||
@ -1810,6 +1909,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
||||
bool flush_pte);
|
||||
void hl_mmu_swap_out(struct hl_ctx *ctx);
|
||||
void hl_mmu_swap_in(struct hl_ctx *ctx);
|
||||
int hl_mmu_if_set_funcs(struct hl_device *hdev);
|
||||
void hl_mmu_v1_set_funcs(struct hl_device *hdev);
|
||||
|
||||
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
|
||||
void __iomem *dst);
|
||||
@ -1825,23 +1926,28 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
|
||||
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
|
||||
void *vaddr);
|
||||
int hl_fw_send_heartbeat(struct hl_device *hdev);
|
||||
int hl_fw_armcp_info_get(struct hl_device *hdev);
|
||||
int hl_fw_cpucp_info_get(struct hl_device *hdev);
|
||||
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
|
||||
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
|
||||
struct hl_info_pci_counters *counters);
|
||||
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
|
||||
u64 *total_energy);
|
||||
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
|
||||
u32 boot_err0_reg, bool skip_bmc,
|
||||
u32 cpu_timeout, u32 boot_fit_timeout);
|
||||
int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 boot_err0_reg, u32 timeout);
|
||||
|
||||
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
|
||||
bool is_wc[3]);
|
||||
int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
|
||||
int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
|
||||
u64 addr);
|
||||
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
|
||||
struct hl_inbound_pci_region *pci_region);
|
||||
int hl_pci_set_outbound_region(struct hl_device *hdev,
|
||||
struct hl_outbound_pci_region *pci_region);
|
||||
int hl_pci_init(struct hl_device *hdev);
|
||||
int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 boot_err0_reg, u32 preboot_ver_timeout);
|
||||
void hl_pci_fini(struct hl_device *hdev);
|
||||
|
||||
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "habanalabs.h"
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/aer.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
|
||||
@ -408,6 +409,8 @@ static int hl_pci_probe(struct pci_dev *pdev,
|
||||
|
||||
pci_set_drvdata(pdev, hdev);
|
||||
|
||||
pci_enable_pcie_error_reporting(pdev);
|
||||
|
||||
rc = hl_device_init(hdev, hl_class);
|
||||
if (rc) {
|
||||
dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
|
||||
@ -440,22 +443,93 @@ static void hl_pci_remove(struct pci_dev *pdev)
|
||||
return;
|
||||
|
||||
hl_device_fini(hdev);
|
||||
pci_disable_pcie_error_reporting(pdev);
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
|
||||
destroy_hdev(hdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_pci_err_detected - a PCI bus error detected on this device
|
||||
*
|
||||
* @pdev: pointer to pci device
|
||||
* @state: PCI error type
|
||||
*
|
||||
* Called by the PCI subsystem whenever a non-correctable
|
||||
* PCI bus error is detected
|
||||
*/
|
||||
static pci_ers_result_t
|
||||
hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
|
||||
{
|
||||
struct hl_device *hdev = pci_get_drvdata(pdev);
|
||||
enum pci_ers_result result;
|
||||
|
||||
switch (state) {
|
||||
case pci_channel_io_normal:
|
||||
return PCI_ERS_RESULT_CAN_RECOVER;
|
||||
|
||||
case pci_channel_io_frozen:
|
||||
dev_warn(hdev->dev, "frozen state error detected\n");
|
||||
result = PCI_ERS_RESULT_NEED_RESET;
|
||||
break;
|
||||
|
||||
case pci_channel_io_perm_failure:
|
||||
dev_warn(hdev->dev, "failure state error detected\n");
|
||||
result = PCI_ERS_RESULT_DISCONNECT;
|
||||
break;
|
||||
|
||||
default:
|
||||
result = PCI_ERS_RESULT_NONE;
|
||||
}
|
||||
|
||||
hdev->asic_funcs->halt_engines(hdev, true);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_pci_err_resume - resume after a PCI slot reset
|
||||
*
|
||||
* @pdev: pointer to pci device
|
||||
*
|
||||
*/
|
||||
static void hl_pci_err_resume(struct pci_dev *pdev)
|
||||
{
|
||||
struct hl_device *hdev = pci_get_drvdata(pdev);
|
||||
|
||||
dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
|
||||
hl_device_resume(hdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_pci_err_slot_reset - a PCI slot reset has just happened
|
||||
*
|
||||
* @pdev: pointer to pci device
|
||||
*
|
||||
* Determine if the driver can recover from the PCI slot reset
|
||||
*/
|
||||
static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
|
||||
{
|
||||
return PCI_ERS_RESULT_RECOVERED;
|
||||
}
|
||||
|
||||
static const struct dev_pm_ops hl_pm_ops = {
|
||||
.suspend = hl_pmops_suspend,
|
||||
.resume = hl_pmops_resume,
|
||||
};
|
||||
|
||||
static const struct pci_error_handlers hl_pci_err_handler = {
|
||||
.error_detected = hl_pci_err_detected,
|
||||
.slot_reset = hl_pci_err_slot_reset,
|
||||
.resume = hl_pci_err_resume,
|
||||
};
|
||||
|
||||
static struct pci_driver hl_pci_driver = {
|
||||
.name = HL_NAME,
|
||||
.id_table = ids,
|
||||
.probe = hl_pci_probe,
|
||||
.remove = hl_pci_remove,
|
||||
.driver.pm = &hl_pm_ops,
|
||||
.err_handler = &hl_pci_err_handler,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <uapi/misc/habanalabs.h>
|
||||
#include "habanalabs.h"
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/slab.h>
|
||||
@ -64,14 +65,14 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
|
||||
hw_ip.dram_enabled = 1;
|
||||
hw_ip.num_of_events = prop->num_of_events;
|
||||
|
||||
memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
|
||||
memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
|
||||
min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
|
||||
|
||||
memcpy(hw_ip.card_name, prop->armcp_info.card_name,
|
||||
memcpy(hw_ip.card_name, prop->cpucp_info.card_name,
|
||||
min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
|
||||
|
||||
hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
|
||||
hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
|
||||
hw_ip.cpld_version = le32_to_cpu(prop->cpucp_info.cpld_version);
|
||||
hw_ip.module_id = le32_to_cpu(prop->cpucp_info.card_location);
|
||||
|
||||
hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
|
||||
hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
|
||||
@ -131,7 +132,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
|
||||
return -EINVAL;
|
||||
|
||||
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
|
||||
&hw_idle.busy_engines_mask, NULL);
|
||||
&hw_idle.busy_engines_mask_ext, NULL);
|
||||
|
||||
return copy_to_user(out, &hw_idle,
|
||||
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
|
||||
@ -276,10 +277,45 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
|
||||
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_info_pci_counters pci_counters = {0};
|
||||
u32 max_size = args->return_size;
|
||||
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||
int rc;
|
||||
|
||||
if ((!max_size) || (!out))
|
||||
return -EINVAL;
|
||||
|
||||
rc = hl_fw_cpucp_pci_counters_get(hdev, &pci_counters);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return copy_to_user(out, &pci_counters,
|
||||
min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_info_clk_throttle clk_throttle = {0};
|
||||
u32 max_size = args->return_size;
|
||||
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||
|
||||
if ((!max_size) || (!out))
|
||||
return -EINVAL;
|
||||
|
||||
clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason;
|
||||
|
||||
return copy_to_user(out, &clk_throttle,
|
||||
min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_info_cs_counters cs_counters = {0};
|
||||
struct hl_info_cs_counters cs_counters = { {0} };
|
||||
u32 max_size = args->return_size;
|
||||
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||
|
||||
@ -297,6 +333,51 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct hl_info_sync_manager sm_info = {0};
|
||||
u32 max_size = args->return_size;
|
||||
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||
|
||||
if ((!max_size) || (!out))
|
||||
return -EINVAL;
|
||||
|
||||
if (args->dcore_id >= HL_MAX_DCORES)
|
||||
return -EINVAL;
|
||||
|
||||
sm_info.first_available_sync_object =
|
||||
prop->first_available_user_sob[args->dcore_id];
|
||||
sm_info.first_available_monitor =
|
||||
prop->first_available_user_mon[args->dcore_id];
|
||||
|
||||
|
||||
return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
|
||||
sizeof(sm_info))) ? -EFAULT : 0;
|
||||
}
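A minimal user-space sketch of the matching query; it assumes the uapi header exposes HL_IOCTL_INFO, HL_INFO_SYNC_MANAGER and the hl_info_args/hl_info_sync_manager layouts used above, and that fd is an open habanalabs device file:

#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* installed uapi header, path assumed */

static int print_sm_info(int fd)
{
	struct hl_info_sync_manager sm = {0};
	struct hl_info_args args = {
		.return_pointer = (uintptr_t) &sm,
		.return_size = sizeof(sm),
		.op = HL_INFO_SYNC_MANAGER,
		.dcore_id = 0,
	};

	if (ioctl(fd, HL_IOCTL_INFO, &args))
		return -1;

	printf("first user SOB %u, first user monitor %u\n",
		sm.first_available_sync_object, sm.first_available_monitor);
	return 0;
}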
|
||||
|
||||
static int total_energy_consumption_info(struct hl_fpriv *hpriv,
|
||||
struct hl_info_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_info_energy total_energy = {0};
|
||||
u32 max_size = args->return_size;
|
||||
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||
int rc;
|
||||
|
||||
if ((!max_size) || (!out))
|
||||
return -EINVAL;
|
||||
|
||||
rc = hl_fw_cpucp_total_energy_get(hdev,
|
||||
&total_energy.total_energy_consumption);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return copy_to_user(out, &total_energy,
|
||||
min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
|
||||
struct device *dev)
|
||||
{
|
||||
@ -360,6 +441,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
|
||||
case HL_INFO_CS_COUNTERS:
|
||||
return cs_counters_info(hpriv, args);
|
||||
|
||||
case HL_INFO_PCI_COUNTERS:
|
||||
return pci_counters_info(hpriv, args);
|
||||
|
||||
case HL_INFO_CLK_THROTTLE_REASON:
|
||||
return clk_throttle_info(hpriv, args);
|
||||
|
||||
case HL_INFO_SYNC_MANAGER:
|
||||
return sync_manager_info(hpriv, args);
|
||||
|
||||
case HL_INFO_TOTAL_ENERGY:
|
||||
return total_energy_consumption_info(hpriv, args);
|
||||
|
||||
default:
|
||||
dev_err(dev, "Invalid request %d\n", args->op);
|
||||
rc = -ENOTTY;
|
||||
|
@ -288,10 +288,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
|
||||
ptr = cb->bus_address;
|
||||
|
||||
cq_pkt.data = cpu_to_le32(
|
||||
((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
|
||||
& CQ_ENTRY_SHADOW_INDEX_MASK) |
|
||||
(1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
|
||||
(1 << CQ_ENTRY_READY_SHIFT));
|
||||
((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
|
||||
& CQ_ENTRY_SHADOW_INDEX_MASK) |
|
||||
FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
|
||||
FIELD_PREP(CQ_ENTRY_READY_MASK, 1));
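		/*
		 * Illustrative note, not part of the patch: FIELD_PREP()
		 * places a value at the bit position described by its mask,
		 * so assuming CQ_ENTRY_READY_MASK == BIT(CQ_ENTRY_READY_SHIFT)
		 * the old "1 << CQ_ENTRY_READY_SHIFT" and the new
		 * "FIELD_PREP(CQ_ENTRY_READY_MASK, 1)" produce the same bits,
		 * while keeping shift and mask in one definition and getting
		 * compile-time checks from <linux/bitfield.h>.
		 */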
|
||||
|
||||
/*
|
||||
* No need to protect pi_offset because scheduling to the
|
||||
@ -474,7 +474,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
|
||||
* wait CS was submitted.
|
||||
*/
|
||||
mb();
|
||||
dma_fence_put(cs->signal_fence);
|
||||
hl_fence_put(cs->signal_fence);
|
||||
cs->signal_fence = NULL;
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1)
|
||||
|
||||
int hl_build_hwmon_channel_info(struct hl_device *hdev,
|
||||
struct armcp_sensor *sensors_arr)
|
||||
struct cpucp_sensor *sensors_arr)
|
||||
{
|
||||
u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
|
||||
u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
|
||||
@ -24,7 +24,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
|
||||
enum hwmon_sensor_types type;
|
||||
int rc, i, j;
|
||||
|
||||
for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
|
||||
for (i = 0 ; i < CPUCP_MAX_SENSORS ; i++) {
|
||||
type = le32_to_cpu(sensors_arr[i].type);
|
||||
|
||||
if ((type == 0) && (sensors_arr[i].flags == 0))
|
||||
@ -311,13 +311,13 @@ static const struct hwmon_ops hl_hwmon_ops = {
|
||||
int hl_get_temperature(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long *value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
|
||||
@ -337,13 +337,13 @@ int hl_get_temperature(struct hl_device *hdev,
|
||||
int hl_set_temperature(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
pkt.value = __cpu_to_le64(value);
|
||||
@ -362,13 +362,13 @@ int hl_set_temperature(struct hl_device *hdev,
|
||||
int hl_get_voltage(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long *value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
|
||||
@ -388,13 +388,13 @@ int hl_get_voltage(struct hl_device *hdev,
|
||||
int hl_get_current(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long *value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
|
||||
@ -414,13 +414,13 @@ int hl_get_current(struct hl_device *hdev,
|
||||
int hl_get_fan_speed(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long *value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FAN_SPEED_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
|
||||
@ -440,13 +440,13 @@ int hl_get_fan_speed(struct hl_device *hdev,
|
||||
int hl_get_pwm_info(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long *value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
|
||||
@ -466,13 +466,13 @@ int hl_get_pwm_info(struct hl_device *hdev,
|
||||
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
|
||||
long value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
pkt.value = cpu_to_le64(value);
|
||||
@ -489,13 +489,13 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
|
||||
int hl_set_voltage(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
pkt.value = __cpu_to_le64(value);
|
||||
@ -514,13 +514,13 @@ int hl_set_voltage(struct hl_device *hdev,
|
||||
int hl_set_current(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long value)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.sensor_index = __cpu_to_le16(sensor_index);
|
||||
pkt.type = __cpu_to_le16(attr);
|
||||
pkt.value = __cpu_to_le64(value);
|
||||
@ -549,7 +549,7 @@ int hl_hwmon_init(struct hl_device *hdev)
|
||||
hdev->hl_chip_info->ops = &hl_hwmon_ops;
|
||||
|
||||
hdev->hwmon_dev = hwmon_device_register_with_info(dev,
|
||||
prop->armcp_info.card_name, hdev,
|
||||
prop->cpucp_info.card_name, hdev,
|
||||
hdev->hl_chip_info, NULL);
|
||||
if (IS_ERR(hdev->hwmon_dev)) {
|
||||
rc = PTR_ERR(hdev->hwmon_dev);
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
/**
|
||||
* struct hl_eqe_work - This structure is used to schedule work of EQ
|
||||
* entry and armcp_reset event
|
||||
* entry and cpucp_reset event
|
||||
*
|
||||
* @eq_work: workqueue object to run when EQ entry is received
|
||||
* @hdev: pointer to device structure
|
||||
|
@ -505,41 +505,32 @@ static inline int add_va_block(struct hl_device *hdev,
|
||||
}
|
||||
|
||||
/*
|
||||
* get_va_block - get a virtual block with the requested size
|
||||
*
|
||||
* @hdev : pointer to the habanalabs device structure
|
||||
* @va_range : pointer to the virtual addresses range
|
||||
* @size : requested block size
|
||||
* @hint_addr : hint for request address by the user
|
||||
* @is_userptr : is host or DRAM memory
|
||||
* get_va_block() - get a virtual block for the given size and alignment.
|
||||
* @hdev: pointer to the habanalabs device structure.
|
||||
* @va_range: pointer to the virtual addresses range.
|
||||
* @size: requested block size.
|
||||
* @hint_addr: hint for requested address by the user.
|
||||
* @va_block_align: required alignment of the virtual block start address.
|
||||
*
|
||||
* This function does the following:
|
||||
* - Iterate on the virtual block list to find a suitable virtual block for the
|
||||
* requested size
|
||||
* - Reserve the requested block and update the list
|
||||
* - Return the start address of the virtual block
|
||||
* given size and alignment.
|
||||
* - Reserve the requested block and update the list.
|
||||
* - Return the start address of the virtual block.
|
||||
*/
|
||||
static u64 get_va_block(struct hl_device *hdev,
|
||||
struct hl_va_range *va_range, u64 size, u64 hint_addr,
|
||||
bool is_userptr)
|
||||
static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
|
||||
u64 size, u64 hint_addr, u32 va_block_align)
|
||||
{
|
||||
struct hl_vm_va_block *va_block, *new_va_block = NULL;
|
||||
u64 valid_start, valid_size, prev_start, prev_end, page_mask,
|
||||
u64 valid_start, valid_size, prev_start, prev_end, align_mask,
|
||||
res_valid_start = 0, res_valid_size = 0;
|
||||
u32 page_size;
|
||||
bool add_prev = false;
|
||||
|
||||
if (is_userptr)
|
||||
/*
|
||||
* We cannot know if the user allocated memory with huge pages
|
||||
* or not, hence we continue with the biggest possible
|
||||
* granularity.
|
||||
*/
|
||||
page_size = hdev->asic_prop.pmmu_huge.page_size;
|
||||
else
|
||||
page_size = hdev->asic_prop.dmmu.page_size;
|
||||
align_mask = ~((u64)va_block_align - 1);
|
||||
|
||||
page_mask = ~((u64)page_size - 1);
|
||||
/* check if hint_addr is aligned */
|
||||
if (hint_addr & (va_block_align - 1))
|
||||
hint_addr = 0;
|
||||
|
||||
mutex_lock(&va_range->lock);
|
||||
|
||||
@ -549,9 +540,9 @@ static u64 get_va_block(struct hl_device *hdev,
|
||||
/* calc the first possible aligned addr */
|
||||
valid_start = va_block->start;
|
||||
|
||||
if (valid_start & (page_size - 1)) {
|
||||
valid_start &= page_mask;
|
||||
valid_start += page_size;
|
||||
if (valid_start & (va_block_align - 1)) {
|
||||
valid_start &= align_mask;
|
||||
valid_start += va_block_align;
|
||||
if (valid_start > va_block->end)
|
||||
continue;
|
||||
}
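A worked example of this round-up, with illustrative numbers only: for va_block->start = 0x1234000 and va_block_align = 0x200000 (2 MB), 0x1234000 & (0x200000 - 1) = 0x34000, so the block start is not aligned; masking gives 0x1234000 & ~(0x200000 - 1) = 0x1200000, and adding the alignment yields 0x1400000, the first aligned candidate, which is then checked against va_block->end.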
|
||||
@ -863,7 +854,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
struct hl_va_range *va_range;
|
||||
enum vm_type_t *vm_type;
|
||||
u64 ret_vaddr, hint_addr;
|
||||
u32 handle = 0;
|
||||
u32 handle = 0, va_block_align;
|
||||
int rc;
|
||||
bool is_userptr = args->flags & HL_MEM_USERPTR;
|
||||
|
||||
@ -873,6 +864,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
if (is_userptr) {
|
||||
u64 addr = args->map_host.host_virt_addr,
|
||||
size = args->map_host.mem_size;
|
||||
u32 page_size = hdev->asic_prop.pmmu.page_size,
|
||||
huge_page_size = hdev->asic_prop.pmmu_huge.page_size;
|
||||
|
||||
rc = dma_map_host_va(hdev, addr, size, &userptr);
|
||||
if (rc) {
|
||||
@ -892,6 +885,27 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
vm_type = (enum vm_type_t *) userptr;
|
||||
hint_addr = args->map_host.hint_addr;
|
||||
handle = phys_pg_pack->handle;
|
||||
|
||||
/* get required alignment */
|
||||
if (phys_pg_pack->page_size == page_size) {
|
||||
va_range = ctx->host_va_range;
|
||||
|
||||
/*
|
||||
* huge page alignment may be needed in case of regular
|
||||
* page mapping, depending on the host VA alignment
|
||||
*/
|
||||
if (addr & (huge_page_size - 1))
|
||||
va_block_align = page_size;
|
||||
else
|
||||
va_block_align = huge_page_size;
|
||||
} else {
|
||||
/*
|
||||
* huge page alignment is needed in case of huge page
|
||||
* mapping
|
||||
*/
|
||||
va_range = ctx->host_huge_va_range;
|
||||
va_block_align = huge_page_size;
|
||||
}
|
||||
} else {
|
||||
handle = lower_32_bits(args->map_device.handle);
|
||||
|
||||
@ -912,6 +926,10 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
vm_type = (enum vm_type_t *) phys_pg_pack;
|
||||
|
||||
hint_addr = args->map_device.hint_addr;
|
||||
|
||||
/* DRAM VA alignment is the same as the DRAM page size */
|
||||
va_range = ctx->dram_va_range;
|
||||
va_block_align = hdev->asic_prop.dmmu.page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -933,16 +951,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
goto hnode_err;
|
||||
}
|
||||
|
||||
if (is_userptr)
|
||||
if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
|
||||
va_range = ctx->host_va_range;
|
||||
else
|
||||
va_range = ctx->host_huge_va_range;
|
||||
else
|
||||
va_range = ctx->dram_va_range;
|
||||
|
||||
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
|
||||
hint_addr, is_userptr);
|
||||
hint_addr, va_block_align);
|
||||
if (!ret_vaddr) {
|
||||
dev_err(hdev->dev, "no available va block for handle %u\n",
|
||||
handle);
|
||||
|
@ -1,258 +1,13 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2020 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "habanalabs.h"
|
||||
#include "../include/hw_ip/mmu/mmu_general.h"
|
||||
|
||||
#include <linux/genalloc.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
|
||||
|
||||
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
struct pgt_info *pgt_info = NULL;
|
||||
|
||||
hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
|
||||
(unsigned long) hop_addr)
|
||||
if (hop_addr == pgt_info->shadow_addr)
|
||||
break;
|
||||
|
||||
return pgt_info;
|
||||
}
|
||||
|
||||
static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
||||
gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
|
||||
hdev->asic_prop.mmu_hop_table_size);
|
||||
hash_del(&pgt_info->node);
|
||||
kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
|
||||
kfree(pgt_info);
|
||||
}
|
||||
|
||||
static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
|
||||
|
||||
_free_hop(ctx, pgt_info);
|
||||
}
|
||||
|
||||
static u64 alloc_hop(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct pgt_info *pgt_info;
|
||||
u64 phys_addr, shadow_addr;
|
||||
|
||||
pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
|
||||
if (!pgt_info)
|
||||
return ULLONG_MAX;
|
||||
|
||||
phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
|
||||
prop->mmu_hop_table_size);
|
||||
if (!phys_addr) {
|
||||
dev_err(hdev->dev, "failed to allocate page\n");
|
||||
goto pool_add_err;
|
||||
}
|
||||
|
||||
shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
|
||||
GFP_KERNEL);
|
||||
if (!shadow_addr)
|
||||
goto shadow_err;
|
||||
|
||||
pgt_info->phys_addr = phys_addr;
|
||||
pgt_info->shadow_addr = shadow_addr;
|
||||
pgt_info->ctx = ctx;
|
||||
pgt_info->num_of_ptes = 0;
|
||||
hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
|
||||
|
||||
return shadow_addr;
|
||||
|
||||
shadow_err:
|
||||
gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
|
||||
pool_add_err:
|
||||
kfree(pgt_info);
|
||||
|
||||
return ULLONG_MAX;
|
||||
}
|
||||
|
||||
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
|
||||
{
|
||||
return ctx->hdev->asic_prop.mmu_pgt_addr +
|
||||
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
|
||||
}
|
||||
|
||||
static inline u64 get_hop0_addr(struct hl_ctx *ctx)
|
||||
{
|
||||
return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
|
||||
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
|
||||
}
|
||||
|
||||
static inline void flush(struct hl_ctx *ctx)
|
||||
{
|
||||
/* flush all writes from all cores to reach PCI */
|
||||
mb();
|
||||
ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
|
||||
}
|
||||
|
||||
/* transform the value to physical address when writing to H/W */
|
||||
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
|
||||
{
|
||||
/*
|
||||
* The value to write is actually the address of the next shadow hop +
|
||||
* flags at the 12 LSBs.
|
||||
* Hence in order to get the value to write to the physical PTE, we
|
||||
* clear the 12 LSBs and translate the shadow hop to its associated
|
||||
* physical hop, and add back the original 12 LSBs.
|
||||
*/
|
||||
u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
|
||||
(val & FLAGS_MASK);
|
||||
|
||||
ctx->hdev->asic_funcs->write_pte(ctx->hdev,
|
||||
get_phys_addr(ctx, shadow_pte_addr),
|
||||
phys_val);
|
||||
|
||||
*(u64 *) (uintptr_t) shadow_pte_addr = val;
|
||||
}
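/*
 * Illustrative example with assumed numbers: for val = 0xabc000 | 0x3 (next
 * shadow hop plus flag bits in the 12 LSBs), write_pte() looks up the
 * physical hop that backs shadow address 0xabc000, say 0x2000000000, and
 * writes 0x2000000003 to the device, while the shadow copy keeps the
 * original value so later page-table walks stay in host memory.
 */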
|
||||
|
||||
/* do not transform the value to physical address when writing to H/W */
|
||||
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
|
||||
u64 val)
|
||||
{
|
||||
ctx->hdev->asic_funcs->write_pte(ctx->hdev,
|
||||
get_phys_addr(ctx, shadow_pte_addr),
|
||||
val);
|
||||
*(u64 *) (uintptr_t) shadow_pte_addr = val;
|
||||
}
|
||||
|
||||
/* clear the last and present bits */
|
||||
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
|
||||
{
|
||||
/* no need to transform the value to physical address */
|
||||
write_final_pte(ctx, pte_addr, 0);
|
||||
}
|
||||
|
||||
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
get_pgt_info(ctx, hop_addr)->num_of_ptes++;
|
||||
}
|
||||
|
||||
/*
|
||||
* put_pte - decrement the num of ptes and free the hop if possible
|
||||
*
|
||||
* @ctx: pointer to the context structure
|
||||
* @hop_addr: addr of the hop
|
||||
*
|
||||
* This function returns the number of ptes left on this hop. If the number is
|
||||
* 0, it means the pte was freed.
|
||||
*/
|
||||
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
|
||||
int num_of_ptes_left;
|
||||
|
||||
pgt_info->num_of_ptes--;
|
||||
|
||||
/*
|
||||
* Need to save the number of ptes left because free_hop might free
|
||||
* the pgt_info
|
||||
*/
|
||||
num_of_ptes_left = pgt_info->num_of_ptes;
|
||||
if (!num_of_ptes_left)
|
||||
_free_hop(ctx, pgt_info);
|
||||
|
||||
return num_of_ptes_left;
|
||||
}
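/*
 * Hedged sketch of the caller pattern implied by the comment above (not
 * taken from these hunks): the unmap path clears a leaf PTE and then drops
 * one reference on its hop; once put_pte() returns 0 the hop table has
 * already been freed and must not be touched again.
 *
 *	clear_pte(ctx, hop3_pte_addr);
 *	if (!put_pte(ctx, hop3_addr))
 *		hop3_addr = ULLONG_MAX;
 */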
|
||||
|
||||
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
|
||||
u64 virt_addr, u64 mask, u64 shift)
|
||||
{
|
||||
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
|
||||
((virt_addr & mask) >> shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
|
||||
mmu_prop->hop0_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
|
||||
mmu_prop->hop1_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
|
||||
mmu_prop->hop2_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
|
||||
mmu_prop->hop3_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
|
||||
mmu_prop->hop4_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
|
||||
{
|
||||
if (curr_pte & PAGE_PRESENT_MASK)
|
||||
return curr_pte & HOP_PHYS_ADDR_MASK;
|
||||
else
|
||||
return ULLONG_MAX;
|
||||
}
|
||||
|
||||
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
|
||||
bool *is_new_hop)
|
||||
{
|
||||
u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop_addr == ULLONG_MAX) {
|
||||
hop_addr = alloc_hop(ctx);
|
||||
*is_new_hop = (hop_addr != ULLONG_MAX);
|
||||
}
|
||||
|
||||
return hop_addr;
|
||||
}
|
||||
|
||||
/* translates shadow address inside hop to a physical address */
|
||||
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
|
||||
{
|
||||
u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
|
||||
u64 shadow_hop_addr = shadow_addr & ~page_mask;
|
||||
u64 pte_offset = shadow_addr & page_mask;
|
||||
u64 phys_hop_addr;
|
||||
|
||||
if (shadow_hop_addr != get_hop0_addr(ctx))
|
||||
phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
|
||||
else
|
||||
phys_hop_addr = get_phys_hop0_addr(ctx);
|
||||
|
||||
return phys_hop_addr + pte_offset;
|
||||
}
|
||||
#include "habanalabs.h"
|
||||
|
||||
static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
|
||||
{
|
||||
@ -263,155 +18,6 @@ static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
|
||||
prop->dmmu.end_addr);
|
||||
}
|
||||
|
||||
static int dram_default_mapping_init(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
|
||||
hop2_pte_addr, hop3_pte_addr, pte_val;
|
||||
int rc, i, j, hop3_allocated = 0;
|
||||
|
||||
if ((!hdev->dram_supports_virtual_memory) ||
|
||||
(!hdev->dram_default_page_mapping) ||
|
||||
(ctx->asid == HL_KERNEL_ASID_ID))
|
||||
return 0;
|
||||
|
||||
num_of_hop3 = prop->dram_size_for_default_page_mapping;
|
||||
do_div(num_of_hop3, prop->dram_page_size);
|
||||
do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
|
||||
|
||||
/* add hop1 and hop2 */
|
||||
total_hops = num_of_hop3 + 2;
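	/*
	 * Worked example with assumed numbers: 16 GB reserved for the default
	 * page mapping, 2 MB DRAM pages and 512 PTEs per hop give
	 * num_of_hop3 = 16 GB / 2 MB / 512 = 16, hence total_hops = 18
	 * (16 hop3 tables plus the single hop1 and hop2 tables).
	 */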
|
||||
|
||||
ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
|
||||
if (!ctx->dram_default_hops)
|
||||
return -ENOMEM;
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
|
||||
hop1_addr = alloc_hop(ctx);
|
||||
if (hop1_addr == ULLONG_MAX) {
|
||||
dev_err(hdev->dev, "failed to alloc hop 1\n");
|
||||
rc = -ENOMEM;
|
||||
goto hop1_err;
|
||||
}
|
||||
|
||||
ctx->dram_default_hops[total_hops - 1] = hop1_addr;
|
||||
|
||||
hop2_addr = alloc_hop(ctx);
|
||||
if (hop2_addr == ULLONG_MAX) {
|
||||
dev_err(hdev->dev, "failed to alloc hop 2\n");
|
||||
rc = -ENOMEM;
|
||||
goto hop2_err;
|
||||
}
|
||||
|
||||
ctx->dram_default_hops[total_hops - 2] = hop2_addr;
|
||||
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
ctx->dram_default_hops[i] = alloc_hop(ctx);
|
||||
if (ctx->dram_default_hops[i] == ULLONG_MAX) {
|
||||
dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
|
||||
rc = -ENOMEM;
|
||||
goto hop3_err;
|
||||
}
|
||||
hop3_allocated++;
|
||||
}
|
||||
|
||||
/* need only pte 0 in hops 0 and 1 */
|
||||
pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop0_addr, pte_val);
|
||||
|
||||
pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop1_addr, pte_val);
|
||||
get_pte(ctx, hop1_addr);
|
||||
|
||||
hop2_pte_addr = hop2_addr;
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
|
||||
PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop2_pte_addr, pte_val);
|
||||
get_pte(ctx, hop2_addr);
|
||||
hop2_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
|
||||
pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
|
||||
LAST_MASK | PAGE_PRESENT_MASK;
|
||||
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
hop3_pte_addr = ctx->dram_default_hops[i];
|
||||
for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
|
||||
write_final_pte(ctx, hop3_pte_addr, pte_val);
|
||||
get_pte(ctx, ctx->dram_default_hops[i]);
|
||||
hop3_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
flush(ctx);
|
||||
|
||||
return 0;
|
||||
|
||||
hop3_err:
|
||||
for (i = 0 ; i < hop3_allocated ; i++)
|
||||
free_hop(ctx, ctx->dram_default_hops[i]);
|
||||
|
||||
free_hop(ctx, hop2_addr);
|
||||
hop2_err:
|
||||
free_hop(ctx, hop1_addr);
|
||||
hop1_err:
|
||||
kfree(ctx->dram_default_hops);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void dram_default_mapping_fini(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
|
||||
hop2_pte_addr, hop3_pte_addr;
|
||||
int i, j;
|
||||
|
||||
if ((!hdev->dram_supports_virtual_memory) ||
|
||||
(!hdev->dram_default_page_mapping) ||
|
||||
(ctx->asid == HL_KERNEL_ASID_ID))
|
||||
return;
|
||||
|
||||
num_of_hop3 = prop->dram_size_for_default_page_mapping;
|
||||
do_div(num_of_hop3, prop->dram_page_size);
|
||||
do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
/* add hop1 and hop2 */
|
||||
total_hops = num_of_hop3 + 2;
|
||||
hop1_addr = ctx->dram_default_hops[total_hops - 1];
|
||||
hop2_addr = ctx->dram_default_hops[total_hops - 2];
|
||||
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
hop3_pte_addr = ctx->dram_default_hops[i];
|
||||
for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
|
||||
clear_pte(ctx, hop3_pte_addr);
|
||||
put_pte(ctx, ctx->dram_default_hops[i]);
|
||||
hop3_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
hop2_pte_addr = hop2_addr;
|
||||
hop2_pte_addr = hop2_addr;
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
clear_pte(ctx, hop2_pte_addr);
|
||||
put_pte(ctx, hop2_addr);
|
||||
hop2_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
|
||||
clear_pte(ctx, hop1_addr);
|
||||
put_pte(ctx, hop1_addr);
|
||||
clear_pte(ctx, hop0_addr);
|
||||
|
||||
kfree(ctx->dram_default_hops);
|
||||
|
||||
flush(ctx);
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_mmu_init() - initialize the MMU module.
|
||||
* @hdev: habanalabs device structure.
|
||||
@ -424,45 +30,10 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
|
||||
*/
|
||||
int hl_mmu_init(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
int rc;
|
||||
|
||||
if (!hdev->mmu_enable)
|
||||
return 0;
|
||||
|
||||
hdev->mmu_pgt_pool =
|
||||
gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
|
||||
|
||||
if (!hdev->mmu_pgt_pool) {
|
||||
dev_err(hdev->dev, "Failed to create page gen pool\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
|
||||
prop->mmu_hop0_tables_total_size,
|
||||
prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
|
||||
-1);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
|
||||
goto err_pool_add;
|
||||
}
|
||||
|
||||
hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
|
||||
prop->mmu_hop_table_size,
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (ZERO_OR_NULL_PTR(hdev->mmu_shadow_hop0)) {
|
||||
rc = -ENOMEM;
|
||||
goto err_pool_add;
|
||||
}
|
||||
|
||||
/* MMU H/W init will be done in device hw_init() */
|
||||
if (hdev->mmu_enable)
|
||||
return hdev->mmu_func.init(hdev);
|
||||
|
||||
return 0;
|
||||
|
||||
err_pool_add:
|
||||
gen_pool_destroy(hdev->mmu_pgt_pool);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -477,13 +48,8 @@ int hl_mmu_init(struct hl_device *hdev)
|
||||
*/
|
||||
void hl_mmu_fini(struct hl_device *hdev)
|
||||
{
|
||||
if (!hdev->mmu_enable)
|
||||
return;
|
||||
|
||||
/* MMU H/W fini was already done in device hw_fini() */
|
||||
|
||||
kvfree(hdev->mmu_shadow_hop0);
|
||||
gen_pool_destroy(hdev->mmu_pgt_pool);
|
||||
if (hdev->mmu_enable)
|
||||
hdev->mmu_func.fini(hdev);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -498,13 +64,10 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
||||
if (!hdev->mmu_enable)
|
||||
return 0;
|
||||
if (hdev->mmu_enable)
|
||||
return hdev->mmu_func.ctx_init(ctx);
|
||||
|
||||
mutex_init(&ctx->mmu_lock);
|
||||
hash_init(ctx->mmu_shadow_hash);
|
||||
|
||||
return dram_default_mapping_init(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -520,160 +83,9 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
|
||||
void hl_mmu_ctx_fini(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct pgt_info *pgt_info;
|
||||
struct hlist_node *tmp;
|
||||
int i;
|
||||
|
||||
if (!hdev->mmu_enable)
|
||||
return;
|
||||
|
||||
dram_default_mapping_fini(ctx);
|
||||
|
||||
if (!hash_empty(ctx->mmu_shadow_hash))
|
||||
dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
|
||||
ctx->asid);
|
||||
|
||||
hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
|
||||
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
|
||||
_free_hop(ctx, pgt_info);
|
||||
}
|
||||
|
||||
mutex_destroy(&ctx->mmu_lock);
|
||||
}
|
||||
|
||||
static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct hl_mmu_properties *mmu_prop;
|
||||
u64 hop0_addr = 0, hop0_pte_addr = 0,
|
||||
hop1_addr = 0, hop1_pte_addr = 0,
|
||||
hop2_addr = 0, hop2_pte_addr = 0,
|
||||
hop3_addr = 0, hop3_pte_addr = 0,
|
||||
hop4_addr = 0, hop4_pte_addr = 0,
|
||||
curr_pte;
|
||||
bool is_huge, clear_hop3 = true;
|
||||
|
||||
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
|
||||
|
||||
hop1_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop1_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
|
||||
|
||||
hop2_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop2_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
|
||||
|
||||
hop3_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop3_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
|
||||
|
||||
is_huge = curr_pte & LAST_MASK;
|
||||
|
||||
if (is_dram_addr && !is_huge) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM unmapping should use huge pages only\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (!is_huge) {
|
||||
hop4_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop4_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
|
||||
virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
|
||||
|
||||
clear_hop3 = false;
|
||||
}
|
||||
|
||||
if (hdev->dram_default_page_mapping && is_dram_addr) {
|
||||
u64 default_pte = (prop->mmu_dram_default_page_addr &
|
||||
HOP_PHYS_ADDR_MASK) | LAST_MASK |
|
||||
PAGE_PRESENT_MASK;
|
||||
if (curr_pte == default_pte) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
|
||||
virt_addr);
|
||||
goto not_mapped;
|
||||
}
|
||||
|
||||
if (!(curr_pte & PAGE_PRESENT_MASK)) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
|
||||
virt_addr);
|
||||
goto not_mapped;
|
||||
}
|
||||
|
||||
write_final_pte(ctx, hop3_pte_addr, default_pte);
|
||||
put_pte(ctx, hop3_addr);
|
||||
} else {
|
||||
if (!(curr_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
|
||||
if (hop4_addr)
|
||||
clear_pte(ctx, hop4_pte_addr);
|
||||
else
|
||||
clear_pte(ctx, hop3_pte_addr);
|
||||
|
||||
if (hop4_addr && !put_pte(ctx, hop4_addr))
|
||||
clear_hop3 = true;
|
||||
|
||||
if (!clear_hop3)
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop3_pte_addr);
|
||||
|
||||
if (put_pte(ctx, hop3_addr))
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop2_pte_addr);
|
||||
|
||||
if (put_pte(ctx, hop2_addr))
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop1_pte_addr);
|
||||
|
||||
if (put_pte(ctx, hop1_addr))
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop0_pte_addr);
|
||||
}
|
||||
|
||||
mapped:
|
||||
return 0;
|
||||
|
||||
not_mapped:
|
||||
dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
|
||||
virt_addr);
|
||||
|
||||
return -EINVAL;
|
||||
if (hdev->mmu_enable)
|
||||
hdev->mmu_func.ctx_fini(ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -738,7 +150,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
||||
real_virt_addr = virt_addr;
|
||||
|
||||
for (i = 0 ; i < npages ; i++) {
|
||||
rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
|
||||
rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr);
|
||||
if (rc)
|
||||
break;
|
||||
|
||||
@ -746,172 +158,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
||||
}
|
||||
|
||||
if (flush_pte)
|
||||
flush(ctx);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
|
||||
u32 page_size, bool is_dram_addr)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct hl_mmu_properties *mmu_prop;
|
||||
u64 hop0_addr = 0, hop0_pte_addr = 0,
|
||||
hop1_addr = 0, hop1_pte_addr = 0,
|
||||
hop2_addr = 0, hop2_pte_addr = 0,
|
||||
hop3_addr = 0, hop3_pte_addr = 0,
|
||||
hop4_addr = 0, hop4_pte_addr = 0,
|
||||
curr_pte = 0;
|
||||
bool hop1_new = false, hop2_new = false, hop3_new = false,
|
||||
hop4_new = false, is_huge;
|
||||
int rc = -ENOMEM;
|
||||
|
||||
/*
* This mapping function can map a page or a huge page. For huge page
* there are only 3 hops rather than 4. Currently the DRAM allocation
* uses huge pages only but user memory could have been allocated with
* one of the two page sizes. Since this is a common code for all the
* three cases, we need this huge page check.
*/
|
||||
if (is_dram_addr) {
|
||||
mmu_prop = &prop->dmmu;
|
||||
is_huge = true;
|
||||
} else if (page_size == prop->pmmu_huge.page_size) {
|
||||
mmu_prop = &prop->pmmu_huge;
|
||||
is_huge = true;
|
||||
} else {
|
||||
mmu_prop = &prop->pmmu;
|
||||
is_huge = false;
|
||||
}
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
|
||||
|
||||
hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
|
||||
if (hop1_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
|
||||
|
||||
hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
|
||||
if (hop2_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
|
||||
|
||||
hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
|
||||
if (hop3_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
|
||||
|
||||
if (!is_huge) {
|
||||
hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
|
||||
if (hop4_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
|
||||
virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
|
||||
}
|
||||
|
||||
if (hdev->dram_default_page_mapping && is_dram_addr) {
|
||||
u64 default_pte = (prop->mmu_dram_default_page_addr &
|
||||
HOP_PHYS_ADDR_MASK) | LAST_MASK |
|
||||
PAGE_PRESENT_MASK;
|
||||
|
||||
if (curr_pte != default_pte) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM: mapping already exists for virt_addr 0x%llx\n",
|
||||
virt_addr);
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (hop1_new || hop2_new || hop3_new || hop4_new) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM mapping should not allocate more hops\n");
|
||||
rc = -EFAULT;
|
||||
goto err;
|
||||
}
|
||||
} else if (curr_pte & PAGE_PRESENT_MASK) {
|
||||
dev_err(hdev->dev,
|
||||
"mapping already exists for virt_addr 0x%llx\n",
|
||||
virt_addr);
|
||||
|
||||
dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
|
||||
dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
|
||||
dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
|
||||
dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
|
||||
|
||||
if (!is_huge)
|
||||
dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop4_pte_addr,
|
||||
hop4_pte_addr);
|
||||
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
|
||||
| PAGE_PRESENT_MASK;
|
||||
|
||||
if (is_huge)
|
||||
write_final_pte(ctx, hop3_pte_addr, curr_pte);
|
||||
else
|
||||
write_final_pte(ctx, hop4_pte_addr, curr_pte);
|
||||
|
||||
if (hop1_new) {
|
||||
curr_pte =
|
||||
(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop0_pte_addr, curr_pte);
|
||||
}
|
||||
if (hop2_new) {
|
||||
curr_pte =
|
||||
(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop1_pte_addr, curr_pte);
|
||||
get_pte(ctx, hop1_addr);
|
||||
}
|
||||
if (hop3_new) {
|
||||
curr_pte =
|
||||
(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop2_pte_addr, curr_pte);
|
||||
get_pte(ctx, hop2_addr);
|
||||
}
|
||||
|
||||
if (!is_huge) {
|
||||
if (hop4_new) {
|
||||
curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
|
||||
PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop3_pte_addr, curr_pte);
|
||||
get_pte(ctx, hop3_addr);
|
||||
}
|
||||
|
||||
get_pte(ctx, hop4_addr);
|
||||
} else {
|
||||
get_pte(ctx, hop3_addr);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
if (hop4_new)
|
||||
free_hop(ctx, hop4_addr);
|
||||
if (hop3_new)
|
||||
free_hop(ctx, hop3_addr);
|
||||
if (hop2_new)
|
||||
free_hop(ctx, hop2_addr);
|
||||
if (hop1_new)
|
||||
free_hop(ctx, hop1_addr);
|
||||
hdev->mmu_func.flush(ctx);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -984,7 +231,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
|
||||
real_phys_addr = phys_addr;
|
||||
|
||||
for (i = 0 ; i < npages ; i++) {
|
||||
rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
|
||||
rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr,
|
||||
real_page_size, is_dram_addr);
|
||||
if (rc)
|
||||
goto err;
|
||||
@ -995,21 +242,21 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
|
||||
}
|
||||
|
||||
if (flush_pte)
|
||||
flush(ctx);
|
||||
hdev->mmu_func.flush(ctx);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
real_virt_addr = virt_addr;
|
||||
for (i = 0 ; i < mapped_cnt ; i++) {
|
||||
if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
|
||||
if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"failed to unmap va: 0x%llx\n", real_virt_addr);
|
||||
|
||||
real_virt_addr += real_page_size;
|
||||
}
|
||||
|
||||
flush(ctx);
|
||||
hdev->mmu_func.flush(ctx);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -1022,7 +269,10 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
|
||||
*/
|
||||
void hl_mmu_swap_out(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
||||
if (hdev->mmu_enable)
|
||||
hdev->mmu_func.swap_out(ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1033,5 +283,27 @@ void hl_mmu_swap_out(struct hl_ctx *ctx)
|
||||
*/
|
||||
void hl_mmu_swap_in(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
||||
if (hdev->mmu_enable)
|
||||
hdev->mmu_func.swap_in(ctx);
|
||||
}
|
||||
|
||||
int hl_mmu_if_set_funcs(struct hl_device *hdev)
|
||||
{
|
||||
if (!hdev->mmu_enable)
|
||||
return 0;
|
||||
|
||||
switch (hdev->asic_type) {
|
||||
case ASIC_GOYA:
|
||||
case ASIC_GAUDI:
|
||||
hl_mmu_v1_set_funcs(hdev);
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
|
||||
hdev->asic_type);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
863
drivers/misc/habanalabs/common/mmu_v1.c
Normal file
@ -0,0 +1,863 @@
// SPDX-License-Identifier: GPL-2.0

/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*/

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
|
||||
|
||||
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
struct pgt_info *pgt_info = NULL;
|
||||
|
||||
hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
|
||||
(unsigned long) hop_addr)
|
||||
if (hop_addr == pgt_info->shadow_addr)
|
||||
break;
|
||||
|
||||
return pgt_info;
|
||||
}
|
||||
|
||||
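/* return the hop's physical page table to the pgt pool, remove it from the
* shadow hash and free its kernel shadow copy
*/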
static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
||||
gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
|
||||
hdev->asic_prop.mmu_hop_table_size);
|
||||
hash_del(&pgt_info->node);
|
||||
kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
|
||||
kfree(pgt_info);
|
||||
}
|
||||
|
||||
static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
|
||||
|
||||
_free_hop(ctx, pgt_info);
|
||||
}
|
||||
|
||||
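/* allocate a hop: a physical page table from the pgt pool plus a zeroed
* shadow copy in host memory, tracked in the shadow hash; returns the shadow
* address, or ULLONG_MAX on failure
*/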
static u64 alloc_hop(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct pgt_info *pgt_info;
|
||||
u64 phys_addr, shadow_addr;
|
||||
|
||||
pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
|
||||
if (!pgt_info)
|
||||
return ULLONG_MAX;
|
||||
|
||||
phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
|
||||
prop->mmu_hop_table_size);
|
||||
if (!phys_addr) {
|
||||
dev_err(hdev->dev, "failed to allocate page\n");
|
||||
goto pool_add_err;
|
||||
}
|
||||
|
||||
shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
|
||||
GFP_KERNEL);
|
||||
if (!shadow_addr)
|
||||
goto shadow_err;
|
||||
|
||||
pgt_info->phys_addr = phys_addr;
|
||||
pgt_info->shadow_addr = shadow_addr;
|
||||
pgt_info->ctx = ctx;
|
||||
pgt_info->num_of_ptes = 0;
|
||||
hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
|
||||
|
||||
return shadow_addr;
|
||||
|
||||
shadow_err:
|
||||
gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
|
||||
prop->mmu_hop_table_size);
|
||||
pool_add_err:
|
||||
kfree(pgt_info);
|
||||
|
||||
return ULLONG_MAX;
|
||||
}
|
||||
|
||||
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
|
||||
{
|
||||
return ctx->hdev->asic_prop.mmu_pgt_addr +
|
||||
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
|
||||
}
|
||||
|
||||
static inline u64 get_hop0_addr(struct hl_ctx *ctx)
|
||||
{
|
||||
return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
|
||||
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
|
||||
}
|
||||
|
||||
static void flush(struct hl_ctx *ctx)
{
/* flush all writes from all cores to reach PCI */
mb();
ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}
|
||||
|
||||
/* transform the value to physical address when writing to H/W */
|
||||
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
|
||||
{
|
||||
/*
* The value to write is actually the address of the next shadow hop +
* flags at the 12 LSBs.
* Hence in order to get the value to write to the physical PTE, we
* clear the 12 LSBs and translate the shadow hop to its associated
* physical hop, and add back the original 12 LSBs.
*/
|
||||
u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
|
||||
(val & FLAGS_MASK);
|
||||
|
||||
ctx->hdev->asic_funcs->write_pte(ctx->hdev,
|
||||
get_phys_addr(ctx, shadow_pte_addr),
|
||||
phys_val);
|
||||
|
||||
*(u64 *) (uintptr_t) shadow_pte_addr = val;
|
||||
}
|
||||
|
||||
/* do not transform the value to physical address when writing to H/W */
|
||||
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
|
||||
u64 val)
|
||||
{
|
||||
ctx->hdev->asic_funcs->write_pte(ctx->hdev,
|
||||
get_phys_addr(ctx, shadow_pte_addr),
|
||||
val);
|
||||
*(u64 *) (uintptr_t) shadow_pte_addr = val;
|
||||
}
|
||||
|
||||
/* clear the last and present bits */
|
||||
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
|
||||
{
|
||||
/* no need to transform the value to physical address */
|
||||
write_final_pte(ctx, pte_addr, 0);
|
||||
}
|
||||
|
||||
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
get_pgt_info(ctx, hop_addr)->num_of_ptes++;
|
||||
}
|
||||
|
||||
/*
* put_pte - decrement the num of ptes and free the hop if possible
*
* @ctx: pointer to the context structure
* @hop_addr: addr of the hop
*
* This function returns the number of ptes left on this hop. If the number is
* 0, it means the pte was freed.
*/
|
||||
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
|
||||
{
|
||||
struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
|
||||
int num_of_ptes_left;
|
||||
|
||||
pgt_info->num_of_ptes--;
|
||||
|
||||
/*
* Need to save the number of ptes left because free_hop might free
* the pgt_info
*/
|
||||
num_of_ptes_left = pgt_info->num_of_ptes;
|
||||
if (!num_of_ptes_left)
|
||||
_free_hop(ctx, pgt_info);
|
||||
|
||||
return num_of_ptes_left;
|
||||
}
|
||||
|
||||
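/* address of the PTE, inside the given hop table, that covers @virt_addr at
* this translation level
*/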
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
|
||||
u64 virt_addr, u64 mask, u64 shift)
|
||||
{
|
||||
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
|
||||
((virt_addr & mask) >> shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
|
||||
mmu_prop->hop0_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
|
||||
mmu_prop->hop1_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
|
||||
mmu_prop->hop2_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
|
||||
mmu_prop->hop3_shift);
|
||||
}
|
||||
|
||||
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
|
||||
struct hl_mmu_properties *mmu_prop,
|
||||
u64 hop_addr, u64 vaddr)
|
||||
{
|
||||
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
|
||||
mmu_prop->hop4_shift);
|
||||
}
|
||||
|
||||
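/* extract the next hop's address from a PTE; ULLONG_MAX means the PTE is not
* present
*/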
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
|
||||
{
|
||||
if (curr_pte & PAGE_PRESENT_MASK)
|
||||
return curr_pte & HOP_PHYS_ADDR_MASK;
|
||||
else
|
||||
return ULLONG_MAX;
|
||||
}
|
||||
|
||||
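/* same as get_next_hop_addr(), but allocate a new hop when the PTE is not
* present
*/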
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
|
||||
bool *is_new_hop)
|
||||
{
|
||||
u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop_addr == ULLONG_MAX) {
|
||||
hop_addr = alloc_hop(ctx);
|
||||
*is_new_hop = (hop_addr != ULLONG_MAX);
|
||||
}
|
||||
|
||||
return hop_addr;
|
||||
}
|
||||
|
||||
/* translates shadow address inside hop to a physical address */
|
||||
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
|
||||
{
|
||||
u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
|
||||
u64 shadow_hop_addr = shadow_addr & ~page_mask;
|
||||
u64 pte_offset = shadow_addr & page_mask;
|
||||
u64 phys_hop_addr;
|
||||
|
||||
if (shadow_hop_addr != get_hop0_addr(ctx))
|
||||
phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
|
||||
else
|
||||
phys_hop_addr = get_phys_hop0_addr(ctx);
|
||||
|
||||
return phys_hop_addr + pte_offset;
|
||||
}
|
||||
|
||||
static int dram_default_mapping_init(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
|
||||
hop2_pte_addr, hop3_pte_addr, pte_val;
|
||||
int rc, i, j, hop3_allocated = 0;
|
||||
|
||||
if ((!hdev->dram_supports_virtual_memory) ||
|
||||
(!hdev->dram_default_page_mapping) ||
|
||||
(ctx->asid == HL_KERNEL_ASID_ID))
|
||||
return 0;
|
||||
|
||||
num_of_hop3 = prop->dram_size_for_default_page_mapping;
|
||||
do_div(num_of_hop3, prop->dram_page_size);
|
||||
do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
|
||||
|
||||
/* add hop1 and hop2 */
|
||||
total_hops = num_of_hop3 + 2;
|
||||
|
||||
ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
|
||||
if (!ctx->dram_default_hops)
|
||||
return -ENOMEM;
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
|
||||
hop1_addr = alloc_hop(ctx);
|
||||
if (hop1_addr == ULLONG_MAX) {
|
||||
dev_err(hdev->dev, "failed to alloc hop 1\n");
|
||||
rc = -ENOMEM;
|
||||
goto hop1_err;
|
||||
}
|
||||
|
||||
ctx->dram_default_hops[total_hops - 1] = hop1_addr;
|
||||
|
||||
hop2_addr = alloc_hop(ctx);
|
||||
if (hop2_addr == ULLONG_MAX) {
|
||||
dev_err(hdev->dev, "failed to alloc hop 2\n");
|
||||
rc = -ENOMEM;
|
||||
goto hop2_err;
|
||||
}
|
||||
|
||||
ctx->dram_default_hops[total_hops - 2] = hop2_addr;
|
||||
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
ctx->dram_default_hops[i] = alloc_hop(ctx);
|
||||
if (ctx->dram_default_hops[i] == ULLONG_MAX) {
|
||||
dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
|
||||
rc = -ENOMEM;
|
||||
goto hop3_err;
|
||||
}
|
||||
hop3_allocated++;
|
||||
}
|
||||
|
||||
/* need only pte 0 in hops 0 and 1 */
|
||||
pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop0_addr, pte_val);
|
||||
|
||||
pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop1_addr, pte_val);
|
||||
get_pte(ctx, hop1_addr);
|
||||
|
||||
hop2_pte_addr = hop2_addr;
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
|
||||
PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop2_pte_addr, pte_val);
|
||||
get_pte(ctx, hop2_addr);
|
||||
hop2_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
|
||||
pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
|
||||
LAST_MASK | PAGE_PRESENT_MASK;
|
||||
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
hop3_pte_addr = ctx->dram_default_hops[i];
|
||||
for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
|
||||
write_final_pte(ctx, hop3_pte_addr, pte_val);
|
||||
get_pte(ctx, ctx->dram_default_hops[i]);
|
||||
hop3_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
flush(ctx);
|
||||
|
||||
return 0;
|
||||
|
||||
hop3_err:
|
||||
for (i = 0 ; i < hop3_allocated ; i++)
|
||||
free_hop(ctx, ctx->dram_default_hops[i]);
|
||||
|
||||
free_hop(ctx, hop2_addr);
|
||||
hop2_err:
|
||||
free_hop(ctx, hop1_addr);
|
||||
hop1_err:
|
||||
kfree(ctx->dram_default_hops);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void dram_default_mapping_fini(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
|
||||
hop2_pte_addr, hop3_pte_addr;
|
||||
int i, j;
|
||||
|
||||
if ((!hdev->dram_supports_virtual_memory) ||
|
||||
(!hdev->dram_default_page_mapping) ||
|
||||
(ctx->asid == HL_KERNEL_ASID_ID))
|
||||
return;
|
||||
|
||||
num_of_hop3 = prop->dram_size_for_default_page_mapping;
|
||||
do_div(num_of_hop3, prop->dram_page_size);
|
||||
do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
/* add hop1 and hop2 */
|
||||
total_hops = num_of_hop3 + 2;
|
||||
hop1_addr = ctx->dram_default_hops[total_hops - 1];
|
||||
hop2_addr = ctx->dram_default_hops[total_hops - 2];
|
||||
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
hop3_pte_addr = ctx->dram_default_hops[i];
|
||||
for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
|
||||
clear_pte(ctx, hop3_pte_addr);
|
||||
put_pte(ctx, ctx->dram_default_hops[i]);
|
||||
hop3_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
hop2_pte_addr = hop2_addr;
|
||||
hop2_pte_addr = hop2_addr;
|
||||
for (i = 0 ; i < num_of_hop3 ; i++) {
|
||||
clear_pte(ctx, hop2_pte_addr);
|
||||
put_pte(ctx, hop2_addr);
|
||||
hop2_pte_addr += HL_PTE_SIZE;
|
||||
}
|
||||
|
||||
clear_pte(ctx, hop1_addr);
|
||||
put_pte(ctx, hop1_addr);
|
||||
clear_pte(ctx, hop0_addr);
|
||||
|
||||
kfree(ctx->dram_default_hops);
|
||||
|
||||
flush(ctx);
|
||||
}
|
||||
|
||||
/**
* hl_mmu_v1_init() - initialize the MMU module.
* @hdev: habanalabs device structure.
*
* This function does the following:
* - Create a pool of pages for pgt_infos.
* - Create a shadow table for pgt
*
* Return: 0 for success, non-zero for failure.
*/
|
||||
static int hl_mmu_v1_init(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
int rc;
|
||||
|
||||
hdev->mmu_priv.mmu_pgt_pool =
|
||||
gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
|
||||
|
||||
if (!hdev->mmu_priv.mmu_pgt_pool) {
|
||||
dev_err(hdev->dev, "Failed to create page gen pool\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
|
||||
prop->mmu_hop0_tables_total_size,
|
||||
prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
|
||||
-1);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
|
||||
goto err_pool_add;
|
||||
}
|
||||
|
||||
hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
|
||||
prop->mmu_hop_table_size,
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
|
||||
rc = -ENOMEM;
|
||||
goto err_pool_add;
|
||||
}
|
||||
|
||||
/* MMU H/W init will be done in device hw_init() */
|
||||
|
||||
return 0;
|
||||
|
||||
err_pool_add:
|
||||
gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
* hl_mmu_fini() - release the MMU module.
* @hdev: habanalabs device structure.
*
* This function does the following:
* - Disable MMU in H/W.
* - Free the pgt_infos pool.
*
* All contexts should be freed before calling this function.
*/
|
||||
static void hl_mmu_v1_fini(struct hl_device *hdev)
|
||||
{
|
||||
/* MMU H/W fini was already done in device hw_fini() */
|
||||
|
||||
kvfree(hdev->mmu_priv.mmu_shadow_hop0);
|
||||
gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
|
||||
}
|
||||
|
||||
/**
* hl_mmu_ctx_init() - initialize a context for using the MMU module.
* @ctx: pointer to the context structure to initialize.
*
* Initialize a mutex to protect the concurrent mapping flow and a hash to hold
* all the page table hops related to this context.
* Return: 0 on success, non-zero otherwise.
*/
|
||||
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
|
||||
{
|
||||
mutex_init(&ctx->mmu_lock);
|
||||
hash_init(ctx->mmu_shadow_hash);
|
||||
|
||||
return dram_default_mapping_init(ctx);
|
||||
}
|
||||
|
||||
/*
* hl_mmu_ctx_fini - disable a ctx from using the mmu module
*
* @ctx: pointer to the context structure
*
* This function does the following:
* - Free any pgts which were not freed yet
* - Free the mutex
* - Free DRAM default page mapping hops
*/
|
||||
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct pgt_info *pgt_info;
|
||||
struct hlist_node *tmp;
|
||||
int i;
|
||||
|
||||
dram_default_mapping_fini(ctx);
|
||||
|
||||
if (!hash_empty(ctx->mmu_shadow_hash))
|
||||
dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
|
||||
ctx->asid);
|
||||
|
||||
hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
|
||||
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
|
||||
_free_hop(ctx, pgt_info);
|
||||
}
|
||||
|
||||
mutex_destroy(&ctx->mmu_lock);
|
||||
}
|
||||
|
||||
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
|
||||
u64 virt_addr, bool is_dram_addr)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct hl_mmu_properties *mmu_prop;
|
||||
u64 hop0_addr = 0, hop0_pte_addr = 0,
|
||||
hop1_addr = 0, hop1_pte_addr = 0,
|
||||
hop2_addr = 0, hop2_pte_addr = 0,
|
||||
hop3_addr = 0, hop3_pte_addr = 0,
|
||||
hop4_addr = 0, hop4_pte_addr = 0,
|
||||
curr_pte;
|
||||
bool is_huge, clear_hop3 = true;
|
||||
|
||||
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
|
||||
|
||||
hop1_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop1_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
|
||||
|
||||
hop2_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop2_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
|
||||
|
||||
hop3_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop3_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
|
||||
|
||||
is_huge = curr_pte & LAST_MASK;
|
||||
|
||||
if (is_dram_addr && !is_huge) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM unmapping should use huge pages only\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (!is_huge) {
|
||||
hop4_addr = get_next_hop_addr(ctx, curr_pte);
|
||||
|
||||
if (hop4_addr == ULLONG_MAX)
|
||||
goto not_mapped;
|
||||
|
||||
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
|
||||
virt_addr);
|
||||
|
||||
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
|
||||
|
||||
clear_hop3 = false;
|
||||
}
|
||||
|
||||
if (hdev->dram_default_page_mapping && is_dram_addr) {
|
||||
u64 default_pte = (prop->mmu_dram_default_page_addr &
|
||||
HOP_PHYS_ADDR_MASK) | LAST_MASK |
|
||||
PAGE_PRESENT_MASK;
|
||||
if (curr_pte == default_pte) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
|
||||
virt_addr);
|
||||
goto not_mapped;
|
||||
}
|
||||
|
||||
if (!(curr_pte & PAGE_PRESENT_MASK)) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
|
||||
virt_addr);
|
||||
goto not_mapped;
|
||||
}
|
||||
|
||||
write_final_pte(ctx, hop3_pte_addr, default_pte);
|
||||
put_pte(ctx, hop3_addr);
|
||||
} else {
|
||||
if (!(curr_pte & PAGE_PRESENT_MASK))
|
||||
goto not_mapped;
|
||||
|
||||
if (hop4_addr)
|
||||
clear_pte(ctx, hop4_pte_addr);
|
||||
else
|
||||
clear_pte(ctx, hop3_pte_addr);
|
||||
|
||||
if (hop4_addr && !put_pte(ctx, hop4_addr))
|
||||
clear_hop3 = true;
|
||||
|
||||
if (!clear_hop3)
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop3_pte_addr);
|
||||
|
||||
if (put_pte(ctx, hop3_addr))
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop2_pte_addr);
|
||||
|
||||
if (put_pte(ctx, hop2_addr))
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop1_pte_addr);
|
||||
|
||||
if (put_pte(ctx, hop1_addr))
|
||||
goto mapped;
|
||||
|
||||
clear_pte(ctx, hop0_pte_addr);
|
||||
}
|
||||
|
||||
mapped:
|
||||
return 0;
|
||||
|
||||
not_mapped:
|
||||
dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
|
||||
virt_addr);
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
|
||||
u32 page_size, bool is_dram_addr)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct hl_mmu_properties *mmu_prop;
|
||||
u64 hop0_addr = 0, hop0_pte_addr = 0,
|
||||
hop1_addr = 0, hop1_pte_addr = 0,
|
||||
hop2_addr = 0, hop2_pte_addr = 0,
|
||||
hop3_addr = 0, hop3_pte_addr = 0,
|
||||
hop4_addr = 0, hop4_pte_addr = 0,
|
||||
curr_pte = 0;
|
||||
bool hop1_new = false, hop2_new = false, hop3_new = false,
|
||||
hop4_new = false, is_huge;
|
||||
int rc = -ENOMEM;
|
||||
|
||||
/*
* This mapping function can map a page or a huge page. For huge page
* there are only 3 hops rather than 4. Currently the DRAM allocation
* uses huge pages only but user memory could have been allocated with
* one of the two page sizes. Since this is a common code for all the
* three cases, we need this huge page check.
*/
|
||||
if (is_dram_addr) {
|
||||
mmu_prop = &prop->dmmu;
|
||||
is_huge = true;
|
||||
} else if (page_size == prop->pmmu_huge.page_size) {
|
||||
mmu_prop = &prop->pmmu_huge;
|
||||
is_huge = true;
|
||||
} else {
|
||||
mmu_prop = &prop->pmmu;
|
||||
is_huge = false;
|
||||
}
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
|
||||
|
||||
hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
|
||||
if (hop1_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
|
||||
|
||||
hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
|
||||
if (hop2_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
|
||||
|
||||
hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
|
||||
if (hop3_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
|
||||
|
||||
if (!is_huge) {
|
||||
hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
|
||||
if (hop4_addr == ULLONG_MAX)
|
||||
goto err;
|
||||
|
||||
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
|
||||
virt_addr);
|
||||
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
|
||||
}
|
||||
|
||||
if (hdev->dram_default_page_mapping && is_dram_addr) {
|
||||
u64 default_pte = (prop->mmu_dram_default_page_addr &
|
||||
HOP_PHYS_ADDR_MASK) | LAST_MASK |
|
||||
PAGE_PRESENT_MASK;
|
||||
|
||||
if (curr_pte != default_pte) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM: mapping already exists for virt_addr 0x%llx\n",
|
||||
virt_addr);
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (hop1_new || hop2_new || hop3_new || hop4_new) {
|
||||
dev_err(hdev->dev,
|
||||
"DRAM mapping should not allocate more hops\n");
|
||||
rc = -EFAULT;
|
||||
goto err;
|
||||
}
|
||||
} else if (curr_pte & PAGE_PRESENT_MASK) {
|
||||
dev_err(hdev->dev,
|
||||
"mapping already exists for virt_addr 0x%llx\n",
|
||||
virt_addr);
|
||||
|
||||
dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
|
||||
dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
|
||||
dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
|
||||
dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
|
||||
|
||||
if (!is_huge)
|
||||
dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
|
||||
*(u64 *) (uintptr_t) hop4_pte_addr,
|
||||
hop4_pte_addr);
|
||||
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
|
||||
| PAGE_PRESENT_MASK;
|
||||
|
||||
if (is_huge)
|
||||
write_final_pte(ctx, hop3_pte_addr, curr_pte);
|
||||
else
|
||||
write_final_pte(ctx, hop4_pte_addr, curr_pte);
|
||||
|
||||
if (hop1_new) {
|
||||
curr_pte =
|
||||
(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop0_pte_addr, curr_pte);
|
||||
}
|
||||
if (hop2_new) {
|
||||
curr_pte =
|
||||
(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop1_pte_addr, curr_pte);
|
||||
get_pte(ctx, hop1_addr);
|
||||
}
|
||||
if (hop3_new) {
|
||||
curr_pte =
|
||||
(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop2_pte_addr, curr_pte);
|
||||
get_pte(ctx, hop2_addr);
|
||||
}
|
||||
|
||||
if (!is_huge) {
|
||||
if (hop4_new) {
|
||||
curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
|
||||
PAGE_PRESENT_MASK;
|
||||
write_pte(ctx, hop3_pte_addr, curr_pte);
|
||||
get_pte(ctx, hop3_addr);
|
||||
}
|
||||
|
||||
get_pte(ctx, hop4_addr);
|
||||
} else {
|
||||
get_pte(ctx, hop3_addr);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
if (hop4_new)
|
||||
free_hop(ctx, hop4_addr);
|
||||
if (hop3_new)
|
||||
free_hop(ctx, hop3_addr);
|
||||
if (hop2_new)
|
||||
free_hop(ctx, hop2_addr);
|
||||
if (hop1_new)
|
||||
free_hop(ctx, hop1_addr);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
* hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
*
* @ctx: pointer to the context structure
*
*/
|
||||
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/*
* hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
*
* @ctx: pointer to the context structure
*
*/
|
||||
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/*
* hl_mmu_v1_prepare - prepare mmu for working with mmu v1
*
* @hdev: pointer to the device structure
*/
|
||||
void hl_mmu_v1_set_funcs(struct hl_device *hdev)
{
struct hl_mmu_funcs *mmu = &hdev->mmu_func;

mmu->init = hl_mmu_v1_init;
mmu->fini = hl_mmu_v1_fini;
mmu->ctx_init = hl_mmu_v1_ctx_init;
mmu->ctx_fini = hl_mmu_v1_ctx_fini;
mmu->map = _hl_mmu_v1_map;
mmu->unmap = _hl_mmu_v1_unmap;
mmu->flush = flush;
mmu->swap_out = hl_mmu_v1_swap_out;
mmu->swap_in = hl_mmu_v1_swap_in;
}
|
@ -9,7 +9,6 @@
|
||||
#include "../include/hw_ip/pci/pci_general.h"
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
|
||||
|
||||
@ -339,12 +338,17 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
|
||||
/**
* hl_pci_init() - PCI initialization code.
* @hdev: Pointer to hl_device structure.
* @cpu_boot_status_reg: status register of the device's CPU
* @boot_err0_reg: boot error register of the device's CPU
* @preboot_ver_timeout: how much to wait before bailing out on reading
* the preboot version
*
* Set DMA masks, initialize the PCI controller and map the PCI BARs.
*
* Return: 0 on success, non-zero for failure.
*/
|
||||
int hl_pci_init(struct hl_device *hdev)
|
||||
int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 boot_err0_reg, u32 preboot_ver_timeout)
|
||||
{
|
||||
struct pci_dev *pdev = hdev->pdev;
|
||||
int rc;
|
||||
@ -376,6 +380,15 @@ int hl_pci_init(struct hl_device *hdev)
|
||||
if (rc)
|
||||
goto unmap_pci_bars;
|
||||
|
||||
/* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware.
* The check will be done in each ASIC's specific code.
*/
|
||||
rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg,
|
||||
preboot_ver_timeout);
|
||||
if (rc)
|
||||
goto unmap_pci_bars;
|
||||
|
||||
return 0;
|
||||
|
||||
unmap_pci_bars:
|
||||
|
@ -11,18 +11,18 @@
|
||||
|
||||
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
if (curr)
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
else
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.pll_index = cpu_to_le32(pll_index);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
@ -40,13 +40,13 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
|
||||
|
||||
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.pll_index = cpu_to_le32(pll_index);
|
||||
pkt.value = cpu_to_le64(freq);
|
||||
|
||||
@ -61,14 +61,14 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
|
||||
|
||||
u64 hl_get_max_power(struct hl_device *hdev)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
0, &result);
|
||||
@ -83,13 +83,13 @@ u64 hl_get_max_power(struct hl_device *hdev)
|
||||
|
||||
void hl_set_max_power(struct hl_device *hdev)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.value = cpu_to_le64(hdev->max_power);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
@ -112,7 +112,7 @@ static ssize_t armcp_kernel_ver_show(struct device *dev,
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
|
||||
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
|
||||
}
|
||||
|
||||
static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
@ -120,7 +120,7 @@ static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
|
||||
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
|
||||
}
|
||||
|
||||
static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
@ -129,7 +129,23 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "0x%08x\n",
|
||||
hdev->asic_prop.armcp_info.cpld_version);
|
||||
hdev->asic_prop.cpucp_info.cpld_version);
|
||||
}
|
||||
|
||||
static ssize_t cpucp_kernel_ver_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
|
||||
}
|
||||
|
||||
static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
|
||||
}
|
||||
|
||||
static ssize_t infineon_ver_show(struct device *dev,
|
||||
@ -138,7 +154,7 @@ static ssize_t infineon_ver_show(struct device *dev,
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "0x%04x\n",
|
||||
hdev->asic_prop.armcp_info.infineon_version);
|
||||
hdev->asic_prop.cpucp_info.infineon_version);
|
||||
}
|
||||
|
||||
static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
@ -146,7 +162,7 @@ static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
|
||||
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.fuse_version);
|
||||
}
|
||||
|
||||
static ssize_t thermal_ver_show(struct device *dev,
|
||||
@ -154,7 +170,7 @@ static ssize_t thermal_ver_show(struct device *dev,
|
||||
{
|
||||
struct hl_device *hdev = dev_get_drvdata(dev);
|
||||
|
||||
return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
|
||||
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
|
||||
}
|
||||
|
||||
static ssize_t preboot_btl_ver_show(struct device *dev,
|
||||
@ -356,6 +372,8 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
|
||||
static DEVICE_ATTR_RO(armcp_kernel_ver);
|
||||
static DEVICE_ATTR_RO(armcp_ver);
|
||||
static DEVICE_ATTR_RO(cpld_ver);
|
||||
static DEVICE_ATTR_RO(cpucp_kernel_ver);
|
||||
static DEVICE_ATTR_RO(cpucp_ver);
|
||||
static DEVICE_ATTR_RO(device_type);
|
||||
static DEVICE_ATTR_RO(fuse_ver);
|
||||
static DEVICE_ATTR_WO(hard_reset);
|
||||
@ -380,6 +398,8 @@ static struct attribute *hl_dev_attrs[] = {
|
||||
&dev_attr_armcp_kernel_ver.attr,
|
||||
&dev_attr_armcp_ver.attr,
|
||||
&dev_attr_cpld_ver.attr,
|
||||
&dev_attr_cpucp_kernel_ver.attr,
|
||||
&dev_attr_cpucp_ver.attr,
|
||||
&dev_attr_device_type.attr,
|
||||
&dev_attr_fuse_ver.attr,
|
||||
&dev_attr_hard_reset.attr,
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include <linux/io-64-nonatomic-lo-hi.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
/*
|
||||
* Gaudi security scheme:
|
||||
@ -360,13 +359,14 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
|
||||
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
|
||||
u32 tpc_id);
|
||||
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
|
||||
static int gaudi_armcp_info_get(struct hl_device *hdev);
|
||||
static int gaudi_cpucp_info_get(struct hl_device *hdev);
|
||||
static void gaudi_disable_clock_gating(struct hl_device *hdev);
|
||||
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
|
||||
|
||||
static int gaudi_get_fixed_properties(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u32 num_sync_stream_queues = 0;
|
||||
int i;
|
||||
|
||||
prop->max_queues = GAUDI_QUEUE_ID_SIZE;
|
||||
@ -383,6 +383,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
|
||||
prop->hw_queues_props[i].driver_only = 0;
|
||||
prop->hw_queues_props[i].requires_kernel_cb = 1;
|
||||
prop->hw_queues_props[i].supports_sync_stream = 1;
|
||||
num_sync_stream_queues++;
|
||||
} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
|
||||
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
|
||||
prop->hw_queues_props[i].driver_only = 1;
|
||||
@ -440,6 +441,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
|
||||
prop->pmmu.end_addr =
|
||||
(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
|
||||
prop->pmmu.page_size = PAGE_SIZE_4KB;
|
||||
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
|
||||
|
||||
/* PMMU and HPMMU are the same except of page size */
|
||||
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
|
||||
@ -464,11 +466,16 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
|
||||
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
|
||||
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
|
||||
|
||||
strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
|
||||
|
||||
prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
|
||||
num_sync_stream_queues * HL_RSVD_SOBS;
|
||||
prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
|
||||
num_sync_stream_queues * HL_RSVD_MONS;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -592,10 +599,15 @@ static int gaudi_early_init(struct hl_device *hdev)
|
||||
|
||||
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
|
||||
|
||||
rc = hl_pci_init(hdev);
|
||||
rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
if (rc)
|
||||
goto free_queue_props;
|
||||
|
||||
/* GAUDI Firmware does not yet support security */
|
||||
prop->fw_security_disabled = true;
|
||||
dev_info(hdev->dev, "firmware-level security is disabled\n");
|
||||
|
||||
return 0;
|
||||
|
||||
free_queue_props:
|
||||
@ -675,10 +687,10 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
|
||||
|
||||
init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
|
||||
|
||||
ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
|
||||
(1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_RB_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_MB_SHIFT));
|
||||
ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
|
||||
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
|
||||
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
|
||||
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
|
||||
|
||||
init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
|
||||
|
||||
@ -780,13 +792,13 @@ static int gaudi_late_init(struct hl_device *hdev)
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
int rc;
|
||||
|
||||
rc = gaudi->armcp_info_get(hdev);
|
||||
rc = gaudi->cpucp_info_get(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to get armcp info\n");
|
||||
dev_err(hdev->dev, "Failed to get cpucp info\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
|
||||
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
|
||||
return rc;
|
||||
@ -811,7 +823,7 @@ static int gaudi_late_init(struct hl_device *hdev)
|
||||
return 0;
|
||||
|
||||
disable_pci_access:
|
||||
hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
|
||||
hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -981,7 +993,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
|
||||
}
|
||||
}
|
||||
|
||||
gaudi->armcp_info_get = gaudi_armcp_info_get;
|
||||
gaudi->cpucp_info_get = gaudi_cpucp_info_get;
|
||||
|
||||
gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
|
||||
|
||||
@ -1911,6 +1923,9 @@ static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
|
||||
WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
|
||||
WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
|
||||
|
||||
/* WA for H/W bug H3-2116 */
|
||||
WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
|
||||
|
||||
/* STOP_ON bit implies no completion to operation in case of RAZWI */
|
||||
if (hdev->stop_on_err)
|
||||
dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
|
||||
@ -2321,7 +2336,8 @@ static void gaudi_init_tpc_qmans(struct hl_device *hdev)
|
||||
|
||||
tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
|
||||
|
||||
gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
|
||||
gaudi->hw_cap_initialized |=
|
||||
FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2847,7 +2863,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
|
||||
|
||||
if (err) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to communicate with ARM CPU (ArmCP timeout)\n");
|
||||
"Failed to communicate with Device CPU (CPU-CP timeout)\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
@ -2860,6 +2876,18 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
|
||||
/* Perform read from the device to make sure device is up */
|
||||
RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
|
||||
|
||||
/* Set the access through PCI bars (Linux driver only) as
|
||||
* secured
|
||||
*/
|
||||
WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
|
||||
(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
|
||||
PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
|
||||
|
||||
/* Perform read to flush the waiting writes to ensure
|
||||
* configuration was set in the device
|
||||
*/
|
||||
RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
|
||||
|
||||
/*
|
||||
* Let's mark in the H/W that we have reached this point. We check
|
||||
* this value in the reset_before_init function to understand whether
|
||||
@ -2868,31 +2896,6 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
|
||||
*/
|
||||
WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
|
||||
|
||||
/* Set the access through PCI bars (Linux driver only) as secured */
|
||||
WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
|
||||
PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
|
||||
|
||||
/* Perform read to flush the waiting writes to ensure configuration
|
||||
* was set in the device
|
||||
*/
|
||||
RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
|
||||
|
||||
if (hdev->axi_drain) {
|
||||
WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
|
||||
1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
|
||||
WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
|
||||
1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
|
||||
|
||||
/* Perform read to flush the DRAIN cfg */
|
||||
RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
|
||||
} else {
|
||||
WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
|
||||
WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
|
||||
|
||||
/* Perform read to flush the DRAIN cfg */
|
||||
RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
|
||||
}
|
||||
|
||||
/* Configure the reset registers. Must be done as early as possible
|
||||
* in case we fail during H/W initialization
|
||||
*/
|
||||
@ -2900,13 +2903,13 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
|
||||
(CFG_RST_H_DMA_MASK |
|
||||
CFG_RST_H_MME_MASK |
|
||||
CFG_RST_H_SM_MASK |
|
||||
CFG_RST_H_TPC_MASK));
|
||||
CFG_RST_H_TPC_7_MASK));
|
||||
|
||||
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
|
||||
|
||||
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
|
||||
(CFG_RST_H_HBM_MASK |
|
||||
CFG_RST_H_TPC_MASK |
|
||||
CFG_RST_H_TPC_7_MASK |
|
||||
CFG_RST_H_NIC_MASK |
|
||||
CFG_RST_H_SM_MASK |
|
||||
CFG_RST_H_DMA_MASK |
|
||||
@ -3071,7 +3074,7 @@ static int gaudi_suspend(struct hl_device *hdev)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
|
||||
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
|
||||
|
||||
@ -3084,17 +3087,16 @@ static int gaudi_resume(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
u64 kaddress, phys_addr_t paddress, u32 size)
|
||||
void *cpu_addr, dma_addr_t dma_addr, size_t size)
|
||||
{
|
||||
int rc;
|
||||
|
||||
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
|
||||
VM_DONTCOPY | VM_NORESERVE;
|
||||
|
||||
rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
|
||||
size, vma->vm_page_prot);
|
||||
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "remap_pfn_range error %d", rc);
|
||||
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
|
||||
|
||||
return rc;
|
||||
}
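Both ASICs now let the DMA mapping layer build the user mapping instead of open-coding remap_pfn_range() on a bus address. A minimal sketch of the pattern, assuming the buffer was obtained from dma_alloc_coherent(); the function and parameter names are illustrative, not the driver's:

        #include <linux/dma-mapping.h>
        #include <linux/mm.h>

        static int example_cb_mmap(struct device *dev, struct vm_area_struct *vma,
                                   void *cpu_addr, dma_addr_t dma_addr, size_t size)
        {
                vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
                                 VM_DONTCOPY | VM_NORESERVE;

                /*
                 * dma_mmap_coherent() derives the correct pgprot and PFN for the
                 * platform from the (cpu_addr, dma_addr) pair that came back from
                 * dma_alloc_coherent(), so no manual PAGE_SHIFT arithmetic is needed.
                 */
                return dma_mmap_coherent(dev, vma, cpu_addr, dma_addr, size);
        }

This is the portable way to expose a coherent allocation to user space, since the CPU physical address backing such a buffer is not, in general, derivable from the bus address alone.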
|
||||
@@ -3441,7 +3443,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
|
||||
&fence_dma_addr);
|
||||
if (!fence_ptr) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate memory for queue testing\n");
|
||||
"Failed to allocate memory for H/W queue %d testing\n",
|
||||
hw_queue_id);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -3452,14 +3455,16 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
|
||||
GFP_KERNEL, &pkt_dma_addr);
|
||||
if (!fence_pkt) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate packet for queue testing\n");
|
||||
"Failed to allocate packet for H/W queue %d testing\n",
|
||||
hw_queue_id);
|
||||
rc = -ENOMEM;
|
||||
goto free_fence_ptr;
|
||||
}
|
||||
|
||||
tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_EB_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_MB_SHIFT);
|
||||
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
|
||||
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
|
||||
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
|
||||
|
||||
fence_pkt->ctl = cpu_to_le32(tmp);
|
||||
fence_pkt->value = cpu_to_le32(fence_val);
|
||||
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
|
||||
@ -3469,7 +3474,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
|
||||
pkt_dma_addr);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to send fence packet\n");
|
||||
"Failed to send fence packet to H/W queue %d\n",
|
||||
hw_queue_id);
|
||||
goto free_pkt;
|
||||
}
|
||||
|
||||
@ -3959,8 +3965,6 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
|
||||
}
|
||||
}
|
||||
|
||||
new_dma_pkt->ctl = user_dma_pkt->ctl;
|
||||
|
||||
ctl = le32_to_cpu(user_dma_pkt->ctl);
|
||||
if (likely(dma_desc_cnt))
|
||||
ctl &= ~GAUDI_PKT_CTL_EB_MASK;
|
||||
@ -4105,8 +4109,9 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
|
||||
parser->patched_cb_size = parser->user_cb_size +
|
||||
sizeof(struct packet_msg_prot) * 2;
|
||||
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
|
||||
&patched_cb_handle, HL_KERNEL_ASID_ID, false);
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
|
||||
parser->patched_cb_size, false, false,
|
||||
&patched_cb_handle);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
@ -4178,8 +4183,9 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
|
||||
if (rc)
|
||||
goto free_userptr;
|
||||
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
|
||||
&patched_cb_handle, HL_KERNEL_ASID_ID, false);
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
|
||||
parser->patched_cb_size, false, false,
|
||||
&patched_cb_handle);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate patched CB for DMA CS %d\n", rc);
|
||||
@ -4275,11 +4281,11 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
|
||||
cq_pkt = (struct packet_msg_prot *) (uintptr_t)
|
||||
(kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
|
||||
|
||||
tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_MB_SHIFT);
|
||||
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
|
||||
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
|
||||
|
||||
if (eb)
|
||||
tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
|
||||
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
|
||||
|
||||
cq_pkt->ctl = cpu_to_le32(tmp);
|
||||
cq_pkt->value = cpu_to_le32(cq_val);
|
||||
@ -4287,8 +4293,8 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
|
||||
|
||||
cq_pkt++;
|
||||
|
||||
tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_MB_SHIFT);
|
||||
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
|
||||
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
|
||||
cq_pkt->ctl = cpu_to_le32(tmp);
|
||||
cq_pkt->value = cpu_to_le32(1);
|
||||
|
||||
@ -4320,11 +4326,12 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
|
||||
memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
|
||||
cb_size = sizeof(*lin_dma_pkt);
|
||||
|
||||
ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
|
||||
(1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
|
||||
(1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_RB_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_MB_SHIFT));
|
||||
ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
|
||||
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
|
||||
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
|
||||
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
|
||||
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
|
||||
|
||||
lin_dma_pkt->ctl = cpu_to_le32(ctl);
|
||||
lin_dma_pkt->src_addr = cpu_to_le64(val);
|
||||
lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
|
||||
@ -4930,9 +4937,10 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
|
||||
fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
|
||||
job->job_cb_size - sizeof(struct packet_msg_prot));
|
||||
|
||||
tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_EB_SHIFT) |
|
||||
(1 << GAUDI_PKT_CTL_MB_SHIFT);
|
||||
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
|
||||
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
|
||||
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
|
||||
|
||||
fence_pkt->ctl = cpu_to_le32(tmp);
|
||||
fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
|
||||
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
|
||||
@ -5606,7 +5614,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
|
||||
bool soft_reset_required = false;
|
||||
|
||||
/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
|
||||
* gating, and thus cannot be done in ArmCP and should be done instead
|
||||
* gating, and thus cannot be done in CPU-CP and should be done instead
|
||||
* by the driver.
|
||||
*/
|
||||
|
||||
@ -5653,21 +5661,25 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev,
|
||||
{
|
||||
switch (event_type) {
|
||||
case GAUDI_EVENT_FIX_POWER_ENV_S:
|
||||
hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Clock throttling due to power consumption\n");
|
||||
break;
|
||||
|
||||
case GAUDI_EVENT_FIX_POWER_ENV_E:
|
||||
hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Power envelop is safe, back to optimal clock\n");
|
||||
break;
|
||||
|
||||
case GAUDI_EVENT_FIX_THERMAL_ENV_S:
|
||||
hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Clock throttling due to overheating\n");
|
||||
break;
|
||||
|
||||
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
|
||||
hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Thermal envelop is safe, back to optimal clock\n");
|
||||
break;
|
||||
@ -6038,7 +6050,7 @@ static int gaudi_send_heartbeat(struct hl_device *hdev)
|
||||
return hl_fw_send_heartbeat(hdev);
|
||||
}
|
||||
|
||||
static int gaudi_armcp_info_get(struct hl_device *hdev)
|
||||
static int gaudi_cpucp_info_get(struct hl_device *hdev)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
@ -6047,19 +6059,19 @@ static int gaudi_armcp_info_get(struct hl_device *hdev)
|
||||
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
|
||||
return 0;
|
||||
|
||||
rc = hl_fw_armcp_info_get(hdev);
|
||||
rc = hl_fw_cpucp_info_get(hdev);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (!strlen(prop->armcp_info.card_name))
|
||||
strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
if (!strlen(prop->cpucp_info.card_name))
|
||||
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type);
|
||||
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
|
||||
|
||||
if (hdev->card_type == armcp_card_type_pci)
|
||||
if (hdev->card_type == cpucp_card_type_pci)
|
||||
prop->max_power_default = MAX_POWER_DEFAULT_PCI;
|
||||
else if (hdev->card_type == armcp_card_type_pmc)
|
||||
else if (hdev->card_type == cpucp_card_type_pmc)
|
||||
prop->max_power_default = MAX_POWER_DEFAULT_PMC;
|
||||
|
||||
hdev->max_power = prop->max_power_default;
|
||||
@ -6067,7 +6079,7 @@ static int gaudi_armcp_info_get(struct hl_device *hdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
|
||||
struct seq_file *s)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
@ -6099,7 +6111,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
is_idle &= is_eng_idle;
|
||||
|
||||
if (mask)
|
||||
*mask |= !is_eng_idle <<
|
||||
*mask |= ((u64) !is_eng_idle) <<
|
||||
(GAUDI_ENGINE_ID_DMA_0 + dma_id);
|
||||
if (s)
|
||||
seq_printf(s, fmt, dma_id,
|
||||
@ -6122,7 +6134,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
is_idle &= is_eng_idle;
|
||||
|
||||
if (mask)
|
||||
*mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
|
||||
*mask |= ((u64) !is_eng_idle) <<
|
||||
(GAUDI_ENGINE_ID_TPC_0 + i);
|
||||
if (s)
|
||||
seq_printf(s, fmt, i,
|
||||
is_eng_idle ? "Y" : "N",
|
||||
@ -6150,7 +6163,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
is_idle &= is_eng_idle;
|
||||
|
||||
if (mask)
|
||||
*mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
|
||||
*mask |= ((u64) !is_eng_idle) <<
|
||||
(GAUDI_ENGINE_ID_MME_0 + i);
|
||||
if (s) {
|
||||
if (!is_slave)
|
||||
seq_printf(s, fmt, i,
|
||||
@ -6288,6 +6302,15 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
|
||||
1000,
|
||||
kernel_timeout);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Timeout while waiting for TPC%d vector pipe\n",
|
||||
tpc_id);
|
||||
hdev->asic_funcs->set_clock_gating(hdev);
|
||||
mutex_unlock(&gaudi->clk_gate_mutex);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
rc = hl_poll_timeout(
|
||||
hdev,
|
||||
mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
|
||||
@ -6617,7 +6640,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
|
||||
.send_cpu_message = gaudi_send_cpu_message,
|
||||
.get_hw_state = gaudi_get_hw_state,
|
||||
.pci_bars_map = gaudi_pci_bars_map,
|
||||
.set_dram_bar_base = gaudi_set_hbm_bar_base,
|
||||
.init_iatu = gaudi_init_iatu,
|
||||
.rreg = hl_rreg,
|
||||
.wreg = hl_wreg,
|
||||
|
@ -35,8 +35,6 @@
|
||||
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
|
||||
#endif
|
||||
|
||||
#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */
|
||||
|
||||
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
|
||||
|
||||
#define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */
|
||||
@ -44,7 +42,7 @@
|
||||
#define MAX_POWER_DEFAULT_PCI 200000 /* 200W */
|
||||
#define MAX_POWER_DEFAULT_PMC 350000 /* 350W */
|
||||
|
||||
#define GAUDI_CPU_TIMEOUT_USEC 15000000 /* 15s */
|
||||
#define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */
|
||||
|
||||
#define TPC_ENABLED_MASK 0xFF
|
||||
|
||||
@ -142,28 +140,28 @@
|
||||
#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \
|
||||
VA_HOST_SPACE_START) /* 767TB */
|
||||
|
||||
#define HW_CAP_PLL 0x00000001
|
||||
#define HW_CAP_HBM 0x00000002
|
||||
#define HW_CAP_MMU 0x00000004
|
||||
#define HW_CAP_MME 0x00000008
|
||||
#define HW_CAP_CPU 0x00000010
|
||||
#define HW_CAP_PCI_DMA 0x00000020
|
||||
#define HW_CAP_MSI 0x00000040
|
||||
#define HW_CAP_CPU_Q 0x00000080
|
||||
#define HW_CAP_HBM_DMA 0x00000100
|
||||
#define HW_CAP_CLK_GATE 0x00000200
|
||||
#define HW_CAP_SRAM_SCRAMBLER 0x00000400
|
||||
#define HW_CAP_HBM_SCRAMBLER 0x00000800
|
||||
#define HW_CAP_PLL BIT(0)
|
||||
#define HW_CAP_HBM BIT(1)
|
||||
#define HW_CAP_MMU BIT(2)
|
||||
#define HW_CAP_MME BIT(3)
|
||||
#define HW_CAP_CPU BIT(4)
|
||||
#define HW_CAP_PCI_DMA BIT(5)
|
||||
#define HW_CAP_MSI BIT(6)
|
||||
#define HW_CAP_CPU_Q BIT(7)
|
||||
#define HW_CAP_HBM_DMA BIT(8)
|
||||
#define HW_CAP_CLK_GATE BIT(9)
|
||||
#define HW_CAP_SRAM_SCRAMBLER BIT(10)
|
||||
#define HW_CAP_HBM_SCRAMBLER BIT(11)
|
||||
|
||||
#define HW_CAP_TPC0 0x01000000
|
||||
#define HW_CAP_TPC1 0x02000000
|
||||
#define HW_CAP_TPC2 0x04000000
|
||||
#define HW_CAP_TPC3 0x08000000
|
||||
#define HW_CAP_TPC4 0x10000000
|
||||
#define HW_CAP_TPC5 0x20000000
|
||||
#define HW_CAP_TPC6 0x40000000
|
||||
#define HW_CAP_TPC7 0x80000000
|
||||
#define HW_CAP_TPC_MASK 0xFF000000
|
||||
#define HW_CAP_TPC0 BIT(24)
|
||||
#define HW_CAP_TPC1 BIT(25)
|
||||
#define HW_CAP_TPC2 BIT(26)
|
||||
#define HW_CAP_TPC3 BIT(27)
|
||||
#define HW_CAP_TPC4 BIT(28)
|
||||
#define HW_CAP_TPC5 BIT(29)
|
||||
#define HW_CAP_TPC6 BIT(30)
|
||||
#define HW_CAP_TPC7 BIT(31)
|
||||
#define HW_CAP_TPC_MASK GENMASK(31, 24)
|
||||
#define HW_CAP_TPC_SHIFT 24
|
||||
|
||||
#define GAUDI_CPU_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 39)) >> 39)
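GAUDI_CPU_PCI_MSB_ADDR() extracts bits 49:39 of a host address. With the bitfield helpers now used throughout, the same extraction can be written with FIELD_GET(); a minimal sketch, with an illustrative helper name:

        #include <linux/bits.h>
        #include <linux/bitfield.h>

        /* Equivalent to (((addr) & GENMASK_ULL(49, 39)) >> 39) */
        static inline u64 cpu_pci_msb_addr(u64 addr)
        {
                return FIELD_GET(GENMASK_ULL(49, 39), addr);
        }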
|
||||
@ -216,7 +214,7 @@ struct gaudi_internal_qman_info {
|
||||
|
||||
/**
|
||||
* struct gaudi_device - ASIC specific manage structure.
|
||||
* @armcp_info_get: get information on device from ArmCP
|
||||
* @cpucp_info_get: get information on device from CPU-CP
|
||||
* @hw_queues_lock: protects the H/W queues from concurrent access.
|
||||
* @clk_gate_mutex: protects code areas that require clock gating to be disabled
|
||||
* temporarily
|
||||
@ -239,7 +237,7 @@ struct gaudi_internal_qman_info {
|
||||
* 8-bit value so use u8.
|
||||
*/
|
||||
struct gaudi_device {
|
||||
int (*armcp_info_get)(struct hl_device *hdev);
|
||||
int (*cpucp_info_get)(struct hl_device *hdev);
|
||||
|
||||
/* TODO: remove hw_queues_lock after moving to scheduler code */
|
||||
spinlock_t hw_queues_lock;
|
||||
|
File diff suppressed because it is too large
@ -426,12 +426,14 @@ int goya_get_fixed_properties(struct hl_device *hdev)
|
||||
prop->dmmu.start_addr = VA_DDR_SPACE_START;
|
||||
prop->dmmu.end_addr = VA_DDR_SPACE_END;
|
||||
prop->dmmu.page_size = PAGE_SIZE_2MB;
|
||||
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
|
||||
|
||||
/* shifts and masks are the same in PMMU and DMMU */
|
||||
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
|
||||
prop->pmmu.start_addr = VA_HOST_SPACE_START;
|
||||
prop->pmmu.end_addr = VA_HOST_SPACE_END;
|
||||
prop->pmmu.page_size = PAGE_SIZE_4KB;
|
||||
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
|
||||
|
||||
/* PMMU and HPMMU are the same except of page size */
|
||||
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
|
||||
@ -449,7 +451,7 @@ int goya_get_fixed_properties(struct hl_device *hdev)
|
||||
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
|
||||
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
|
||||
|
||||
strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
|
||||
@ -598,10 +600,15 @@ static int goya_early_init(struct hl_device *hdev)
|
||||
|
||||
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
|
||||
|
||||
rc = hl_pci_init(hdev);
|
||||
rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_ERR0, GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
if (rc)
|
||||
goto free_queue_props;
|
||||
|
||||
/* Goya Firmware does not support security */
|
||||
prop->fw_security_disabled = true;
|
||||
dev_info(hdev->dev, "firmware-level security is disabled\n");
|
||||
|
||||
if (!hdev->pldm) {
|
||||
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
|
||||
if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
|
||||
@ -727,9 +734,9 @@ int goya_late_init(struct hl_device *hdev)
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
rc = goya_armcp_info_get(hdev);
|
||||
rc = goya_cpucp_info_get(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
|
||||
dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -739,7 +746,7 @@ int goya_late_init(struct hl_device *hdev)
|
||||
*/
|
||||
WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));
|
||||
|
||||
rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
|
||||
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to enable PCI access from CPU %d\n", rc);
|
||||
@ -2648,7 +2655,7 @@ int goya_suspend(struct hl_device *hdev)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
|
||||
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
|
||||
|
||||
@ -2661,17 +2668,16 @@ int goya_resume(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
u64 kaddress, phys_addr_t paddress, u32 size)
|
||||
void *cpu_addr, dma_addr_t dma_addr, size_t size)
|
||||
{
|
||||
int rc;
|
||||
|
||||
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
|
||||
VM_DONTCOPY | VM_NORESERVE;
|
||||
|
||||
rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
|
||||
size, vma->vm_page_prot);
|
||||
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "remap_pfn_range error %d", rc);
|
||||
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -2946,7 +2952,8 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
|
||||
&fence_dma_addr);
|
||||
if (!fence_ptr) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate memory for queue testing\n");
|
||||
"Failed to allocate memory for H/W queue %d testing\n",
|
||||
hw_queue_id);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -2957,7 +2964,8 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
|
||||
GFP_KERNEL, &pkt_dma_addr);
|
||||
if (!fence_pkt) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate packet for queue testing\n");
|
||||
"Failed to allocate packet for H/W queue %d testing\n",
|
||||
hw_queue_id);
|
||||
rc = -ENOMEM;
|
||||
goto free_fence_ptr;
|
||||
}
|
||||
@ -2974,7 +2982,8 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
|
||||
pkt_dma_addr);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to send fence packet\n");
|
||||
"Failed to send fence packet to H/W queue %d\n",
|
||||
hw_queue_id);
|
||||
goto free_pkt;
|
||||
}
|
||||
|
||||
@ -3806,8 +3815,9 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
|
||||
parser->patched_cb_size = parser->user_cb_size +
|
||||
sizeof(struct packet_msg_prot) * 2;
|
||||
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
|
||||
&patched_cb_handle, HL_KERNEL_ASID_ID, false);
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
|
||||
parser->patched_cb_size, false, false,
|
||||
&patched_cb_handle);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
@ -3879,8 +3889,9 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
|
||||
if (rc)
|
||||
goto free_userptr;
|
||||
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
|
||||
&patched_cb_handle, HL_KERNEL_ASID_ID, false);
|
||||
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
|
||||
parser->patched_cb_size, false, false,
|
||||
&patched_cb_handle);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate patched CB for DMA CS %d\n", rc);
|
||||
@ -4497,17 +4508,17 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
|
||||
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
|
||||
size_t irq_arr_size)
|
||||
{
|
||||
struct armcp_unmask_irq_arr_packet *pkt;
|
||||
struct cpucp_unmask_irq_arr_packet *pkt;
|
||||
size_t total_pkt_size;
|
||||
long result;
|
||||
int rc;
|
||||
int irq_num_entries, irq_arr_index;
|
||||
__le32 *goya_irq_arr;
|
||||
|
||||
total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
|
||||
total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
|
||||
irq_arr_size;
|
||||
|
||||
/* data should be aligned to 8 bytes in order to ArmCP to copy it */
|
||||
/* data should be aligned to 8 bytes in order to CPU-CP to copy it */
|
||||
total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
|
||||
|
||||
/* total_pkt_size is casted to u16 later on */
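The open-coded round-up above is the usual power-of-two alignment idiom; it is the same operation as the kernel's ALIGN() helper. A minimal illustration, assuming <linux/kernel.h> and an illustrative function name:

        #include <linux/kernel.h>

        /* Same as (total_pkt_size + 0x7) & ~0x7: round up to a multiple of 8 */
        static inline size_t round_up_to_8(size_t total_pkt_size)
        {
                return ALIGN(total_pkt_size, 8);
        }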
|
||||
@ -4531,8 +4542,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
|
||||
goya_irq_arr[irq_arr_index] =
|
||||
cpu_to_le32(irq_arr[irq_arr_index]);
|
||||
|
||||
pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
|
||||
total_pkt_size, 0, &result);
|
||||
@ -4557,14 +4568,14 @@ static int goya_soft_reset_late_init(struct hl_device *hdev)
|
||||
|
||||
static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
|
||||
{
|
||||
struct armcp_packet pkt;
|
||||
struct cpucp_packet pkt;
|
||||
long result;
|
||||
int rc;
|
||||
|
||||
memset(&pkt, 0, sizeof(pkt));
|
||||
|
||||
pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
|
||||
ARMCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
|
||||
CPUCP_PKT_CTL_OPCODE_SHIFT);
|
||||
pkt.value = cpu_to_le64(event_type);
|
||||
|
||||
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
|
||||
@ -4580,18 +4591,22 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
|
||||
{
|
||||
switch (event_type) {
|
||||
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
|
||||
hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Clock throttling due to power consumption\n");
|
||||
break;
|
||||
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
|
||||
hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Power envelop is safe, back to optimal clock\n");
|
||||
break;
|
||||
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
|
||||
hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Clock throttling due to overheating\n");
|
||||
break;
|
||||
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
|
||||
hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Thermal envelop is safe, back to optimal clock\n");
|
||||
break;
|
||||
@ -4638,7 +4653,8 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
|
||||
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
|
||||
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
|
||||
goya_print_irq_info(hdev, event_type, false);
|
||||
hl_device_reset(hdev, true, false);
|
||||
if (hdev->hard_reset_on_fw_events)
|
||||
hl_device_reset(hdev, true, false);
|
||||
break;
|
||||
|
||||
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
|
||||
@ -5096,7 +5112,7 @@ int goya_send_heartbeat(struct hl_device *hdev)
|
||||
return hl_fw_send_heartbeat(hdev);
|
||||
}
|
||||
|
||||
int goya_armcp_info_get(struct hl_device *hdev)
|
||||
int goya_cpucp_info_get(struct hl_device *hdev)
|
||||
{
|
||||
struct goya_device *goya = hdev->asic_specific;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
@ -5106,11 +5122,11 @@ int goya_armcp_info_get(struct hl_device *hdev)
|
||||
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
|
||||
return 0;
|
||||
|
||||
rc = hl_fw_armcp_info_get(hdev);
|
||||
rc = hl_fw_cpucp_info_get(hdev);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
dram_size = le64_to_cpu(prop->armcp_info.dram_size);
|
||||
dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
|
||||
if (dram_size) {
|
||||
if ((!is_power_of_2(dram_size)) ||
|
||||
(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
|
||||
@ -5124,8 +5140,8 @@ int goya_armcp_info_get(struct hl_device *hdev)
|
||||
prop->dram_end_address = prop->dram_base_address + dram_size;
|
||||
}
|
||||
|
||||
if (!strlen(prop->armcp_info.card_name))
|
||||
strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
if (!strlen(prop->cpucp_info.card_name))
|
||||
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
return 0;
|
||||
@ -5141,7 +5157,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
|
||||
/* clock gating not supported in Goya */
|
||||
}
|
||||
|
||||
static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
|
||||
struct seq_file *s)
|
||||
{
|
||||
const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
|
||||
@ -5166,7 +5182,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
is_idle &= is_eng_idle;
|
||||
|
||||
if (mask)
|
||||
*mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
|
||||
*mask |= ((u64) !is_eng_idle) <<
|
||||
(GOYA_ENGINE_ID_DMA_0 + i);
|
||||
if (s)
|
||||
seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
|
||||
qm_glbl_sts0, dma_core_sts0);
|
||||
@ -5189,7 +5206,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
is_idle &= is_eng_idle;
|
||||
|
||||
if (mask)
|
||||
*mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
|
||||
*mask |= ((u64) !is_eng_idle) <<
|
||||
(GOYA_ENGINE_ID_TPC_0 + i);
|
||||
if (s)
|
||||
seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
|
||||
qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
|
||||
@ -5209,7 +5227,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
|
||||
is_idle &= is_eng_idle;
|
||||
|
||||
if (mask)
|
||||
*mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
|
||||
*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
|
||||
if (s) {
|
||||
seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
|
||||
cmdq_glbl_sts0, mme_arch_sts);
|
||||
@ -5369,7 +5387,6 @@ static const struct hl_asic_funcs goya_funcs = {
|
||||
.send_cpu_message = goya_send_cpu_message,
|
||||
.get_hw_state = goya_get_hw_state,
|
||||
.pci_bars_map = goya_pci_bars_map,
|
||||
.set_dram_bar_base = goya_set_ddr_bar_base,
|
||||
.init_iatu = goya_init_iatu,
|
||||
.rreg = hl_rreg,
|
||||
.wreg = hl_wreg,
|
||||
|
@ -207,7 +207,7 @@ void goya_set_max_power(struct hl_device *hdev, u64 value);
|
||||
void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
|
||||
void goya_add_device_attr(struct hl_device *hdev,
|
||||
struct attribute_group *dev_attr_grp);
|
||||
int goya_armcp_info_get(struct hl_device *hdev);
|
||||
int goya_cpucp_info_get(struct hl_device *hdev);
|
||||
int goya_debug_coresight(struct hl_device *hdev, void *data);
|
||||
void goya_halt_coresight(struct hl_device *hdev);
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Copyright 2016-2020 HabanaLabs, Ltd.
|
||||
* Copyright 2020 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ARMCP_IF_H
|
||||
#define ARMCP_IF_H
|
||||
#ifndef CPUCP_IF_H
|
||||
#define CPUCP_IF_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
@ -50,16 +50,16 @@ enum pq_init_status {
|
||||
};
|
||||
|
||||
/*
|
||||
* ArmCP Primary Queue Packets
|
||||
* CpuCP Primary Queue Packets
|
||||
*
|
||||
* During normal operation, the host's kernel driver needs to send various
|
||||
* messages to ArmCP, usually either to SET some value into a H/W periphery or
|
||||
* messages to CpuCP, usually either to SET some value into a H/W periphery or
|
||||
* to GET the current value of some H/W periphery. For example, SET the
|
||||
* frequency of MME/TPC and GET the value of the thermal sensor.
|
||||
*
|
||||
* These messages can be initiated either by the User application or by the
|
||||
* host's driver itself, e.g. power management code. In either case, the
|
||||
* communication from the host's driver to ArmCP will *always* be in
|
||||
* communication from the host's driver to CpuCP will *always* be in
|
||||
* synchronous mode, meaning that the host will send a single message and poll
|
||||
* until the message was acknowledged and the results are ready (if results are
|
||||
* needed).
|
||||
@ -73,21 +73,20 @@ enum pq_init_status {
|
||||
*
|
||||
* The message, inputs/outputs (if relevant) and fence object will be located
|
||||
* on the device DDR at an address that will be determined by the host's driver.
|
||||
* During device initialization phase, the host will pass to ArmCP that address.
|
||||
* During device initialization phase, the host will pass to CpuCP that address.
|
||||
* Most of the message types will contain inputs/outputs inside the message
|
||||
* itself. The common part of each message will contain the opcode of the
|
||||
* message (its type) and a field representing a fence object.
|
||||
*
|
||||
* When the host's driver wishes to send a message to ArmCP, it will write the
|
||||
* message contents to the device DDR, clear the fence object and then write the
|
||||
* value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
|
||||
* the 484 interrupt-id to the ARM core.
|
||||
* When the host's driver wishes to send a message to CPU CP, it will write the
|
||||
* message contents to the device DDR, clear the fence object and then write to
|
||||
* the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU.
|
||||
*
|
||||
* Upon receiving the 484 interrupt-id, ArmCP will read the message from the
|
||||
* DDR. In case the message is a SET operation, ArmCP will first perform the
|
||||
* Upon receiving the interrupt (#121), CpuCP will read the message from the
|
||||
* DDR. In case the message is a SET operation, CpuCP will first perform the
|
||||
* operation and then write to the fence object on the device DDR. In case the
|
||||
* message is a GET operation, ArmCP will first fill the results section on the
|
||||
* device DDR and then write to the fence object. If an error occurred, ArmCP
|
||||
* message is a GET operation, CpuCP will first fill the results section on the
|
||||
* device DDR and then write to the fence object. If an error occurred, CpuCP
|
||||
* will fill the rc field with the right error code.
|
||||
*
|
||||
* In the meantime, the host's driver will poll on the fence object. Once the
|
||||
@ -96,164 +95,174 @@ enum pq_init_status {
|
||||
* driver.
|
||||
*
|
||||
* To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
|
||||
* so the value being put by the host's driver matches the value read by ArmCP
|
||||
* so the value being put by the host's driver matches the value read by CpuCP
|
||||
*
|
||||
* Non-QMAN packets should be limited to values 1 through (2^8 - 1)
|
||||
*
|
||||
* Detailed description:
|
||||
*
|
||||
* ARMCP_PACKET_DISABLE_PCI_ACCESS -
|
||||
* CPUCP_PACKET_DISABLE_PCI_ACCESS -
|
||||
* After receiving this packet the embedded CPU must NOT issue PCI
|
||||
* transactions (read/write) towards the Host CPU. This also include
|
||||
* sending MSI-X interrupts.
|
||||
* This packet is usually sent before the device is moved to D3Hot state.
|
||||
*
|
||||
* ARMCP_PACKET_ENABLE_PCI_ACCESS -
|
||||
* CPUCP_PACKET_ENABLE_PCI_ACCESS -
|
||||
* After receiving this packet the embedded CPU is allowed to issue PCI
|
||||
* transactions towards the Host CPU, including sending MSI-X interrupts.
|
||||
* This packet is usually send after the device is moved to D0 state.
|
||||
*
|
||||
* ARMCP_PACKET_TEMPERATURE_GET -
|
||||
* CPUCP_PACKET_TEMPERATURE_GET -
|
||||
* Fetch the current temperature / Max / Max Hyst / Critical /
|
||||
* Critical Hyst of a specified thermal sensor. The packet's
|
||||
* arguments specify the desired sensor and the field to get.
|
||||
*
|
||||
* ARMCP_PACKET_VOLTAGE_GET -
|
||||
* CPUCP_PACKET_VOLTAGE_GET -
|
||||
* Fetch the voltage / Max / Min of a specified sensor. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_CURRENT_GET -
|
||||
* CPUCP_PACKET_CURRENT_GET -
|
||||
* Fetch the current / Max / Min of a specified sensor. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_FAN_SPEED_GET -
|
||||
* CPUCP_PACKET_FAN_SPEED_GET -
|
||||
* Fetch the speed / Max / Min of a specified fan. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_PWM_GET -
|
||||
* CPUCP_PACKET_PWM_GET -
|
||||
* Fetch the pwm value / mode of a specified pwm. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_PWM_SET -
|
||||
* CPUCP_PACKET_PWM_SET -
|
||||
* Set the pwm value / mode of a specified pwm. The packet's
|
||||
* arguments specify the sensor, type and value.
|
||||
*
|
||||
* ARMCP_PACKET_FREQUENCY_SET -
|
||||
* CPUCP_PACKET_FREQUENCY_SET -
|
||||
* Set the frequency of a specified PLL. The packet's arguments specify
|
||||
* the PLL and the desired frequency. The actual frequency in the device
|
||||
* might differ from the requested frequency.
|
||||
*
|
||||
* ARMCP_PACKET_FREQUENCY_GET -
|
||||
* CPUCP_PACKET_FREQUENCY_GET -
|
||||
* Fetch the frequency of a specified PLL. The packet's arguments specify
|
||||
* the PLL.
|
||||
*
|
||||
* ARMCP_PACKET_LED_SET -
|
||||
* CPUCP_PACKET_LED_SET -
|
||||
* Set the state of a specified led. The packet's arguments
|
||||
* specify the led and the desired state.
|
||||
*
|
||||
* ARMCP_PACKET_I2C_WR -
|
||||
* CPUCP_PACKET_I2C_WR -
|
||||
* Write 32-bit value to I2C device. The packet's arguments specify the
|
||||
* I2C bus, address and value.
|
||||
*
|
||||
* ARMCP_PACKET_I2C_RD -
|
||||
* CPUCP_PACKET_I2C_RD -
|
||||
* Read 32-bit value from I2C device. The packet's arguments specify the
|
||||
* I2C bus and address.
|
||||
*
|
||||
* ARMCP_PACKET_INFO_GET -
|
||||
* CPUCP_PACKET_INFO_GET -
|
||||
* Fetch information from the device as specified in the packet's
|
||||
* structure. The host's driver passes the max size it allows the ArmCP to
|
||||
* structure. The host's driver passes the max size it allows the CpuCP to
|
||||
* write to the structure, to prevent data corruption in case of
|
||||
* mismatched driver/FW versions.
|
||||
*
|
||||
* ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
|
||||
* CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
|
||||
*
|
||||
* ARMCP_PACKET_UNMASK_RAZWI_IRQ -
|
||||
* CPUCP_PACKET_UNMASK_RAZWI_IRQ -
|
||||
* Unmask the given IRQ. The IRQ number is specified in the value field.
|
||||
* The packet is sent after receiving an interrupt and printing its
|
||||
* relevant information.
|
||||
*
|
||||
* ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
|
||||
* CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
|
||||
* Unmask the given IRQs. The IRQs numbers are specified in an array right
|
||||
* after the armcp_packet structure, where its first element is the array
|
||||
* after the cpucp_packet structure, where its first element is the array
|
||||
* length. The packet is sent after a soft reset was done in order to
|
||||
* handle any interrupts that were sent during the reset process.
|
||||
*
|
||||
* ARMCP_PACKET_TEST -
|
||||
* Test packet for ArmCP connectivity. The CPU will put the fence value
|
||||
* CPUCP_PACKET_TEST -
|
||||
* Test packet for CpuCP connectivity. The CPU will put the fence value
|
||||
* in the result field.
|
||||
*
|
||||
* ARMCP_PACKET_FREQUENCY_CURR_GET -
|
||||
* CPUCP_PACKET_FREQUENCY_CURR_GET -
|
||||
* Fetch the current frequency of a specified PLL. The packet's arguments
|
||||
* specify the PLL.
|
||||
*
|
||||
* ARMCP_PACKET_MAX_POWER_GET -
|
||||
* CPUCP_PACKET_MAX_POWER_GET -
|
||||
* Fetch the maximal power of the device.
|
||||
*
|
||||
* ARMCP_PACKET_MAX_POWER_SET -
|
||||
* CPUCP_PACKET_MAX_POWER_SET -
|
||||
* Set the maximal power of the device. The packet's arguments specify
|
||||
* the power.
|
||||
*
|
||||
* ARMCP_PACKET_EEPROM_DATA_GET -
|
||||
* Get EEPROM data from the ArmCP kernel. The buffer is specified in the
|
||||
* CPUCP_PACKET_EEPROM_DATA_GET -
|
||||
* Get EEPROM data from the CpuCP kernel. The buffer is specified in the
|
||||
* addr field. The CPU will put the returned data size in the result
|
||||
* field. In addition, the host's driver passes the max size it allows the
|
||||
* ArmCP to write to the structure, to prevent data corruption in case of
|
||||
* CpuCP to write to the structure, to prevent data corruption in case of
|
||||
* mismatched driver/FW versions.
|
||||
*
|
||||
* ARMCP_PACKET_TEMPERATURE_SET -
|
||||
* CPUCP_PACKET_TEMPERATURE_SET -
|
||||
* Set the value of the offset property of a specified thermal sensor.
|
||||
* The packet's arguments specify the desired sensor and the field to
|
||||
* set.
|
||||
*
|
||||
* ARMCP_PACKET_VOLTAGE_SET -
|
||||
* CPUCP_PACKET_VOLTAGE_SET -
|
||||
* Trigger the reset_history property of a specified voltage sensor.
|
||||
* The packet's arguments specify the desired sensor and the field to
|
||||
* set.
|
||||
*
|
||||
* ARMCP_PACKET_CURRENT_SET -
|
||||
* CPUCP_PACKET_CURRENT_SET -
|
||||
* Trigger the reset_history property of a specified current sensor.
|
||||
* The packet's arguments specify the desired sensor and the field to
|
||||
* set.
|
||||
*
|
||||
* CPUCP_PACKET_PLL_REG_GET
|
||||
* Fetch register of PLL from the required PLL IP.
|
||||
* The packet's arguments specify the PLL IP and the register to get.
|
||||
* Each register is 32-bit value which is returned in result field.
|
||||
*
|
||||
*/
|
||||
|
||||
enum armcp_packet_id {
|
||||
ARMCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */
|
||||
ARMCP_PACKET_ENABLE_PCI_ACCESS, /* internal */
|
||||
ARMCP_PACKET_TEMPERATURE_GET, /* sysfs */
|
||||
ARMCP_PACKET_VOLTAGE_GET, /* sysfs */
|
||||
ARMCP_PACKET_CURRENT_GET, /* sysfs */
|
||||
ARMCP_PACKET_FAN_SPEED_GET, /* sysfs */
|
||||
ARMCP_PACKET_PWM_GET, /* sysfs */
|
||||
ARMCP_PACKET_PWM_SET, /* sysfs */
|
||||
ARMCP_PACKET_FREQUENCY_SET, /* sysfs */
|
||||
ARMCP_PACKET_FREQUENCY_GET, /* sysfs */
|
||||
ARMCP_PACKET_LED_SET, /* debugfs */
|
||||
ARMCP_PACKET_I2C_WR, /* debugfs */
|
||||
ARMCP_PACKET_I2C_RD, /* debugfs */
|
||||
ARMCP_PACKET_INFO_GET, /* IOCTL */
|
||||
ARMCP_PACKET_FLASH_PROGRAM_REMOVED,
|
||||
ARMCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */
|
||||
ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */
|
||||
ARMCP_PACKET_TEST, /* internal */
|
||||
ARMCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */
|
||||
ARMCP_PACKET_MAX_POWER_GET, /* sysfs */
|
||||
ARMCP_PACKET_MAX_POWER_SET, /* sysfs */
|
||||
ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */
|
||||
ARMCP_RESERVED,
|
||||
ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */
|
||||
ARMCP_PACKET_VOLTAGE_SET, /* sysfs */
|
||||
ARMCP_PACKET_CURRENT_SET, /* sysfs */
|
||||
enum cpucp_packet_id {
|
||||
CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */
|
||||
CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */
|
||||
CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */
|
||||
CPUCP_PACKET_VOLTAGE_GET, /* sysfs */
|
||||
CPUCP_PACKET_CURRENT_GET, /* sysfs */
|
||||
CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */
|
||||
CPUCP_PACKET_PWM_GET, /* sysfs */
|
||||
CPUCP_PACKET_PWM_SET, /* sysfs */
|
||||
CPUCP_PACKET_FREQUENCY_SET, /* sysfs */
|
||||
CPUCP_PACKET_FREQUENCY_GET, /* sysfs */
|
||||
CPUCP_PACKET_LED_SET, /* debugfs */
|
||||
CPUCP_PACKET_I2C_WR, /* debugfs */
|
||||
CPUCP_PACKET_I2C_RD, /* debugfs */
|
||||
CPUCP_PACKET_INFO_GET, /* IOCTL */
|
||||
CPUCP_PACKET_FLASH_PROGRAM_REMOVED,
|
||||
CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */
|
||||
CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */
|
||||
CPUCP_PACKET_TEST, /* internal */
|
||||
CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */
|
||||
CPUCP_PACKET_MAX_POWER_GET, /* sysfs */
|
||||
CPUCP_PACKET_MAX_POWER_SET, /* sysfs */
|
||||
CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */
|
||||
CPUCP_RESERVED,
|
||||
CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */
|
||||
CPUCP_PACKET_VOLTAGE_SET, /* sysfs */
|
||||
CPUCP_PACKET_CURRENT_SET, /* sysfs */
|
||||
CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */
|
||||
CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */
|
||||
CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */
|
||||
CPUCP_PACKET_PLL_REG_GET, /* internal */
|
||||
};
|
||||
|
||||
#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
|
||||
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
|
||||
|
||||
#define ARMCP_PKT_CTL_RC_SHIFT 12
|
||||
#define ARMCP_PKT_CTL_RC_MASK 0x0000F000
|
||||
#define CPUCP_PKT_CTL_RC_SHIFT 12
|
||||
#define CPUCP_PKT_CTL_RC_MASK 0x0000F000
|
||||
|
||||
#define ARMCP_PKT_CTL_OPCODE_SHIFT 16
|
||||
#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000
|
||||
#define CPUCP_PKT_CTL_OPCODE_SHIFT 16
|
||||
#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000
|
||||
|
||||
struct armcp_packet {
|
||||
struct cpucp_packet {
|
||||
union {
|
||||
__le64 value; /* For SET packets */
|
||||
__le64 result; /* For GET packets */
|
||||
@@ -277,71 +286,97 @@ struct armcp_packet {
|
||||
__u8 pad; /* unused */
|
||||
};
|
||||
|
||||
struct {/* For PLL register fetch */
|
||||
__le16 pll_type;
|
||||
__le16 pll_reg;
|
||||
};
|
||||
|
||||
/* For any general request */
|
||||
__le32 index;
|
||||
|
||||
/* For frequency get/set */
|
||||
__le32 pll_index;
|
||||
|
||||
/* For led set */
|
||||
__le32 led_index;
|
||||
|
||||
/* For get Armcp info/EEPROM data */
|
||||
/* For get CpuCP info/EEPROM data */
|
||||
__le32 data_max_size;
|
||||
};
|
||||
|
||||
__le32 reserved;
|
||||
};
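Putting the renamed definitions together, a request to the device CPU is still a single cpucp_packet whose opcode is packed into the ctl word. A minimal, hypothetical sketch of filling the new PLL register fetch request; the function name and includes are illustrative, and the surrounding send/poll machinery (the driver's existing send_cpu_message path) is omitted:

        #include <linux/string.h>
        #include <asm/byteorder.h>
        /* plus this header for struct cpucp_packet and the CPUCP_* definitions */

        static void example_fill_pll_reg_get(struct cpucp_packet *pkt)
        {
                memset(pkt, 0, sizeof(*pkt));
                pkt->ctl = cpu_to_le32(CPUCP_PACKET_PLL_REG_GET <<
                                        CPUCP_PKT_CTL_OPCODE_SHIFT);
                pkt->pll_type = cpu_to_le16(cpucp_pll_cpu);     /* which PLL IP            */
                pkt->pll_reg  = cpu_to_le16(cpucp_pll_nr_reg);  /* which register to fetch */
        }

Per the description above, the 32-bit register value comes back through the packet's result field, and the CPUCP_PKT_CTL_RC field of ctl carries one of the cpucp_packet_rc codes.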
|
||||
|
||||
struct armcp_unmask_irq_arr_packet {
|
||||
struct armcp_packet armcp_pkt;
|
||||
struct cpucp_unmask_irq_arr_packet {
|
||||
struct cpucp_packet cpucp_pkt;
|
||||
__le32 length;
|
||||
__le32 irqs[0];
|
||||
};
|
||||
|
||||
enum armcp_packet_rc {
|
||||
armcp_packet_success,
|
||||
armcp_packet_invalid,
|
||||
armcp_packet_fault
|
||||
enum cpucp_packet_rc {
|
||||
cpucp_packet_success,
|
||||
cpucp_packet_invalid,
|
||||
cpucp_packet_fault
|
||||
};
|
||||
|
||||
/*
|
||||
* armcp_temp_type should adhere to hwmon_temp_attributes
|
||||
* cpucp_temp_type should adhere to hwmon_temp_attributes
|
||||
* defined in Linux kernel hwmon.h file
|
||||
*/
|
||||
enum armcp_temp_type {
|
||||
armcp_temp_input,
|
||||
armcp_temp_max = 6,
|
||||
armcp_temp_max_hyst,
|
||||
armcp_temp_crit,
|
||||
armcp_temp_crit_hyst,
|
||||
armcp_temp_offset = 19,
|
||||
armcp_temp_highest = 22,
|
||||
armcp_temp_reset_history = 23
|
||||
enum cpucp_temp_type {
|
||||
cpucp_temp_input,
|
||||
cpucp_temp_max = 6,
|
||||
cpucp_temp_max_hyst,
|
||||
cpucp_temp_crit,
|
||||
cpucp_temp_crit_hyst,
|
||||
cpucp_temp_offset = 19,
|
||||
cpucp_temp_highest = 22,
|
||||
cpucp_temp_reset_history = 23
|
||||
};
|
||||
|
||||
enum armcp_in_attributes {
|
||||
armcp_in_input,
|
||||
armcp_in_min,
|
||||
armcp_in_max,
|
||||
armcp_in_highest = 7,
|
||||
armcp_in_reset_history
|
||||
enum cpucp_in_attributes {
|
||||
cpucp_in_input,
|
||||
cpucp_in_min,
|
||||
cpucp_in_max,
|
||||
cpucp_in_highest = 7,
|
||||
cpucp_in_reset_history
|
||||
};
|
||||
|
||||
enum armcp_curr_attributes {
|
||||
armcp_curr_input,
|
||||
armcp_curr_min,
|
||||
armcp_curr_max,
|
||||
armcp_curr_highest = 7,
|
||||
armcp_curr_reset_history
|
||||
enum cpucp_curr_attributes {
|
||||
cpucp_curr_input,
|
||||
cpucp_curr_min,
|
||||
cpucp_curr_max,
|
||||
cpucp_curr_highest = 7,
|
||||
cpucp_curr_reset_history
|
||||
};
|
||||
|
||||
enum armcp_fan_attributes {
|
||||
armcp_fan_input,
|
||||
armcp_fan_min = 2,
|
||||
armcp_fan_max
|
||||
enum cpucp_fan_attributes {
|
||||
cpucp_fan_input,
|
||||
cpucp_fan_min = 2,
|
||||
cpucp_fan_max
|
||||
};
|
||||
|
||||
enum armcp_pwm_attributes {
|
||||
armcp_pwm_input,
|
||||
armcp_pwm_enable
|
||||
enum cpucp_pwm_attributes {
|
||||
cpucp_pwm_input,
|
||||
cpucp_pwm_enable
|
||||
};
|
||||
|
||||
enum cpucp_pcie_throughput_attributes {
|
||||
cpucp_pcie_throughput_tx,
|
||||
cpucp_pcie_throughput_rx
|
||||
};
|
||||
|
||||
enum cpucp_pll_reg_attributes {
|
||||
cpucp_pll_nr_reg,
|
||||
cpucp_pll_nf_reg,
|
||||
cpucp_pll_od_reg,
|
||||
cpucp_pll_div_factor_reg,
|
||||
cpucp_pll_div_sel_reg
|
||||
};
|
||||
|
||||
enum cpucp_pll_type_attributes {
|
||||
cpucp_pll_cpu,
|
||||
cpucp_pll_pci,
|
||||
};
|
||||
|
||||
/* Event Queue Packets */
|
||||
@ -351,32 +386,32 @@ struct eq_generic_event {
|
||||
};
|
||||
|
||||
/*
|
||||
* ArmCP info
|
||||
* CpuCP info
|
||||
*/
|
||||
|
||||
#define CARD_NAME_MAX_LEN 16
|
||||
#define VERSION_MAX_LEN 128
|
||||
#define ARMCP_MAX_SENSORS 128
|
||||
#define CPUCP_MAX_SENSORS 128
|
||||
|
||||
struct armcp_sensor {
|
||||
struct cpucp_sensor {
|
||||
__le32 type;
|
||||
__le32 flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct armcp_card_types - ASIC card type.
|
||||
* @armcp_card_type_pci: PCI card.
|
||||
* @armcp_card_type_pmc: PCI Mezzanine Card.
|
||||
* struct cpucp_card_types - ASIC card type.
|
||||
* @cpucp_card_type_pci: PCI card.
|
||||
* @cpucp_card_type_pmc: PCI Mezzanine Card.
|
||||
*/
|
||||
enum armcp_card_types {
|
||||
armcp_card_type_pci,
|
||||
armcp_card_type_pmc
|
||||
enum cpucp_card_types {
|
||||
cpucp_card_type_pci,
|
||||
cpucp_card_type_pmc
|
||||
};
|
||||
|
||||
/**
|
||||
* struct armcp_info - Info from ArmCP that is necessary to the host's driver
|
||||
* struct cpucp_info - Info from CpuCP that is necessary to the host's driver
|
||||
* @sensors: available sensors description.
|
||||
* @kernel_version: ArmCP linux kernel version.
|
||||
* @kernel_version: CpuCP linux kernel version.
|
||||
* @reserved: reserved field.
|
||||
* @card_type: card configuration type.
|
||||
* @card_location: in a server, each card has different connections topology
|
||||
@ -385,12 +420,12 @@ enum armcp_card_types {
|
||||
* @infineon_version: Infineon main DC-DC version.
|
||||
* @fuse_version: silicon production FUSE information.
|
||||
* @thermal_version: thermald S/W version.
|
||||
* @armcp_version: ArmCP S/W version.
|
||||
* @cpucp_version: CpuCP S/W version.
|
||||
* @dram_size: available DRAM size.
|
||||
* @card_name: card name that will be displayed in HWMON subsystem on the host
|
||||
*/
|
||||
struct armcp_info {
|
||||
struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
|
||||
struct cpucp_info {
|
||||
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
|
||||
__u8 kernel_version[VERSION_MAX_LEN];
|
||||
__le32 reserved;
|
||||
__le32 card_type;
|
||||
@ -399,9 +434,10 @@ struct armcp_info {
|
||||
__le32 infineon_version;
|
||||
__u8 fuse_version[VERSION_MAX_LEN];
|
||||
__u8 thermal_version[VERSION_MAX_LEN];
|
||||
__u8 armcp_version[VERSION_MAX_LEN];
|
||||
__u8 cpucp_version[VERSION_MAX_LEN];
|
||||
__le32 reserved2;
|
||||
__le64 dram_size;
|
||||
char card_name[CARD_NAME_MAX_LEN];
|
||||
};
|
||||
|
||||
#endif /* ARMCP_IF_H */
|
||||
#endif /* CPUCP_IF_H */
|
@@ -40,7 +40,7 @@ struct hl_bd {
|
||||
*/
|
||||
|
||||
#define BD_CTL_COMP_OFFSET_SHIFT 16
|
||||
#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000
|
||||
#define BD_CTL_COMP_OFFSET_MASK 0x0FFF0000
|
||||
|
||||
#define BD_CTL_COMP_DATA_SHIFT 0
|
||||
#define BD_CTL_COMP_DATA_MASK 0x0000FFFF
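The completion-offset field is widened by one nibble here, from bits 23:16 (0x00FF0000) to bits 27:16 (0x0FFF0000), while the data field is unchanged. A minimal sketch of packing the two fields into a buffer-descriptor ctl word; the helper is illustrative only:

        #include <linux/types.h>

        static inline u32 bd_ctl(u32 comp_offset, u32 comp_data)
        {
                return ((comp_offset << BD_CTL_COMP_OFFSET_SHIFT) &
                                        BD_CTL_COMP_OFFSET_MASK) |
                       ((comp_data << BD_CTL_COMP_DATA_SHIFT) &
                                        BD_CTL_COMP_DATA_MASK);
        }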
|
||||
|
@ -44,6 +44,8 @@
|
||||
|
||||
#define MME_NUMBER_OF_MASTER_ENGINES 2
|
||||
|
||||
#define MME_NUMBER_OF_SLAVE_ENGINES 2
|
||||
|
||||
#define TPC_NUMBER_OF_ENGINES 8
|
||||
|
||||
#define DMA_NUMBER_OF_CHANNELS 8
|
||||
|
@@ -12,191 +12,160 @@
|
||||
|
||||
/* Useful masks for bits in various registers */
|
||||
#define PCI_DMA_QMAN_ENABLE (\
|
||||
(0xF << DMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_CFG0_CP_EN_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_CFG0_PQF_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_CFG0_CQF_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_CFG0_CP_EN_MASK, 0xF)))
|
||||
|
||||
#define QMAN_EXTERNAL_MAKE_TRUSTED (\
|
||||
(0xF << DMA0_QM_GLBL_PROT_PQF_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_PROT_CQF_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_PROT_CP_SHIFT) | \
|
||||
(0x1 << DMA0_QM_GLBL_PROT_ERR_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_PROT_PQF_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_PROT_CQF_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_PROT_CP_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_PROT_ERR_MASK, 0x1)))
|
||||
|
||||
#define QMAN_INTERNAL_MAKE_TRUSTED (\
|
||||
(0xF << DMA0_QM_GLBL_PROT_PQF_SHIFT) | \
|
||||
(0x1 << DMA0_QM_GLBL_PROT_ERR_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_PROT_PQF_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_PROT_ERR_MASK, 0x1)))
|
||||
|
||||
#define HBM_DMA_QMAN_ENABLE (\
|
||||
(0xF << DMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
|
||||
(0x1F << DMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
|
||||
(0x1F << DMA0_QM_GLBL_CFG0_CP_EN_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_CFG0_PQF_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_CFG0_CQF_EN_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_CFG0_CP_EN_MASK, 0x1F)))
|
||||
|
||||
#define QMAN_MME_ENABLE (\
|
||||
(0xF << MME0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
|
||||
(0x1F << MME0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
|
||||
(0x1F << MME0_QM_GLBL_CFG0_CP_EN_SHIFT))
|
||||
(FIELD_PREP(MME0_QM_GLBL_CFG0_PQF_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(MME0_QM_GLBL_CFG0_CQF_EN_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(MME0_QM_GLBL_CFG0_CP_EN_MASK, 0x1F)))
|
||||
|
||||
#define QMAN_TPC_ENABLE (\
|
||||
(0xF << TPC0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
|
||||
(0x1F << TPC0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
|
||||
(0x1F << TPC0_QM_GLBL_CFG0_CP_EN_SHIFT))
|
||||
(FIELD_PREP(TPC0_QM_GLBL_CFG0_PQF_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(TPC0_QM_GLBL_CFG0_CQF_EN_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(TPC0_QM_GLBL_CFG0_CP_EN_MASK, 0x1F)))
|
||||
|
||||
#define QMAN_UPPER_CP_CGM_PWR_GATE_EN (\
|
||||
(0x20 << DMA0_QM_CGM_CFG_IDLE_TH_SHIFT) | \
|
||||
(0xA << DMA0_QM_CGM_CFG_G2F_TH_SHIFT) | \
|
||||
(0x10 << DMA0_QM_CGM_CFG_CP_IDLE_MASK_SHIFT) | \
|
||||
(1 << DMA0_QM_CGM_CFG_EN_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_IDLE_TH_MASK, 0x20)) | \
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_G2F_TH_MASK, 0xA)) | \
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_CP_IDLE_MASK_MASK, 0x10)) | \
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_EN_MASK, 0x1)))
|
||||
|
||||
#define QMAN_COMMON_CP_CGM_PWR_GATE_EN (\
|
||||
(0x20 << DMA0_QM_CGM_CFG_IDLE_TH_SHIFT) | \
|
||||
(0xA << DMA0_QM_CGM_CFG_G2F_TH_SHIFT) | \
|
||||
(0xF << DMA0_QM_CGM_CFG_CP_IDLE_MASK_SHIFT) | \
|
||||
(1 << DMA0_QM_CGM_CFG_EN_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_IDLE_TH_MASK, 0x20)) | \
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_G2F_TH_MASK, 0xA)) | \
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_CP_IDLE_MASK_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_CGM_CFG_EN_MASK, 0x1)))
|
||||
|
||||
#define PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_MASK, 0xF)))
|
||||
|
||||
#define PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0xF)))
|
||||
|
||||
#define HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0x1F << DMA0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0x1F << DMA0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_MASK, 0x1F)))
|
||||
|
||||
#define HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
|
||||
(0xF << DMA0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0x1F << DMA0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0x1F << DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_SHIFT))
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(DMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)))
|
||||
|
||||
#define TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
|
||||
(0xF << TPC0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0x1F << TPC0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0x1F << TPC0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_SHIFT))
|
||||
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_MASK, 0x1F)))
|
||||
|
||||
#define TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
|
||||
(0xF << TPC0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0x1F << TPC0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0x1F << TPC0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_SHIFT))
|
||||
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
|
||||
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(TPC0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)))
|
||||
|
||||
#define MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK (\
|
||||
(0xF << MME0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0x1F << MME0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_SHIFT) | \
|
||||
(0x1F << MME0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_SHIFT))
|
||||
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_MASK, 0xF)) | \
|
||||
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_MASK, 0x1F)))
|
||||
|
||||
#define MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK (\
|
||||
(0xF << MME0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0x1F << MME0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_SHIFT) | \
|
||||
(0x1F << MME0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_SHIFT))
|
||||
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_MASK, 0xF)) | \
|
||||
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_MASK, 0x1F)) | \
|
||||
(FIELD_PREP(MME0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_MASK, 0x1F)))
|
||||
|
||||
#define QMAN_CGM1_PWR_GATE_EN (0xA << DMA0_QM_CGM_CFG1_MASK_TH_SHIFT)
|
||||
#define QMAN_CGM1_PWR_GATE_EN (FIELD_PREP(DMA0_QM_CGM_CFG1_MASK_TH_MASK, 0xA))

/* RESET registers configuration */
#define CFG_RST_L_PSOC_SHIFT 0
#define CFG_RST_L_PCIE_SHIFT 1
#define CFG_RST_L_PCIE_IF_SHIFT 2
#define CFG_RST_L_HBM_S_PLL_SHIFT 3
#define CFG_RST_L_TPC_S_PLL_SHIFT 4
#define CFG_RST_L_MME_S_PLL_SHIFT 5
#define CFG_RST_L_CPU_PLL_SHIFT 6
#define CFG_RST_L_PCIE_PLL_SHIFT 7
#define CFG_RST_L_NIC_S_PLL_SHIFT 8
#define CFG_RST_L_HBM_N_PLL_SHIFT 9
#define CFG_RST_L_TPC_N_PLL_SHIFT 10
#define CFG_RST_L_MME_N_PLL_SHIFT 11
#define CFG_RST_L_NIC_N_PLL_SHIFT 12
#define CFG_RST_L_DMA_W_PLL_SHIFT 13
#define CFG_RST_L_SIF_W_PLL_SHIFT 14
#define CFG_RST_L_MESH_W_PLL_SHIFT 15
#define CFG_RST_L_SRAM_W_PLL_SHIFT 16
#define CFG_RST_L_DMA_E_PLL_SHIFT 17
#define CFG_RST_L_SIF_E_PLL_SHIFT 18
#define CFG_RST_L_MESH_E_PLL_SHIFT 19
#define CFG_RST_L_SRAM_E_PLL_SHIFT 20
#define CFG_RST_L_IF_1_SHIFT 21
#define CFG_RST_L_IF_0_SHIFT 22
#define CFG_RST_L_IF_2_SHIFT 23
#define CFG_RST_L_IF_3_SHIFT 24
#define CFG_RST_L_TPC_0_SHIFT 25
#define CFG_RST_L_TPC_1_SHIFT 26
#define CFG_RST_L_TPC_2_SHIFT 27
#define CFG_RST_L_TPC_3_SHIFT 28
#define CFG_RST_L_TPC_4_SHIFT 29
#define CFG_RST_L_TPC_5_SHIFT 30
#define CFG_RST_L_TPC_6_SHIFT 31
#define CFG_RST_H_TPC_7_SHIFT 0
#define CFG_RST_H_MME_0_SHIFT 1
#define CFG_RST_H_MME_1_SHIFT 2
#define CFG_RST_H_MME_2_SHIFT 3
#define CFG_RST_H_MME_3_SHIFT 4
#define CFG_RST_H_HBM_0_SHIFT 5
#define CFG_RST_H_HBM_1_SHIFT 6
#define CFG_RST_H_HBM_2_SHIFT 7
#define CFG_RST_H_HBM_3_SHIFT 8
#define CFG_RST_H_NIC_0_SHIFT 9
#define CFG_RST_H_NIC_1_SHIFT 10
#define CFG_RST_H_NIC_2_SHIFT 11
#define CFG_RST_H_NIC_3_SHIFT 12
#define CFG_RST_H_NIC_4_SHIFT 13
#define CFG_RST_H_SM_0_SHIFT 14
#define CFG_RST_H_SM_1_SHIFT 15
#define CFG_RST_H_SM_2_SHIFT 16
#define CFG_RST_H_SM_3_SHIFT 17
#define CFG_RST_H_DMA_0_SHIFT 18
#define CFG_RST_H_DMA_1_SHIFT 19
#define CFG_RST_H_CPU_SHIFT 20
#define CFG_RST_H_MMU_SHIFT 21
#define CFG_RST_L_PSOC_MASK BIT_MASK(0)
#define CFG_RST_L_PCIE_MASK BIT_MASK(1)
#define CFG_RST_L_PCIE_IF_MASK BIT_MASK(2)
#define CFG_RST_L_HBM_S_PLL_MASK BIT_MASK(3)
#define CFG_RST_L_TPC_S_PLL_MASK BIT_MASK(4)
#define CFG_RST_L_MME_S_PLL_MASK BIT_MASK(5)
#define CFG_RST_L_CPU_PLL_MASK BIT_MASK(6)
#define CFG_RST_L_PCIE_PLL_MASK BIT_MASK(7)
#define CFG_RST_L_NIC_S_PLL_MASK BIT_MASK(8)
#define CFG_RST_L_HBM_N_PLL_MASK BIT_MASK(9)
#define CFG_RST_L_TPC_N_PLL_MASK BIT_MASK(10)
#define CFG_RST_L_MME_N_PLL_MASK BIT_MASK(11)
#define CFG_RST_L_NIC_N_PLL_MASK BIT_MASK(12)
#define CFG_RST_L_DMA_W_PLL_MASK BIT_MASK(13)
#define CFG_RST_L_SIF_W_PLL_MASK BIT_MASK(14)
#define CFG_RST_L_MESH_W_PLL_MASK BIT_MASK(15)
#define CFG_RST_L_SRAM_W_PLL_MASK BIT_MASK(16)
#define CFG_RST_L_DMA_E_PLL_MASK BIT_MASK(17)
#define CFG_RST_L_SIF_E_PLL_MASK BIT_MASK(18)
#define CFG_RST_L_MESH_E_PLL_MASK BIT_MASK(19)
#define CFG_RST_L_SRAM_E_PLL_MASK BIT_MASK(20)

#define CFG_RST_L_IF_1_MASK BIT_MASK(21)
#define CFG_RST_L_IF_0_MASK BIT_MASK(22)
#define CFG_RST_L_IF_2_MASK BIT_MASK(23)
#define CFG_RST_L_IF_3_MASK BIT_MASK(24)
#define CFG_RST_L_IF_MASK GENMASK(24, 21)

#define CFG_RST_H_DMA_MASK ((1 << CFG_RST_H_DMA_0_SHIFT) | \
        (1 << CFG_RST_H_DMA_1_SHIFT))
#define CFG_RST_L_TPC_0_MASK BIT_MASK(25)
#define CFG_RST_L_TPC_1_MASK BIT_MASK(26)
#define CFG_RST_L_TPC_2_MASK BIT_MASK(27)
#define CFG_RST_L_TPC_3_MASK BIT_MASK(28)
#define CFG_RST_L_TPC_4_MASK BIT_MASK(29)
#define CFG_RST_L_TPC_5_MASK BIT_MASK(30)
#define CFG_RST_L_TPC_6_MASK BIT_MASK(31)
#define CFG_RST_L_TPC_MASK GENMASK(31, 25)

#define CFG_RST_H_CPU_MASK (1 << CFG_RST_H_CPU_SHIFT)
#define CFG_RST_H_MMU_MASK (1 << CFG_RST_H_MMU_SHIFT)
#define CFG_RST_H_TPC_7_MASK BIT_MASK(0)

#define CFG_RST_H_HBM_MASK ((1 << CFG_RST_H_HBM_0_SHIFT) | \
        (1 << CFG_RST_H_HBM_1_SHIFT) | \
        (1 << CFG_RST_H_HBM_2_SHIFT) | \
        (1 << CFG_RST_H_HBM_3_SHIFT))
#define CFG_RST_H_MME_0_MASK BIT_MASK(1)
#define CFG_RST_H_MME_1_MASK BIT_MASK(2)
#define CFG_RST_H_MME_2_MASK BIT_MASK(3)
#define CFG_RST_H_MME_3_MASK BIT_MASK(4)
#define CFG_RST_H_MME_MASK GENMASK(4, 1)

#define CFG_RST_H_NIC_MASK ((1 << CFG_RST_H_NIC_0_SHIFT) | \
        (1 << CFG_RST_H_NIC_1_SHIFT) | \
        (1 << CFG_RST_H_NIC_2_SHIFT) | \
        (1 << CFG_RST_H_NIC_3_SHIFT) | \
        (1 << CFG_RST_H_NIC_4_SHIFT))
#define CFG_RST_H_HBM_0_MASK BIT_MASK(5)
#define CFG_RST_H_HBM_1_MASK BIT_MASK(6)
#define CFG_RST_H_HBM_2_MASK BIT_MASK(7)
#define CFG_RST_H_HBM_3_MASK BIT_MASK(8)
#define CFG_RST_H_HBM_MASK GENMASK(8, 5)

#define CFG_RST_H_SM_MASK ((1 << CFG_RST_H_SM_0_SHIFT) | \
        (1 << CFG_RST_H_SM_1_SHIFT) | \
        (1 << CFG_RST_H_SM_2_SHIFT) | \
        (1 << CFG_RST_H_SM_3_SHIFT))
#define CFG_RST_H_NIC_0_MASK BIT_MASK(9)
#define CFG_RST_H_NIC_1_MASK BIT_MASK(10)
#define CFG_RST_H_NIC_2_MASK BIT_MASK(11)
#define CFG_RST_H_NIC_3_MASK BIT_MASK(12)
#define CFG_RST_H_NIC_4_MASK BIT_MASK(13)
#define CFG_RST_H_NIC_MASK GENMASK(13, 9)

#define CFG_RST_H_MME_MASK ((1 << CFG_RST_H_MME_0_SHIFT) | \
        (1 << CFG_RST_H_MME_1_SHIFT) | \
        (1 << CFG_RST_H_MME_2_SHIFT) | \
        (1 << CFG_RST_H_MME_3_SHIFT))
#define CFG_RST_H_SM_0_MASK BIT_MASK(14)
#define CFG_RST_H_SM_1_MASK BIT_MASK(15)
#define CFG_RST_H_SM_2_MASK BIT_MASK(16)
#define CFG_RST_H_SM_3_MASK BIT_MASK(17)
#define CFG_RST_H_SM_MASK GENMASK(17, 14)

#define CFG_RST_L_PSOC_MASK (1 << CFG_RST_L_PSOC_SHIFT)
#define CFG_RST_H_DMA_0_MASK BIT_MASK(18)
#define CFG_RST_H_DMA_1_MASK BIT_MASK(19)
#define CFG_RST_H_DMA_MASK GENMASK(19, 18)

#define CFG_RST_L_IF_MASK ((1 << CFG_RST_L_IF_0_SHIFT) | \
        (1 << CFG_RST_L_IF_1_SHIFT) | \
        (1 << CFG_RST_L_IF_2_SHIFT) | \
        (1 << CFG_RST_L_IF_3_SHIFT))

#define CFG_RST_L_TPC_MASK ((1 << CFG_RST_L_TPC_0_SHIFT) | \
        (1 << CFG_RST_L_TPC_1_SHIFT) | \
        (1 << CFG_RST_L_TPC_2_SHIFT) | \
        (1 << CFG_RST_L_TPC_3_SHIFT) | \
        (1 << CFG_RST_L_TPC_4_SHIFT) | \
        (1 << CFG_RST_L_TPC_5_SHIFT) | \
        (1 << CFG_RST_L_TPC_6_SHIFT))

#define CFG_RST_H_TPC_MASK (1 << CFG_RST_H_TPC_7_SHIFT)

#define CA53_RESET (1 << CFG_RST_H_CPU_SHIFT)
#define CFG_RST_H_CPU_MASK BIT_MASK(20)
#define CFG_RST_H_MMU_MASK BIT_MASK(21)

#define UNIT_RST_L_PSOC_SHIFT 0
#define UNIT_RST_L_PCIE_SHIFT 1
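
The hunks above replace open-coded shift expressions with the kernel's standard bitfield helpers: FIELD_PREP() from <linux/bitfield.h> and BIT_MASK()/GENMASK() from <linux/bits.h>. The conversion is mechanical; both forms produce the same register value. The small self-contained sketch below is illustrative only (it is not driver code and re-implements the helpers in simplified form, assuming a 64-bit long and the GCC/Clang __builtin_ctzl builtin) to show the equivalence:

/* Simplified re-implementations of the kernel helpers, for illustration. */
#include <assert.h>

#define BIT(n)                  (1UL << (n))
#define GENMASK(h, l)           (((~0UL) << (l)) & (~0UL >> (63 - (h))))
#define FIELD_PREP(mask, val)   (((unsigned long)(val) << __builtin_ctzl(mask)) & (mask))

/* A made-up 4-bit field at bits [7:4], standing in for one of the
 * DMA0_QM_GLBL_ERR_CFG fields used above. */
#define EXAMPLE_FIELD_SHIFT     4
#define EXAMPLE_FIELD_MASK      GENMASK(7, 4)

int main(void)
{
        unsigned long old_style = 0xF << EXAMPLE_FIELD_SHIFT;          /* 0xF0 */
        unsigned long new_style = FIELD_PREP(EXAMPLE_FIELD_MASK, 0xF); /* 0xF0 */

        assert(old_style == new_style);
        return 0;
}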

@ -12,6 +12,7 @@
 * PSOC scratch-pad registers
 */
#define mmHW_STATE mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
#define mmFUSE_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
#define mmCPU_CMD_STATUS_TO_HOST mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
#define mmCPU_BOOT_ERR0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
#define mmCPU_BOOT_ERR1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_25

@ -22,6 +22,7 @@
#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
#define mmFUSE_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
#define mmCPU_CMD_STATUS_TO_HOST mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
#define mmCPU_BOOT_ERR0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
#define mmCPU_BOOT_ERR1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_25

@ -29,6 +29,8 @@
#define HOP3_SHIFT 21
#define HOP4_SHIFT 12

#define MMU_ARCH_5_HOPS 5

#define HOP_PHYS_ADDR_MASK (~FLAGS_MASK)

#define HL_PTE_SIZE sizeof(u64)
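
The hop shifts above describe how the MMU code slices a device virtual address into per-hop page-table indices for the 5-hop table walk (MMU_ARCH_5_HOPS). The toy sketch below is not driver code; the 9-bit index width is an assumption inferred from the 21 -> 12 spacing of the shifts, and the hop-table base is a made-up value, purely to show the arithmetic:

#include <stdint.h>
#include <stdio.h>

#define HOP3_SHIFT      21
#define HOP4_SHIFT      12
#define HOP_INDEX_BITS  9                               /* assumed width */
#define HOP_INDEX_MASK  ((1ULL << HOP_INDEX_BITS) - 1)
#define HL_PTE_SIZE     sizeof(uint64_t)

int main(void)
{
        uint64_t virt_addr = 0x0000001234567000ULL;     /* arbitrary example */
        uint64_t hop3_idx = (virt_addr >> HOP3_SHIFT) & HOP_INDEX_MASK;
        uint64_t hop4_idx = (virt_addr >> HOP4_SHIFT) & HOP_INDEX_MASK;

        /* A PTE inside a hop table sits at hop_table_base + idx * HL_PTE_SIZE,
         * so with 9-bit indices each hop table holds 512 eight-byte entries. */
        printf("hop3 index = %llu, hop4 index = %llu\n",
               (unsigned long long)hop3_idx, (unsigned long long)hop4_idx);
        return 0;
}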

@ -264,6 +264,10 @@ enum hl_device_status {
 * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time
 *                     for synchronization.
 * HL_INFO_CS_COUNTERS - Retrieve command submission counters
 * HL_INFO_PCI_COUNTERS - Retrieve PCI counters
 * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
 * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
 * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
 */
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1

@ -276,6 +280,10 @@ enum hl_device_status {
#define HL_INFO_RESET_COUNT 9
#define HL_INFO_TIME_SYNC 10
#define HL_INFO_CS_COUNTERS 11
#define HL_INFO_PCI_COUNTERS 12
#define HL_INFO_CLK_THROTTLE_REASON 13
#define HL_INFO_SYNC_MANAGER 14
#define HL_INFO_TOTAL_ENERGY 15

#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16

@ -289,7 +297,7 @@ struct hl_info_hw_ip_info {
        __u32 device_id; /* PCI Device ID */
        __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
        __u32 reserved[2];
        __u32 armcp_cpld_version;
        __u32 cpld_version;
        __u32 psoc_pci_pll_nr;
        __u32 psoc_pci_pll_nf;
        __u32 psoc_pci_pll_od;

@ -297,7 +305,7 @@ struct hl_info_hw_ip_info {
        __u8 tpc_enabled_mask;
        __u8 dram_enabled;
        __u8 pad[2];
        __u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
        __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
        __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
};

@ -313,6 +321,12 @@ struct hl_info_hw_idle {
         * Bits definition is according to `enum <chip>_enging_id'.
         */
        __u32 busy_engines_mask;

        /*
         * Extended Bitmask of busy engines.
         * Bits definition is according to `enum <chip>_enging_id'.
         */
        __u64 busy_engines_mask_ext;
};

struct hl_info_device_status {

@ -340,18 +354,61 @@ struct hl_info_time_sync {
        __u64 host_time;
};

/**
 * struct hl_info_pci_counters - pci counters
 * @rx_throughput: PCI rx throughput KBps
 * @tx_throughput: PCI tx throughput KBps
 * @replay_cnt: PCI replay counter
 */
struct hl_info_pci_counters {
        __u64 rx_throughput;
        __u64 tx_throughput;
        __u64 replay_cnt;
};

#define HL_CLK_THROTTLE_POWER 0x1
#define HL_CLK_THROTTLE_THERMAL 0x2

/**
 * struct hl_info_clk_throttle - clock throttling reason
 * @clk_throttling_reason: each bit represents a clk throttling reason
 */
struct hl_info_clk_throttle {
        __u32 clk_throttling_reason;
};

/**
 * struct hl_info_energy - device energy information
 * @total_energy_consumption: total device energy consumption
 */
struct hl_info_energy {
        __u64 total_energy_consumption;
};

/**
 * struct hl_info_sync_manager - sync manager information
 * @first_available_sync_object: first available sob
 * @first_available_monitor: first available monitor
 */
struct hl_info_sync_manager {
        __u32 first_available_sync_object;
        __u32 first_available_monitor;
};

/**
 * struct hl_info_cs_counters - command submission counters
 * @out_of_mem_drop_cnt: dropped due to memory allocation issue
 * @parsing_drop_cnt: dropped due to error in packet parsing
 * @queue_full_drop_cnt: dropped due to queue full
 * @device_in_reset_drop_cnt: dropped due to device in reset
 * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
 */
struct hl_cs_counters {
        __u64 out_of_mem_drop_cnt;
        __u64 parsing_drop_cnt;
        __u64 queue_full_drop_cnt;
        __u64 device_in_reset_drop_cnt;
        __u64 max_cs_in_flight_drop_cnt;
};

struct hl_info_cs_counters {

@ -359,6 +416,13 @@ struct hl_info_cs_counters {
        struct hl_cs_counters ctx_cs_counters;
};
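
These counters are exposed to user space through the INFO IOCTL with op HL_INFO_CS_COUNTERS (11, defined above). The sketch below is a hypothetical userspace flow, not a verified snippet: HL_IOCTL_INFO and the return_size field of struct hl_info_args come from the driver's uapi header and are not shown in the hunks here, and the device node path is an assumption.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <misc/habanalabs.h>    /* uapi header installed from the kernel tree */

static int print_cs_counters(const char *dev_path)
{
        struct hl_info_cs_counters counters;
        struct hl_info_args args;
        int fd, rc;

        fd = open(dev_path, O_RDWR);    /* e.g. "/dev/hl0" (assumed node name) */
        if (fd < 0)
                return -1;

        memset(&args, 0, sizeof(args));
        memset(&counters, 0, sizeof(counters));
        args.op = HL_INFO_CS_COUNTERS;
        args.return_pointer = (__u64)(uintptr_t)&counters;
        args.return_size = sizeof(counters);    /* assumed field name */

        rc = ioctl(fd, HL_IOCTL_INFO, &args);   /* assumed ioctl name */
        if (!rc)
                printf("per-context queue-full drops: %llu\n",
                       (unsigned long long)
                       counters.ctx_cs_counters.queue_full_drop_cnt);

        close(fd);
        return rc;
}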

enum gaudi_dcores {
        HL_GAUDI_WS_DCORE,
        HL_GAUDI_WN_DCORE,
        HL_GAUDI_EN_DCORE,
        HL_GAUDI_ES_DCORE
};

struct hl_info_args {
        /* Location of relevant struct in userspace */
        __u64 return_pointer;

@ -375,6 +439,10 @@ struct hl_info_args {
        __u32 op;

        union {
                /* Dcore id for which the information is relevant.
                 * For Gaudi refer to 'enum gaudi_dcores'
                 */
                __u32 dcore_id;
                /* Context ID - Currently not in use */
                __u32 ctx_id;
                /* Period value for utilization rate (100ms - 1000ms, in 100ms

@ -394,6 +462,9 @@
/* 2MB minus 32 bytes for 2xMSG_PROT */
#define HL_MAX_CB_SIZE (0x200000 - 32)

/* Indicates whether the command buffer should be mapped to the device's MMU */
#define HL_CB_FLAGS_MAP 0x1

struct hl_cb_in {
        /* Handle of CB or 0 if we want to create one */
        __u64 cb_handle;

@ -405,7 +476,8 @@ struct hl_cb_in {
        __u32 cb_size;
        /* Context ID - Currently not in use */
        __u32 ctx_id;
        __u32 pad;
        /* HL_CB_FLAGS_* */
        __u32 flags;
};

struct hl_cb_out {

@ -788,6 +860,12 @@ struct hl_debug_args {
 * When creating a new CB, the IOCTL returns a handle of it, and the user-space
 * process needs to use that handle to mmap the buffer so it can access them.
 *
 * In some instances, the device must access the command buffer through the
 * device's MMU, and thus its memory should be mapped. In these cases, user can
 * indicate the driver that such a mapping is required.
 * The resulting device virtual address will be used internally by the driver,
 * and won't be returned to user.
 *
 */
#define HL_IOCTL_CB \
_IOWR('H', 0x02, union hl_cb_args)
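
With the new HL_CB_FLAGS_MAP flag, a single CB IOCTL call both allocates the command buffer and asks the driver to map it to the device's MMU; as the comment above notes, the resulting device virtual address stays internal to the driver. The sketch below is a hypothetical userspace illustration: the 'op' field, the HL_CB_OP_CREATE opcode and hl_cb_out.cb_handle come from the driver's uapi header and are not shown in the hunk above.

#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

static int create_mapped_cb(int fd, __u32 size, __u64 *handle)
{
        union hl_cb_args args;
        int rc;

        memset(&args, 0, sizeof(args));
        args.in.op = HL_CB_OP_CREATE;           /* assumed opcode name */
        args.in.cb_handle = 0;                  /* 0: create a new CB */
        args.in.cb_size = size;                 /* must not exceed HL_MAX_CB_SIZE */
        args.in.flags = HL_CB_FLAGS_MAP;        /* also map the CB to the device MMU */

        rc = ioctl(fd, HL_IOCTL_CB, &args);
        if (!rc)
                *handle = args.out.cb_handle;   /* used later to mmap the CB on the host */

        return rc;
}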

@ -846,6 +924,9 @@ struct hl_debug_args {
 * inside the kernel until the CS has finished or until the user-requested
 * timeout has expired.
 *
 * If the timeout value is 0, the driver won't sleep at all. It will check
 * the status of the CS and return immediately
 *
 * The return value of the IOCTL is a standard Linux error code. The possible
 * values are:
 *