mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 16:30:52 +07:00
habanalabs: add h/w queues module
This patch adds the H/W queues module and the code to initialize Goya's various compute and DMA engines and their queues. Goya has 5 DMA channels, 8 TPC engines and a single MME engine. For each channel/engine, there is a H/W queue logic which is used to pass commands from the user to the H/W. That logic is called QMAN. There are two types of QMANs: external and internal. The DMA QMANs are considered external while the TPC and MME QMANs are considered internal. For each external queue there is a completion queue, which is located on the Host memory. The differences between external and internal QMANs are: 1. The location of the queue's memory. External QMANs are located on the Host memory while internal QMANs are located on the on-chip memory. 2. The external QMAN writes an entry to a completion queue and sends an MSI-X interrupt upon completion of a command buffer that was given to it. The internal QMAN doesn't do that. Reviewed-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
839c48030d
commit
9494a8dd8d
@ -5,7 +5,7 @@
|
||||
obj-m := habanalabs.o
|
||||
|
||||
habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
|
||||
command_buffer.o
|
||||
command_buffer.o hw_queue.o irq.o
|
||||
|
||||
include $(src)/goya/Makefile
|
||||
habanalabs-y += $(HL_GOYA_FILES)
|
||||
|
@ -174,13 +174,23 @@ static int device_early_init(struct hl_device *hdev)
|
||||
if (rc)
|
||||
goto early_fini;
|
||||
|
||||
hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
|
||||
if (hdev->cq_wq == NULL) {
|
||||
dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
|
||||
rc = -ENOMEM;
|
||||
goto asid_fini;
|
||||
}
|
||||
|
||||
hl_cb_mgr_init(&hdev->kernel_cb_mgr);
|
||||
|
||||
mutex_init(&hdev->fd_open_cnt_lock);
|
||||
mutex_init(&hdev->send_cpu_message_lock);
|
||||
atomic_set(&hdev->fd_open_cnt, 0);
|
||||
|
||||
return 0;
|
||||
|
||||
asid_fini:
|
||||
hl_asid_fini(hdev);
|
||||
early_fini:
|
||||
if (hdev->asic_funcs->early_fini)
|
||||
hdev->asic_funcs->early_fini(hdev);
|
||||
@ -196,9 +206,12 @@ static int device_early_init(struct hl_device *hdev)
|
||||
*/
|
||||
static void device_early_fini(struct hl_device *hdev)
|
||||
{
|
||||
mutex_destroy(&hdev->send_cpu_message_lock);
|
||||
|
||||
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
|
||||
|
||||
destroy_workqueue(hdev->cq_wq);
|
||||
|
||||
hl_asid_fini(hdev);
|
||||
|
||||
if (hdev->asic_funcs->early_fini)
|
||||
@ -277,7 +290,7 @@ int hl_device_resume(struct hl_device *hdev)
|
||||
*/
|
||||
int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
||||
{
|
||||
int rc;
|
||||
int i, rc, cq_ready_cnt;
|
||||
|
||||
/* Create device */
|
||||
rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops);
|
||||
@ -298,11 +311,48 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
||||
if (rc)
|
||||
goto early_fini;
|
||||
|
||||
/*
|
||||
* Initialize the H/W queues. Must be done before hw_init, because
|
||||
* there the addresses of the kernel queue are being written to the
|
||||
* registers of the device
|
||||
*/
|
||||
rc = hl_hw_queues_create(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to initialize kernel queues\n");
|
||||
goto sw_fini;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the completion queues. Must be done before hw_init,
|
||||
* because there the addresses of the completion queues are being
|
||||
* passed as arguments to request_irq
|
||||
*/
|
||||
hdev->completion_queue =
|
||||
kcalloc(hdev->asic_prop.completion_queues_count,
|
||||
sizeof(*hdev->completion_queue), GFP_KERNEL);
|
||||
|
||||
if (!hdev->completion_queue) {
|
||||
dev_err(hdev->dev, "failed to allocate completion queues\n");
|
||||
rc = -ENOMEM;
|
||||
goto hw_queues_destroy;
|
||||
}
|
||||
|
||||
for (i = 0, cq_ready_cnt = 0;
|
||||
i < hdev->asic_prop.completion_queues_count;
|
||||
i++, cq_ready_cnt++) {
|
||||
rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to initialize completion queue\n");
|
||||
goto cq_fini;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate the kernel context */
|
||||
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
|
||||
if (!hdev->kernel_ctx) {
|
||||
rc = -ENOMEM;
|
||||
goto sw_fini;
|
||||
goto cq_fini;
|
||||
}
|
||||
|
||||
hdev->user_ctx = NULL;
|
||||
@ -328,6 +378,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
||||
|
||||
hdev->disabled = false;
|
||||
|
||||
/* Check that the communication with the device is working */
|
||||
rc = hdev->asic_funcs->test_queues(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to detect if device is alive\n");
|
||||
rc = 0;
|
||||
goto out_disabled;
|
||||
}
|
||||
|
||||
dev_notice(hdev->dev,
|
||||
"Successfully added device to habanalabs driver\n");
|
||||
|
||||
@ -339,6 +397,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
||||
"kernel ctx is still alive on initialization failure\n");
|
||||
free_ctx:
|
||||
kfree(hdev->kernel_ctx);
|
||||
cq_fini:
|
||||
for (i = 0 ; i < cq_ready_cnt ; i++)
|
||||
hl_cq_fini(hdev, &hdev->completion_queue[i]);
|
||||
kfree(hdev->completion_queue);
|
||||
hw_queues_destroy:
|
||||
hl_hw_queues_destroy(hdev);
|
||||
sw_fini:
|
||||
hdev->asic_funcs->sw_fini(hdev);
|
||||
early_fini:
|
||||
@ -368,6 +432,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
||||
*/
|
||||
void hl_device_fini(struct hl_device *hdev)
|
||||
{
|
||||
int i;
|
||||
dev_info(hdev->dev, "Removing device\n");
|
||||
|
||||
/* Mark device as disabled */
|
||||
@ -382,6 +447,12 @@ void hl_device_fini(struct hl_device *hdev)
|
||||
/* Reset the H/W. It will be in idle state after this returns */
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
|
||||
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
|
||||
hl_cq_fini(hdev, &hdev->completion_queue[i]);
|
||||
kfree(hdev->completion_queue);
|
||||
|
||||
hl_hw_queues_destroy(hdev);
|
||||
|
||||
/* Call ASIC S/W finalize function */
|
||||
hdev->asic_funcs->sw_fini(hdev);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -11,7 +11,9 @@
|
||||
#include <uapi/misc/habanalabs.h>
|
||||
#include "habanalabs.h"
|
||||
#include "include/hl_boot_if.h"
|
||||
#include "include/goya/goya_packets.h"
|
||||
#include "include/goya/goya.h"
|
||||
#include "include/goya/goya_async_events.h"
|
||||
#include "include/goya/goya_fw_if.h"
|
||||
|
||||
#define NUMBER_OF_CMPLT_QUEUES 5
|
||||
@ -145,12 +147,17 @@ enum goya_fw_component {
|
||||
};
|
||||
|
||||
struct goya_device {
|
||||
int (*test_cpu_queue)(struct hl_device *hdev);
|
||||
|
||||
/* TODO: remove hw_queues_lock after moving to scheduler code */
|
||||
spinlock_t hw_queues_lock;
|
||||
u64 ddr_bar_cur_addr;
|
||||
u32 hw_cap_initialized;
|
||||
};
|
||||
|
||||
int goya_test_cpu_queue(struct hl_device *hdev);
|
||||
int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
|
||||
u32 timeout, long *result);
|
||||
void goya_init_security(struct hl_device *hdev);
|
||||
|
||||
#endif /* GOYAP_H_ */
|
||||
|
@ -9,6 +9,7 @@
|
||||
#define HABANALABSP_H_
|
||||
|
||||
#include "include/armcp_if.h"
|
||||
#include "include/qman_if.h"
|
||||
|
||||
#define pr_fmt(fmt) "habanalabs: " fmt
|
||||
|
||||
@ -26,9 +27,36 @@
|
||||
struct hl_device;
|
||||
struct hl_fpriv;
|
||||
|
||||
/**
|
||||
* enum hl_queue_type - Supported QUEUE types.
|
||||
* @QUEUE_TYPE_NA: queue is not available.
|
||||
* @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
|
||||
* host.
|
||||
* @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
|
||||
* memories and/or operates the compute engines.
|
||||
* @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
|
||||
*/
|
||||
enum hl_queue_type {
|
||||
QUEUE_TYPE_NA,
|
||||
QUEUE_TYPE_EXT,
|
||||
QUEUE_TYPE_INT,
|
||||
QUEUE_TYPE_CPU
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hw_queue_properties - queue information.
|
||||
* @type: queue type.
|
||||
* @kmd_only: true if only KMD is allowed to send a job to this queue, false
|
||||
* otherwise.
|
||||
*/
|
||||
struct hw_queue_properties {
|
||||
enum hl_queue_type type;
|
||||
u8 kmd_only;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct asic_fixed_properties - ASIC specific immutable properties.
|
||||
* @hw_queues_props: H/W queues properties.
|
||||
* @uboot_ver: F/W U-boot version.
|
||||
* @preboot_ver: F/W Preboot version.
|
||||
* @sram_base_address: SRAM physical start address.
|
||||
@ -59,6 +87,7 @@ struct hl_fpriv;
|
||||
* @tpc_enabled_mask: which TPCs are enabled.
|
||||
*/
|
||||
struct asic_fixed_properties {
|
||||
struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES];
|
||||
char uboot_ver[VERSION_MAX_LEN];
|
||||
char preboot_ver[VERSION_MAX_LEN];
|
||||
u64 sram_base_address;
|
||||
@ -132,7 +161,89 @@ struct hl_cb {
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* QUEUES
|
||||
*/
|
||||
|
||||
struct hl_cs_job;
|
||||
|
||||
/*
|
||||
* Currently, there are two limitations on the maximum length of a queue:
|
||||
*
|
||||
* 1. The memory footprint of the queue. The current allocated space for the
|
||||
* queue is PAGE_SIZE. Because each entry in the queue is HL_BD_SIZE,
|
||||
* the maximum length of the queue can be PAGE_SIZE / HL_BD_SIZE,
|
||||
* which currently is 4096/16 = 256 entries.
|
||||
*
|
||||
* To increase that, we need either to decrease the size of the
|
||||
* BD (difficult), or allocate more than a single page (easier).
|
||||
*
|
||||
* 2. Because the size of the JOB handle field in the BD CTL / completion queue
|
||||
* is 10-bit, we can have up to 1024 open jobs per hardware queue.
|
||||
* Therefore, each queue can hold up to 1024 entries.
|
||||
*
|
||||
* HL_QUEUE_LENGTH is in units of struct hl_bd.
|
||||
* HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
|
||||
*/
|
||||
|
||||
#define HL_PAGE_SIZE 4096 /* minimum page size */
|
||||
/* Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE) */
|
||||
#define HL_QUEUE_LENGTH 256
|
||||
#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE)
|
||||
|
||||
/*
|
||||
* HL_CQ_LENGTH is in units of struct hl_cq_entry.
|
||||
* HL_CQ_LENGTH should be <= HL_PAGE_SIZE
|
||||
*/
|
||||
#define HL_CQ_LENGTH HL_QUEUE_LENGTH
|
||||
#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* struct hl_hw_queue - describes a H/W transport queue.
|
||||
* @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
|
||||
* @queue_type: type of queue.
|
||||
* @kernel_address: holds the queue's kernel virtual address.
|
||||
* @bus_address: holds the queue's DMA address.
|
||||
* @pi: holds the queue's pi value.
|
||||
* @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
|
||||
* @hw_queue_id: the id of the H/W queue.
|
||||
* @int_queue_len: length of internal queue (number of entries).
|
||||
* @valid: is the queue valid (we have array of 32 queues, not all of them
|
||||
* exists).
|
||||
*/
|
||||
struct hl_hw_queue {
|
||||
struct hl_cs_job **shadow_queue;
|
||||
enum hl_queue_type queue_type;
|
||||
u64 kernel_address;
|
||||
dma_addr_t bus_address;
|
||||
u32 pi;
|
||||
u32 ci;
|
||||
u32 hw_queue_id;
|
||||
u16 int_queue_len;
|
||||
u8 valid;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_cq - describes a completion queue
|
||||
* @hdev: pointer to the device structure
|
||||
* @kernel_address: holds the queue's kernel virtual address
|
||||
* @bus_address: holds the queue's DMA address
|
||||
* @hw_queue_id: the id of the matching H/W queue
|
||||
* @ci: ci inside the queue
|
||||
* @pi: pi inside the queue
|
||||
* @free_slots_cnt: counter of free slots in queue
|
||||
*/
|
||||
struct hl_cq {
|
||||
struct hl_device *hdev;
|
||||
u64 kernel_address;
|
||||
dma_addr_t bus_address;
|
||||
u32 hw_queue_id;
|
||||
u32 ci;
|
||||
u32 pi;
|
||||
atomic_t free_slots_cnt;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
@ -164,6 +275,8 @@ enum hl_asic_type {
|
||||
* @resume: handles IP specific H/W or SW changes for resume.
|
||||
* @mmap: mmap function, does nothing.
|
||||
* @cb_mmap: maps a CB.
|
||||
* @ring_doorbell: increment PI on a given QMAN.
|
||||
* @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
|
||||
* @dma_alloc_coherent: Allocate coherent DMA memory by calling
|
||||
* dma_alloc_coherent(). This is ASIC function because its
|
||||
* implementation is not trivial when the driver is loaded
|
||||
@ -172,6 +285,16 @@ enum hl_asic_type {
|
||||
* This is ASIC function because its implementation is not
|
||||
* trivial when the driver is loaded in simulation mode
|
||||
* (not upstreamed).
|
||||
* @get_int_queue_base: get the internal queue base address.
|
||||
* @test_queues: run simple test on all queues for sanity check.
|
||||
* @dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
|
||||
* size of allocation is HL_DMA_POOL_BLK_SIZE.
|
||||
* @dma_pool_free: free small DMA allocation from pool.
|
||||
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
|
||||
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
|
||||
* @hw_queues_lock: acquire H/W queues lock.
|
||||
* @hw_queues_unlock: release H/W queues lock.
|
||||
* @send_cpu_message: send buffer to ArmCP.
|
||||
*/
|
||||
struct hl_asic_funcs {
|
||||
int (*early_init)(struct hl_device *hdev);
|
||||
@ -185,10 +308,27 @@ struct hl_asic_funcs {
|
||||
int (*mmap)(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
|
||||
int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
u64 kaddress, phys_addr_t paddress, u32 size);
|
||||
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
|
||||
void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
|
||||
void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
|
||||
dma_addr_t *dma_handle, gfp_t flag);
|
||||
void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
|
||||
void *cpu_addr, dma_addr_t dma_handle);
|
||||
void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
|
||||
dma_addr_t *dma_handle, u16 *queue_len);
|
||||
int (*test_queues)(struct hl_device *hdev);
|
||||
void* (*dma_pool_zalloc)(struct hl_device *hdev, size_t size,
|
||||
gfp_t mem_flags, dma_addr_t *dma_handle);
|
||||
void (*dma_pool_free)(struct hl_device *hdev, void *vaddr,
|
||||
dma_addr_t dma_addr);
|
||||
void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
|
||||
size_t size, dma_addr_t *dma_handle);
|
||||
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
|
||||
size_t size, void *vaddr);
|
||||
void (*hw_queues_lock)(struct hl_device *hdev);
|
||||
void (*hw_queues_unlock)(struct hl_device *hdev);
|
||||
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
|
||||
u16 len, u32 timeout, long *result);
|
||||
};
|
||||
|
||||
|
||||
@ -224,6 +364,17 @@ struct hl_ctx_mgr {
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* struct hl_cs_job - command submission job.
|
||||
* @finish_work: workqueue object to run when job is completed.
|
||||
* @id: the id of this job inside a CS.
|
||||
*/
|
||||
struct hl_cs_job {
|
||||
struct work_struct finish_work;
|
||||
u32 id;
|
||||
};
|
||||
/*
|
||||
* FILE PRIVATE STRUCTURE
|
||||
*/
|
||||
@ -298,7 +449,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
||||
* @dev: related kernel basic device structure.
|
||||
* @asic_name: ASIC specific name.
|
||||
* @asic_type: ASIC specific type.
|
||||
* @completion_queue: array of hl_cq.
|
||||
* @cq_wq: work queue of completion queues for executing work in process context
|
||||
* @eq_wq: work queue of event queue for executing work in process context.
|
||||
* @kernel_ctx: KMD context structure.
|
||||
* @kernel_queues: array of hl_hw_queue.
|
||||
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
|
||||
* @dma_pool: DMA pool for small allocations.
|
||||
* @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
|
||||
@ -312,6 +467,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
||||
* only a single process at a time. In addition, we need a
|
||||
* lock here so we can flush user processes which are opening
|
||||
* the device while we are trying to hard reset it
|
||||
* @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
|
||||
* @asic_prop: ASIC specific immutable properties.
|
||||
* @asic_funcs: ASIC specific functions.
|
||||
* @asic_specific: ASIC specific information to use only from ASIC files.
|
||||
@ -331,7 +487,10 @@ struct hl_device {
|
||||
struct device *dev;
|
||||
char asic_name[16];
|
||||
enum hl_asic_type asic_type;
|
||||
struct hl_cq *completion_queue;
|
||||
struct workqueue_struct *cq_wq;
|
||||
struct hl_ctx *kernel_ctx;
|
||||
struct hl_hw_queue *kernel_queues;
|
||||
struct hl_cb_mgr kernel_cb_mgr;
|
||||
struct dma_pool *dma_pool;
|
||||
void *cpu_accessible_dma_mem;
|
||||
@ -341,6 +500,7 @@ struct hl_device {
|
||||
struct mutex asid_mutex;
|
||||
/* TODO: remove fd_open_cnt_lock for multiple process support */
|
||||
struct mutex fd_open_cnt_lock;
|
||||
struct mutex send_cpu_message_lock;
|
||||
struct asic_fixed_properties asic_prop;
|
||||
const struct hl_asic_funcs *asic_funcs;
|
||||
void *asic_specific;
|
||||
@ -358,6 +518,7 @@ struct hl_device {
|
||||
/* Parameters for bring-up */
|
||||
u8 cpu_enable;
|
||||
u8 reset_pcilink;
|
||||
u8 cpu_queues_enable;
|
||||
u8 fw_loading;
|
||||
u8 pldm;
|
||||
};
|
||||
@ -400,7 +561,18 @@ int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, u32 timeout_us,
|
||||
u32 *val);
|
||||
int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
|
||||
u32 timeout_us, u32 *val);
|
||||
int hl_hw_queues_create(struct hl_device *hdev);
|
||||
void hl_hw_queues_destroy(struct hl_device *hdev);
|
||||
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
||||
u32 cb_size, u64 cb_ptr);
|
||||
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
|
||||
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
|
||||
|
||||
#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)
|
||||
#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1))
|
||||
|
||||
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
|
||||
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
|
||||
int hl_asid_init(struct hl_device *hdev);
|
||||
void hl_asid_fini(struct hl_device *hdev);
|
||||
unsigned long hl_asid_alloc(struct hl_device *hdev);
|
||||
|
@ -169,6 +169,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
||||
/* Parameters for bring-up - set them to defaults */
|
||||
hdev->cpu_enable = 1;
|
||||
hdev->reset_pcilink = 0;
|
||||
hdev->cpu_queues_enable = 1;
|
||||
hdev->fw_loading = 1;
|
||||
hdev->pldm = 0;
|
||||
|
||||
@ -176,6 +177,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
||||
if (!hdev->cpu_enable)
|
||||
hdev->fw_loading = 0;
|
||||
|
||||
/* If we don't load FW, no need to initialize CPU queues */
|
||||
if (!hdev->fw_loading)
|
||||
hdev->cpu_queues_enable = 0;
|
||||
|
||||
hdev->disabled = true;
|
||||
hdev->pdev = pdev; /* can be NULL in case of simulator device */
|
||||
|
||||
|
400
drivers/misc/habanalabs/hw_queue.c
Normal file
400
drivers/misc/habanalabs/hw_queue.c
Normal file
@ -0,0 +1,400 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "habanalabs.h"
|
||||
|
||||
#include <linux/slab.h>
|
||||
|
||||
/*
|
||||
* hl_queue_add_ptr - add to pi or ci and checks if it wraps around
|
||||
*
|
||||
* @ptr: the current pi/ci value
|
||||
* @val: the amount to add
|
||||
*
|
||||
* Add val to ptr. It can go until twice the queue length.
|
||||
*/
|
||||
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
|
||||
{
|
||||
ptr += val;
|
||||
ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * queue_free_slots - compute how many entries are free in a H/W queue
 *
 * @q: the queue whose pi and ci are examined
 * @queue_len: length of the queue, in entries
 *
 * The signed difference between pi and ci is taken; a negative difference is
 * handled separately from a non-negative one.
 */
static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int diff = (q->pi - q->ci);

	if (diff < 0)
		return (abs(diff) - queue_len);

	return (queue_len - diff);
}
|
||||
|
||||
/*
|
||||
* ext_queue_submit_bd - Submit a buffer descriptor to an external queue
|
||||
*
|
||||
* @hdev: pointer to habanalabs device structure
|
||||
* @q: pointer to habanalabs queue structure
|
||||
* @ctl: BD's control word
|
||||
* @len: BD's length
|
||||
* @ptr: BD's pointer
|
||||
*
|
||||
* This function assumes there is enough space on the queue to submit a new
|
||||
* BD to it. It initializes the next BD and calls the device specific
|
||||
* function to set the pi (and doorbell)
|
||||
*
|
||||
* This function must be called when the scheduler mutex is taken
|
||||
*
|
||||
*/
|
||||
static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
|
||||
u32 ctl, u32 len, u64 ptr)
|
||||
{
|
||||
struct hl_bd *bd;
|
||||
|
||||
bd = (struct hl_bd *) (uintptr_t) q->kernel_address;
|
||||
bd += hl_pi_2_offset(q->pi);
|
||||
bd->ctl = ctl;
|
||||
bd->len = len;
|
||||
bd->ptr = ptr + hdev->asic_prop.host_phys_base_address;
|
||||
|
||||
q->pi = hl_queue_inc_ptr(q->pi);
|
||||
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
|
||||
}
|
||||
|
||||
/*
|
||||
* ext_queue_sanity_checks - perform some sanity checks on external queue
|
||||
*
|
||||
* @hdev : pointer to hl_device structure
|
||||
* @q : pointer to hl_hw_queue structure
|
||||
* @num_of_entries : how many entries to check for space
|
||||
* @reserve_cq_entry : whether to reserve an entry in the cq
|
||||
*
|
||||
* H/W queues spinlock should be taken before calling this function
|
||||
*
|
||||
* Perform the following:
|
||||
* - Make sure we have enough space in the h/w queue
|
||||
* - Make sure we have enough space in the completion queue
|
||||
* - Reserve space in the completion queue (needs to be reversed if there
|
||||
* is a failure down the road before the actual submission of work). Only
|
||||
* do this action if reserve_cq_entry is true
|
||||
*
|
||||
*/
|
||||
static int ext_queue_sanity_checks(struct hl_device *hdev,
|
||||
struct hl_hw_queue *q, int num_of_entries,
|
||||
bool reserve_cq_entry)
|
||||
{
|
||||
atomic_t *free_slots =
|
||||
&hdev->completion_queue[q->hw_queue_id].free_slots_cnt;
|
||||
int free_slots_cnt;
|
||||
|
||||
/* Check we have enough space in the queue */
|
||||
free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
|
||||
|
||||
if (free_slots_cnt < num_of_entries) {
|
||||
dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
|
||||
q->hw_queue_id, num_of_entries);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (reserve_cq_entry) {
|
||||
/*
|
||||
* Check we have enough space in the completion queue
|
||||
* Add -1 to counter (decrement) unless counter was already 0
|
||||
* In that case, CQ is full so we can't submit a new CB because
|
||||
* we won't get ack on its completion
|
||||
* atomic_add_unless will return 0 if counter was already 0
|
||||
*/
|
||||
if (atomic_add_negative(num_of_entries * -1, free_slots)) {
|
||||
dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
|
||||
num_of_entries, q->hw_queue_id);
|
||||
atomic_add(num_of_entries, free_slots);
|
||||
return -EAGAIN;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
|
||||
*
|
||||
* @hdev: pointer to hl_device structure
|
||||
* @hw_queue_id: Queue's type
|
||||
* @cb_size: size of CB
|
||||
* @cb_ptr: pointer to CB location
|
||||
*
|
||||
* This function sends a single CB, that must NOT generate a completion entry
|
||||
*
|
||||
*/
|
||||
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
||||
u32 cb_size, u64 cb_ptr)
|
||||
{
|
||||
struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* The CPU queue is a synchronous queue with an effective depth of
|
||||
* a single entry (although it is allocated with room for multiple
|
||||
* entries). Therefore, there is a different lock, called
|
||||
* send_cpu_message_lock, that serializes accesses to the CPU queue.
|
||||
* As a result, we don't need to lock the access to the entire H/W
|
||||
* queues module when submitting a JOB to the CPU queue
|
||||
*/
|
||||
if (q->queue_type != QUEUE_TYPE_CPU)
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
|
||||
if (hdev->disabled) {
|
||||
rc = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
rc = ext_queue_sanity_checks(hdev, q, 1, false);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
ext_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
|
||||
|
||||
out:
|
||||
if (q->queue_type != QUEUE_TYPE_CPU)
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
|
||||
*
|
||||
* @hdev: pointer to hl_device structure
|
||||
* @hw_queue_id: which queue to increment its ci
|
||||
*/
|
||||
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
|
||||
{
|
||||
struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
|
||||
|
||||
q->ci = hl_queue_inc_ptr(q->ci);
|
||||
}
|
||||
|
||||
static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
|
||||
struct hl_hw_queue *q)
|
||||
{
|
||||
void *p;
|
||||
int rc;
|
||||
|
||||
p = hdev->asic_funcs->dma_alloc_coherent(hdev,
|
||||
HL_QUEUE_SIZE_IN_BYTES,
|
||||
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
q->kernel_address = (u64) (uintptr_t) p;
|
||||
|
||||
q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
|
||||
sizeof(*q->shadow_queue),
|
||||
GFP_KERNEL);
|
||||
if (!q->shadow_queue) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate shadow queue for H/W queue %d\n",
|
||||
q->hw_queue_id);
|
||||
rc = -ENOMEM;
|
||||
goto free_queue;
|
||||
}
|
||||
|
||||
/* Make sure read/write pointers are initialized to start of queue */
|
||||
q->ci = 0;
|
||||
q->pi = 0;
|
||||
|
||||
return 0;
|
||||
|
||||
free_queue:
|
||||
hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
|
||||
(void *) (uintptr_t) q->kernel_address, q->bus_address);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
||||
{
|
||||
void *p;
|
||||
|
||||
p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
|
||||
&q->bus_address, &q->int_queue_len);
|
||||
if (!p) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to get base address for internal queue %d\n",
|
||||
q->hw_queue_id);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
q->kernel_address = (u64) (uintptr_t) p;
|
||||
q->pi = 0;
|
||||
q->ci = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * cpu_hw_queue_init - initialize the CPU queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Delegates to the initialization code shared with external queues.
 */
static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	int rc = ext_and_cpu_hw_queue_init(hdev, q);

	return rc;
}
|
||||
|
||||
/*
 * ext_hw_queue_init - initialize an external queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Delegates to the initialization code shared with the CPU queue.
 */
static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	int rc = ext_and_cpu_hw_queue_init(hdev, q);

	return rc;
}
|
||||
|
||||
/*
|
||||
* hw_queue_init - main initialization function for H/W queue object
|
||||
*
|
||||
* @hdev: pointer to hl_device device structure
|
||||
* @q: pointer to hl_hw_queue queue structure
|
||||
* @hw_queue_id: The id of the H/W queue
|
||||
*
|
||||
* Allocate dma-able memory for the queue and initialize fields
|
||||
* Returns 0 on success
|
||||
*/
|
||||
static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
|
||||
u32 hw_queue_id)
|
||||
{
|
||||
int rc;
|
||||
|
||||
BUILD_BUG_ON(HL_QUEUE_SIZE_IN_BYTES > HL_PAGE_SIZE);
|
||||
|
||||
q->hw_queue_id = hw_queue_id;
|
||||
|
||||
switch (q->queue_type) {
|
||||
case QUEUE_TYPE_EXT:
|
||||
rc = ext_hw_queue_init(hdev, q);
|
||||
break;
|
||||
|
||||
case QUEUE_TYPE_INT:
|
||||
rc = int_hw_queue_init(hdev, q);
|
||||
break;
|
||||
|
||||
case QUEUE_TYPE_CPU:
|
||||
rc = cpu_hw_queue_init(hdev, q);
|
||||
break;
|
||||
|
||||
case QUEUE_TYPE_NA:
|
||||
q->valid = 0;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
dev_crit(hdev->dev, "wrong queue type %d during init\n",
|
||||
q->queue_type);
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
q->valid = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* hw_queue_fini - destroy queue
|
||||
*
|
||||
* @hdev: pointer to hl_device device structure
|
||||
* @q: pointer to hl_hw_queue queue structure
|
||||
*
|
||||
* Free the queue memory
|
||||
*/
|
||||
static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
|
||||
{
|
||||
if (!q->valid)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we arrived here, there are no jobs waiting on this queue
|
||||
* so we can safely remove it.
|
||||
* This is because this function can only called when:
|
||||
* 1. Either a context is deleted, which only can occur if all its
|
||||
* jobs were finished
|
||||
* 2. A context wasn't able to be created due to failure or timeout,
|
||||
* which means there are no jobs on the queue yet
|
||||
*
|
||||
* The only exception are the queues of the kernel context, but
|
||||
* if they are being destroyed, it means that the entire module is
|
||||
* being removed. If the module is removed, it means there is no open
|
||||
* user context. It also means that if a job was submitted by
|
||||
* the kernel driver (e.g. context creation), the job itself was
|
||||
* released by the kernel driver when a timeout occurred on its
|
||||
* Completion. Thus, we don't need to release it again.
|
||||
*/
|
||||
|
||||
if (q->queue_type == QUEUE_TYPE_INT)
|
||||
return;
|
||||
|
||||
kfree(q->shadow_queue);
|
||||
|
||||
hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
|
||||
(void *) (uintptr_t) q->kernel_address, q->bus_address);
|
||||
}
|
||||
|
||||
int hl_hw_queues_create(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *asic = &hdev->asic_prop;
|
||||
struct hl_hw_queue *q;
|
||||
int i, rc, q_ready_cnt;
|
||||
|
||||
hdev->kernel_queues = kcalloc(HL_MAX_QUEUES,
|
||||
sizeof(*hdev->kernel_queues), GFP_KERNEL);
|
||||
|
||||
if (!hdev->kernel_queues) {
|
||||
dev_err(hdev->dev, "Not enough memory for H/W queues\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Initialize the H/W queues */
|
||||
for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
|
||||
i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
|
||||
|
||||
q->queue_type = asic->hw_queues_props[i].type;
|
||||
rc = hw_queue_init(hdev, q, i);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to initialize queue %d\n", i);
|
||||
goto release_queues;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
release_queues:
|
||||
for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
|
||||
hw_queue_fini(hdev, q);
|
||||
|
||||
kfree(hdev->kernel_queues);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
void hl_hw_queues_destroy(struct hl_device *hdev)
|
||||
{
|
||||
struct hl_hw_queue *q;
|
||||
int i;
|
||||
|
||||
for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++)
|
||||
hw_queue_fini(hdev, q);
|
||||
|
||||
kfree(hdev->kernel_queues);
|
||||
}
|
||||
|
||||
/*
 * hl_hw_queue_reset - reset the producer/consumer indices of the H/W queues
 *
 * @hdev: pointer to device structure
 * @hard_reset: true if the reset is a hard reset
 *
 * Set pi and ci to 0 for every valid queue. Queues of type QUEUE_TYPE_CPU
 * are left untouched unless this is a hard reset.
 */
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	int idx;

	for (idx = 0 ; idx < HL_MAX_QUEUES ; idx++) {
		struct hl_hw_queue *queue = &hdev->kernel_queues[idx];

		if (!queue->valid)
			continue;

		/* The CPU queue keeps its indices across a non-hard reset */
		if (queue->queue_type == QUEUE_TYPE_CPU && !hard_reset)
			continue;

		queue->ci = 0;
		queue->pi = 0;
	}
}
|
@ -10,10 +10,302 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
 * Initialization status values of the primary queue (PQ).
 * NOTE(review): the names suggest a hand-off between the device CPU and the
 * host, but the protocol itself is not visible in this header - confirm.
 */
enum pq_init_status {
	PQ_INIT_STATUS_NA = 0,
	PQ_INIT_STATUS_READY_FOR_CP = 1,
	PQ_INIT_STATUS_READY_FOR_HOST = 2
};
|
||||
|
||||
/*
|
||||
* ArmCP Primary Queue Packets
|
||||
*
|
||||
* During normal operation, KMD needs to send various messages to ArmCP,
|
||||
* usually either to SET some value into a H/W periphery or to GET the current
|
||||
* value of some H/W periphery. For example, SET the frequency of MME/TPC and
|
||||
* GET the value of the thermal sensor.
|
||||
*
|
||||
* These messages can be initiated either by the User application or by KMD
|
||||
* itself, e.g. power management code. In either case, the communication from
|
||||
* KMD to ArmCP will *always* be in synchronous mode, meaning that KMD will
|
||||
* send a single message and poll until the message was acknowledged and the
|
||||
* results are ready (if results are needed).
|
||||
*
|
||||
* This means that only a single message can be sent at a time and KMD must
|
||||
* wait for its result before sending the next message. Having said that,
|
||||
* because these are control messages which are sent in a relatively low
|
||||
* frequency, this limitation seems acceptable. It's important to note that
|
||||
* in case of multiple devices, messages to different devices *can* be sent
|
||||
* at the same time.
|
||||
*
|
||||
* The message, inputs/outputs (if relevant) and fence object will be located
|
||||
* on the device DDR at an address that will be determined by KMD. During
|
||||
* device initialization phase, KMD will pass to ArmCP that address. Most of
|
||||
* the message types will contain inputs/outputs inside the message itself.
|
||||
* The common part of each message will contain the opcode of the message (its
|
||||
* type) and a field representing a fence object.
|
||||
*
|
||||
* When KMD wishes to send a message to ArmCP, it will write the message
|
||||
* contents to the device DDR, clear the fence object and then write the
|
||||
* value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
|
||||
* the 484 interrupt-id to the ARM core.
|
||||
*
|
||||
* Upon receiving the 484 interrupt-id, ArmCP will read the message from the
|
||||
* DDR. In case the message is a SET operation, ArmCP will first perform the
|
||||
* operation and then write to the fence object on the device DDR. In case the
|
||||
* message is a GET operation, ArmCP will first fill the results section on the
|
||||
* device DDR and then write to the fence object. If an error occurred, ArmCP
|
||||
* will fill the rc field with the right error code.
|
||||
*
|
||||
* In the meantime, KMD will poll on the fence object. Once KMD sees that the
|
||||
* fence object is signaled, it will read the results from the device DDR
|
||||
* (if relevant) and resume the code execution in KMD.
|
||||
*
|
||||
* To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
|
||||
* so the value being put by the KMD matches the value read by ArmCP
|
||||
*
|
||||
* Non-QMAN packets should be limited to values 1 through (2^8 - 1)
|
||||
*
|
||||
* Detailed description:
|
||||
*
|
||||
* ARMCP_PACKET_DISABLE_PCI_ACCESS -
|
||||
* After receiving this packet the embedded CPU must NOT issue PCI
|
||||
* transactions (read/write) towards the Host CPU. This also includes
|
||||
* sending MSI-X interrupts.
|
||||
* This packet is usually sent before the device is moved to D3Hot state.
|
||||
*
|
||||
* ARMCP_PACKET_ENABLE_PCI_ACCESS -
|
||||
* After receiving this packet the embedded CPU is allowed to issue PCI
|
||||
* transactions towards the Host CPU, including sending MSI-X interrupts.
|
||||
* This packet is usually sent after the device is moved to D0 state.
|
||||
*
|
||||
* ARMCP_PACKET_TEMPERATURE_GET -
|
||||
* Fetch the current temperature / Max / Max Hyst / Critical /
|
||||
* Critical Hyst of a specified thermal sensor. The packet's
|
||||
* arguments specify the desired sensor and the field to get.
|
||||
*
|
||||
* ARMCP_PACKET_VOLTAGE_GET -
|
||||
* Fetch the voltage / Max / Min of a specified sensor. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_CURRENT_GET -
|
||||
* Fetch the current / Max / Min of a specified sensor. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_FAN_SPEED_GET -
|
||||
* Fetch the speed / Max / Min of a specified fan. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_PWM_GET -
|
||||
* Fetch the pwm value / mode of a specified pwm. The packet's
|
||||
* arguments specify the sensor and type.
|
||||
*
|
||||
* ARMCP_PACKET_PWM_SET -
|
||||
* Set the pwm value / mode of a specified pwm. The packet's
|
||||
* arguments specify the sensor, type and value.
|
||||
*
|
||||
* ARMCP_PACKET_FREQUENCY_SET -
|
||||
* Set the frequency of a specified PLL. The packet's arguments specify
|
||||
* the PLL and the desired frequency. The actual frequency in the device
|
||||
* might differ from the requested frequency.
|
||||
*
|
||||
* ARMCP_PACKET_FREQUENCY_GET -
|
||||
* Fetch the frequency of a specified PLL. The packet's arguments specify
|
||||
* the PLL.
|
||||
*
|
||||
* ARMCP_PACKET_LED_SET -
|
||||
* Set the state of a specified led. The packet's arguments
|
||||
* specify the led and the desired state.
|
||||
*
|
||||
* ARMCP_PACKET_I2C_WR -
|
||||
* Write 32-bit value to I2C device. The packet's arguments specify the
|
||||
* I2C bus, address and value.
|
||||
*
|
||||
* ARMCP_PACKET_I2C_RD -
|
||||
* Read 32-bit value from I2C device. The packet's arguments specify the
|
||||
* I2C bus and address.
|
||||
*
|
||||
* ARMCP_PACKET_INFO_GET -
|
||||
* Fetch information from the device as specified in the packet's
|
||||
* structure. KMD passes the max size it allows the ArmCP to write to
|
||||
* the structure, to prevent data corruption in case of mismatched
|
||||
* KMD/FW versions.
|
||||
*
|
||||
* ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
|
||||
*
|
||||
* ARMCP_PACKET_UNMASK_RAZWI_IRQ -
|
||||
* Unmask the given IRQ. The IRQ number is specified in the value field.
|
||||
* The packet is sent after receiving an interrupt and printing its
|
||||
* relevant information.
|
||||
*
|
||||
* ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
|
||||
* Unmask the given IRQs. The IRQ numbers are specified in an array right
|
||||
* after the armcp_packet structure, where its first element is the array
|
||||
* length. The packet is sent after a soft reset was done in order to
|
||||
* handle any interrupts that were sent during the reset process.
|
||||
*
|
||||
* ARMCP_PACKET_TEST -
|
||||
* Test packet for ArmCP connectivity. The CPU will put the fence value
|
||||
* in the result field.
|
||||
*
|
||||
* ARMCP_PACKET_FREQUENCY_CURR_GET -
|
||||
* Fetch the current frequency of a specified PLL. The packet's arguments
|
||||
* specify the PLL.
|
||||
*
|
||||
* ARMCP_PACKET_MAX_POWER_GET -
|
||||
* Fetch the maximal power of the device.
|
||||
*
|
||||
* ARMCP_PACKET_MAX_POWER_SET -
|
||||
* Set the maximal power of the device. The packet's arguments specify
|
||||
* the power.
|
||||
*
|
||||
* ARMCP_PACKET_EEPROM_DATA_GET -
|
||||
* Get EEPROM data from the ArmCP kernel. The buffer is specified in the
|
||||
* addr field. The CPU will put the returned data size in the result
|
||||
* field. In addition, KMD passes the max size it allows the ArmCP to
|
||||
* write to the structure, to prevent data corruption in case of
|
||||
* mismatched KMD/FW versions.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * Opcodes of the packets KMD sends to ArmCP. The numeric value of each
 * opcode is spelled out because it is what ArmCP reads from the packet.
 * The trailing comment names the driver path that issues the packet.
 */
enum armcp_packet_id {
	ARMCP_PACKET_DISABLE_PCI_ACCESS = 1,		/* internal */
	ARMCP_PACKET_ENABLE_PCI_ACCESS = 2,		/* internal */
	ARMCP_PACKET_TEMPERATURE_GET = 3,		/* sysfs */
	ARMCP_PACKET_VOLTAGE_GET = 4,			/* sysfs */
	ARMCP_PACKET_CURRENT_GET = 5,			/* sysfs */
	ARMCP_PACKET_FAN_SPEED_GET = 6,			/* sysfs */
	ARMCP_PACKET_PWM_GET = 7,			/* sysfs */
	ARMCP_PACKET_PWM_SET = 8,			/* sysfs */
	ARMCP_PACKET_FREQUENCY_SET = 9,			/* sysfs */
	ARMCP_PACKET_FREQUENCY_GET = 10,		/* sysfs */
	ARMCP_PACKET_LED_SET = 11,			/* debugfs */
	ARMCP_PACKET_I2C_WR = 12,			/* debugfs */
	ARMCP_PACKET_I2C_RD = 13,			/* debugfs */
	ARMCP_PACKET_INFO_GET = 14,			/* IOCTL */
	/* Removed packet; the entry still occupies opcode 15 */
	ARMCP_PACKET_FLASH_PROGRAM_REMOVED = 15,
	ARMCP_PACKET_UNMASK_RAZWI_IRQ = 16,		/* internal */
	ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY = 17,	/* internal */
	ARMCP_PACKET_TEST = 18,				/* internal */
	ARMCP_PACKET_FREQUENCY_CURR_GET = 19,		/* sysfs */
	ARMCP_PACKET_MAX_POWER_GET = 20,		/* sysfs */
	ARMCP_PACKET_MAX_POWER_SET = 21,		/* sysfs */
	ARMCP_PACKET_EEPROM_DATA_GET = 22,		/* sysfs */
};
|
||||
|
||||
#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
|
||||
|
||||
#define ARMCP_PKT_CTL_RC_SHIFT 12
|
||||
#define ARMCP_PKT_CTL_RC_MASK 0x0000F000
|
||||
|
||||
#define ARMCP_PKT_CTL_OPCODE_SHIFT 16
|
||||
#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000
|
||||
|
||||
struct armcp_packet {
|
||||
union {
|
||||
__le64 value; /* For SET packets */
|
||||
__le64 result; /* For GET packets */
|
||||
__le64 addr; /* For PQ */
|
||||
};
|
||||
|
||||
__le32 ctl;
|
||||
|
||||
__le32 fence; /* Signal to KMD that message is completed */
|
||||
|
||||
union {
|
||||
struct {/* For temperature/current/voltage/fan/pwm get/set */
|
||||
__le16 sensor_index;
|
||||
__le16 type;
|
||||
};
|
||||
|
||||
struct { /* For I2C read/write */
|
||||
__u8 i2c_bus;
|
||||
__u8 i2c_addr;
|
||||
__u8 i2c_reg;
|
||||
__u8 pad; /* unused */
|
||||
};
|
||||
|
||||
/* For frequency get/set */
|
||||
__le32 pll_index;
|
||||
|
||||
/* For led set */
|
||||
__le32 led_index;
|
||||
|
||||
/* For get Armcp info/EEPROM data */
|
||||
__le32 data_max_size;
|
||||
};
|
||||
};
|
||||
|
||||
struct armcp_unmask_irq_arr_packet {
|
||||
struct armcp_packet armcp_pkt;
|
||||
__le32 length;
|
||||
__le32 irqs[0];
|
||||
};
|
||||
|
||||
/*
 * Return codes ArmCP can report for a packet.
 * NOTE(review): presumably carried in the RC field of the packet's ctl word
 * (ARMCP_PKT_CTL_RC_MASK) - confirm against the code that parses it.
 */
enum armcp_packet_rc {
	armcp_packet_success = 0,
	armcp_packet_invalid = 1,
	armcp_packet_fault = 2
};
|
||||
|
||||
/*
 * Temperature attribute selectors (current / max / max hyst / critical /
 * critical hyst). Note the numbering gap between input (0) and max (6).
 */
enum armcp_temp_type {
	armcp_temp_input = 0,
	armcp_temp_max = 6,
	armcp_temp_max_hyst = 7,
	armcp_temp_crit = 8,
	armcp_temp_crit_hyst = 9
};
|
||||
|
||||
/*
 * "in" sensor attribute selectors: current value, minimum, maximum.
 * NOTE(review): presumably the voltage attributes - confirm.
 */
enum armcp_in_attributes {
	armcp_in_input = 0,
	armcp_in_min = 1,
	armcp_in_max = 2
};
|
||||
|
||||
/* Current sensor attribute selectors: current value, minimum, maximum */
enum armcp_curr_attributes {
	armcp_curr_input = 0,
	armcp_curr_min = 1,
	armcp_curr_max = 2
};
|
||||
|
||||
/*
 * Fan attribute selectors: speed, minimum, maximum. Value 1 is not
 * assigned to any attribute in this enum.
 */
enum armcp_fan_attributes {
	armcp_fan_input = 0,
	armcp_fan_min = 2,
	armcp_fan_max = 3
};
|
||||
|
||||
/* PWM attribute selectors: the pwm value and the pwm enable (mode) */
enum armcp_pwm_attributes {
	armcp_pwm_input = 0,
	armcp_pwm_enable = 1
};
|
||||
|
||||
/* Event Queue Packets */
|
||||
|
||||
struct eq_generic_event {
|
||||
__le64 data[7];
|
||||
};
|
||||
|
||||
/*
|
||||
* ArmCP info
|
||||
*/
|
||||
|
||||
#define VERSION_MAX_LEN 128
|
||||
#define ARMCP_MAX_SENSORS 128
|
||||
|
||||
struct armcp_sensor {
|
||||
__le32 type;
|
||||
__le32 flags;
|
||||
};
|
||||
|
||||
struct armcp_info {
|
||||
struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
|
||||
__u8 kernel_version[VERSION_MAX_LEN];
|
||||
__le32 reserved[3];
|
||||
__le32 cpld_version;
|
||||
__le32 infineon_version;
|
||||
__u8 fuse_version[VERSION_MAX_LEN];
|
||||
__u8 thermal_version[VERSION_MAX_LEN];
|
||||
__u8 armcp_version[VERSION_MAX_LEN];
|
||||
__le64 dram_size;
|
||||
};
|
||||
|
||||
#endif /* ARMCP_IF_H */
|
||||
|
186
drivers/misc/habanalabs/include/goya/goya_async_events.h
Normal file
186
drivers/misc/habanalabs/include/goya/goya_async_events.h
Normal file
@ -0,0 +1,186 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Copyright 2018 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __GOYA_ASYNC_EVENTS_H_
|
||||
#define __GOYA_ASYNC_EVENTS_H_
|
||||
|
||||
enum goya_async_event_id {
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_IF = 33,
|
||||
GOYA_ASYNC_EVENT_ID_TPC0_ECC = 36,
|
||||
GOYA_ASYNC_EVENT_ID_TPC1_ECC = 39,
|
||||
GOYA_ASYNC_EVENT_ID_TPC2_ECC = 42,
|
||||
GOYA_ASYNC_EVENT_ID_TPC3_ECC = 45,
|
||||
GOYA_ASYNC_EVENT_ID_TPC4_ECC = 48,
|
||||
GOYA_ASYNC_EVENT_ID_TPC5_ECC = 51,
|
||||
GOYA_ASYNC_EVENT_ID_TPC6_ECC = 54,
|
||||
GOYA_ASYNC_EVENT_ID_TPC7_ECC = 57,
|
||||
GOYA_ASYNC_EVENT_ID_MME_ECC = 60,
|
||||
GOYA_ASYNC_EVENT_ID_MME_ECC_EXT = 61,
|
||||
GOYA_ASYNC_EVENT_ID_MMU_ECC = 63,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_MACRO = 64,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_ECC = 66,
|
||||
GOYA_ASYNC_EVENT_ID_CPU_IF_ECC = 75,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_MEM = 78,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT = 79,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM0 = 81,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM1 = 82,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM2 = 83,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM3 = 84,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM4 = 85,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM5 = 86,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM6 = 87,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM7 = 88,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM8 = 89,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM9 = 90,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM10 = 91,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM11 = 92,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM12 = 93,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM13 = 94,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM14 = 95,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM15 = 96,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM16 = 97,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM17 = 98,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM18 = 99,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM19 = 100,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM20 = 101,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM21 = 102,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM22 = 103,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM23 = 104,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM24 = 105,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM25 = 106,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM26 = 107,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM27 = 108,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM28 = 109,
|
||||
GOYA_ASYNC_EVENT_ID_SRAM29 = 110,
|
||||
GOYA_ASYNC_EVENT_ID_GIC500 = 112,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_DEC = 115,
|
||||
GOYA_ASYNC_EVENT_ID_TPC0_DEC = 117,
|
||||
GOYA_ASYNC_EVENT_ID_TPC1_DEC = 120,
|
||||
GOYA_ASYNC_EVENT_ID_TPC2_DEC = 123,
|
||||
GOYA_ASYNC_EVENT_ID_TPC3_DEC = 126,
|
||||
GOYA_ASYNC_EVENT_ID_TPC4_DEC = 129,
|
||||
GOYA_ASYNC_EVENT_ID_TPC5_DEC = 132,
|
||||
GOYA_ASYNC_EVENT_ID_TPC6_DEC = 135,
|
||||
GOYA_ASYNC_EVENT_ID_TPC7_DEC = 138,
|
||||
GOYA_ASYNC_EVENT_ID_AXI_ECC = 139,
|
||||
GOYA_ASYNC_EVENT_ID_L2_RAM_ECC = 140,
|
||||
GOYA_ASYNC_EVENT_ID_MME_WACS = 141,
|
||||
GOYA_ASYNC_EVENT_ID_MME_WACSD = 142,
|
||||
GOYA_ASYNC_EVENT_ID_PLL0 = 143,
|
||||
GOYA_ASYNC_EVENT_ID_PLL1 = 144,
|
||||
GOYA_ASYNC_EVENT_ID_PLL3 = 146,
|
||||
GOYA_ASYNC_EVENT_ID_PLL4 = 147,
|
||||
GOYA_ASYNC_EVENT_ID_PLL5 = 148,
|
||||
GOYA_ASYNC_EVENT_ID_PLL6 = 149,
|
||||
GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER = 155,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC = 159,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC = 160,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_FLR = 171,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_HOT_RESET = 172,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG0 = 174,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG1 = 175,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG2 = 176,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG3 = 177,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG0 = 178,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG1 = 179,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG2 = 180,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG3 = 181,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_APB = 182,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_QDB = 183,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_BM_D_P_WR = 184,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_BM_D_RD = 185,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_BM_U_P_WR = 186,
|
||||
GOYA_ASYNC_EVENT_ID_PCIE_BM_U_RD = 187,
|
||||
GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU = 190,
|
||||
GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR = 191,
|
||||
GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU = 200,
|
||||
GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR = 201,
|
||||
GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU = 210,
|
||||
GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR = 211,
|
||||
GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU = 220,
|
||||
GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR = 221,
|
||||
GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU = 230,
|
||||
GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR = 231,
|
||||
GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU = 240,
|
||||
GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR = 241,
|
||||
GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU = 250,
|
||||
GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR = 251,
|
||||
GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU = 260,
|
||||
GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR = 261,
|
||||
GOYA_ASYNC_EVENT_ID_MMU_SBA_SPMU0 = 270,
|
||||
GOYA_ASYNC_EVENT_ID_MMU_SBA_SPMU1 = 271,
|
||||
GOYA_ASYNC_EVENT_ID_MME_WACS_UP = 272,
|
||||
GOYA_ASYNC_EVENT_ID_MME_WACS_DOWN = 273,
|
||||
GOYA_ASYNC_EVENT_ID_MMU_PAGE_FAULT = 280,
|
||||
GOYA_ASYNC_EVENT_ID_MMU_WR_PERM = 281,
|
||||
GOYA_ASYNC_EVENT_ID_MMU_DBG_BM = 282,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 = 290,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_BM_CH1 = 291,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_BM_CH2 = 292,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_BM_CH3 = 293,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 = 294,
|
||||
GOYA_ASYNC_EVENT_ID_DDR0_PHY_DFI = 300,
|
||||
GOYA_ASYNC_EVENT_ID_DDR0_ECC_SCRUB = 301,
|
||||
GOYA_ASYNC_EVENT_ID_DDR0_DB_ECC = 302,
|
||||
GOYA_ASYNC_EVENT_ID_DDR0_SB_ECC = 303,
|
||||
GOYA_ASYNC_EVENT_ID_DDR0_SB_ECC_MC = 304,
|
||||
GOYA_ASYNC_EVENT_ID_DDR0_AXI_RD = 305,
|
||||
GOYA_ASYNC_EVENT_ID_DDR0_AXI_WR = 306,
|
||||
GOYA_ASYNC_EVENT_ID_DDR1_PHY_DFI = 310,
|
||||
GOYA_ASYNC_EVENT_ID_DDR1_ECC_SCRUB = 311,
|
||||
GOYA_ASYNC_EVENT_ID_DDR1_DB_ECC = 312,
|
||||
GOYA_ASYNC_EVENT_ID_DDR1_SB_ECC = 313,
|
||||
GOYA_ASYNC_EVENT_ID_DDR1_SB_ECC_MC = 314,
|
||||
GOYA_ASYNC_EVENT_ID_DDR1_AXI_RD = 315,
|
||||
GOYA_ASYNC_EVENT_ID_DDR1_AXI_WR = 316,
|
||||
GOYA_ASYNC_EVENT_ID_CPU_BMON = 320,
|
||||
GOYA_ASYNC_EVENT_ID_TS_EAST = 322,
|
||||
GOYA_ASYNC_EVENT_ID_TS_WEST = 323,
|
||||
GOYA_ASYNC_EVENT_ID_TS_NORTH = 324,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_0 = 330,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_1 = 331,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_2 = 332,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET = 356,
|
||||
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT = 361,
|
||||
GOYA_ASYNC_EVENT_ID_TPC0_CMDQ = 430,
|
||||
GOYA_ASYNC_EVENT_ID_TPC1_CMDQ = 431,
|
||||
GOYA_ASYNC_EVENT_ID_TPC2_CMDQ = 432,
|
||||
GOYA_ASYNC_EVENT_ID_TPC3_CMDQ = 433,
|
||||
GOYA_ASYNC_EVENT_ID_TPC4_CMDQ = 434,
|
||||
GOYA_ASYNC_EVENT_ID_TPC5_CMDQ = 435,
|
||||
GOYA_ASYNC_EVENT_ID_TPC6_CMDQ = 436,
|
||||
GOYA_ASYNC_EVENT_ID_TPC7_CMDQ = 437,
|
||||
GOYA_ASYNC_EVENT_ID_TPC0_QM = 438,
|
||||
GOYA_ASYNC_EVENT_ID_TPC1_QM = 439,
|
||||
GOYA_ASYNC_EVENT_ID_TPC2_QM = 440,
|
||||
GOYA_ASYNC_EVENT_ID_TPC3_QM = 441,
|
||||
GOYA_ASYNC_EVENT_ID_TPC4_QM = 442,
|
||||
GOYA_ASYNC_EVENT_ID_TPC5_QM = 443,
|
||||
GOYA_ASYNC_EVENT_ID_TPC6_QM = 444,
|
||||
GOYA_ASYNC_EVENT_ID_TPC7_QM = 445,
|
||||
GOYA_ASYNC_EVENT_ID_MME_QM = 447,
|
||||
GOYA_ASYNC_EVENT_ID_MME_CMDQ = 448,
|
||||
GOYA_ASYNC_EVENT_ID_DMA0_QM = 449,
|
||||
GOYA_ASYNC_EVENT_ID_DMA1_QM = 450,
|
||||
GOYA_ASYNC_EVENT_ID_DMA2_QM = 451,
|
||||
GOYA_ASYNC_EVENT_ID_DMA3_QM = 452,
|
||||
GOYA_ASYNC_EVENT_ID_DMA4_QM = 453,
|
||||
GOYA_ASYNC_EVENT_ID_DMA_ON_HBW = 454,
|
||||
GOYA_ASYNC_EVENT_ID_DMA0_CH = 455,
|
||||
GOYA_ASYNC_EVENT_ID_DMA1_CH = 456,
|
||||
GOYA_ASYNC_EVENT_ID_DMA2_CH = 457,
|
||||
GOYA_ASYNC_EVENT_ID_DMA3_CH = 458,
|
||||
GOYA_ASYNC_EVENT_ID_DMA4_CH = 459,
|
||||
GOYA_ASYNC_EVENT_ID_PI_UPDATE = 484,
|
||||
GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
|
||||
GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
|
||||
GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
|
||||
GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023,
|
||||
GOYA_ASYNC_EVENT_ID_SIZE
|
||||
};
|
||||
|
||||
#endif /* __GOYA_ASYNC_EVENTS_H_ */
|
129
drivers/misc/habanalabs/include/goya/goya_packets.h
Normal file
129
drivers/misc/habanalabs/include/goya/goya_packets.h
Normal file
@ -0,0 +1,129 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Copyright 2017-2018 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef GOYA_PACKETS_H
|
||||
#define GOYA_PACKETS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define PACKET_HEADER_PACKET_ID_SHIFT 56
|
||||
#define PACKET_HEADER_PACKET_ID_MASK 0x1F00000000000000ull
|
||||
|
||||
enum packet_id {
|
||||
PACKET_WREG_32 = 0x1,
|
||||
PACKET_WREG_BULK = 0x2,
|
||||
PACKET_MSG_LONG = 0x3,
|
||||
PACKET_MSG_SHORT = 0x4,
|
||||
PACKET_CP_DMA = 0x5,
|
||||
PACKET_MSG_PROT = 0x7,
|
||||
PACKET_FENCE = 0x8,
|
||||
PACKET_LIN_DMA = 0x9,
|
||||
PACKET_NOP = 0xA,
|
||||
PACKET_STOP = 0xB,
|
||||
MAX_PACKET_ID = (PACKET_HEADER_PACKET_ID_MASK >>
|
||||
PACKET_HEADER_PACKET_ID_SHIFT) + 1
|
||||
};
|
||||
|
||||
/*
 * DMA transfer directions, named <source>_TO_<destination>.
 * DMA_ENUM_MAX is left implicit so it keeps tracking the number of
 * directions.
 */
enum goya_dma_direction {
	DMA_HOST_TO_DRAM = 0,
	DMA_HOST_TO_SRAM = 1,
	DMA_DRAM_TO_SRAM = 2,
	DMA_SRAM_TO_DRAM = 3,
	DMA_SRAM_TO_HOST = 4,
	DMA_DRAM_TO_HOST = 5,
	DMA_DRAM_TO_DRAM = 6,
	DMA_SRAM_TO_SRAM = 7,
	DMA_ENUM_MAX
};
|
||||
|
||||
#define GOYA_PKT_CTL_OPCODE_SHIFT 24
|
||||
#define GOYA_PKT_CTL_OPCODE_MASK 0x1F000000
|
||||
|
||||
#define GOYA_PKT_CTL_EB_SHIFT 29
|
||||
#define GOYA_PKT_CTL_EB_MASK 0x20000000
|
||||
|
||||
#define GOYA_PKT_CTL_RB_SHIFT 30
|
||||
#define GOYA_PKT_CTL_RB_MASK 0x40000000
|
||||
|
||||
#define GOYA_PKT_CTL_MB_SHIFT 31
|
||||
#define GOYA_PKT_CTL_MB_MASK 0x80000000
|
||||
|
||||
struct packet_nop {
|
||||
__le32 reserved;
|
||||
__le32 ctl;
|
||||
};
|
||||
|
||||
struct packet_stop {
|
||||
__le32 reserved;
|
||||
__le32 ctl;
|
||||
};
|
||||
|
||||
#define GOYA_PKT_WREG32_CTL_REG_OFFSET_SHIFT 0
|
||||
#define GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK 0x0000FFFF
|
||||
|
||||
struct packet_wreg32 {
|
||||
__le32 value;
|
||||
__le32 ctl;
|
||||
};
|
||||
|
||||
struct packet_wreg_bulk {
|
||||
__le32 size64;
|
||||
__le32 ctl;
|
||||
__le64 values[0]; /* data starts here */
|
||||
};
|
||||
|
||||
struct packet_msg_long {
|
||||
__le32 value;
|
||||
__le32 ctl;
|
||||
__le64 addr;
|
||||
};
|
||||
|
||||
struct packet_msg_short {
|
||||
__le32 value;
|
||||
__le32 ctl;
|
||||
};
|
||||
|
||||
struct packet_msg_prot {
|
||||
__le32 value;
|
||||
__le32 ctl;
|
||||
__le64 addr;
|
||||
};
|
||||
|
||||
struct packet_fence {
|
||||
__le32 cfg;
|
||||
__le32 ctl;
|
||||
};
|
||||
|
||||
#define GOYA_PKT_LIN_DMA_CTL_WO_SHIFT 0
|
||||
#define GOYA_PKT_LIN_DMA_CTL_WO_MASK 0x00000001
|
||||
|
||||
#define GOYA_PKT_LIN_DMA_CTL_RDCOMP_SHIFT 1
|
||||
#define GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK 0x00000002
|
||||
|
||||
#define GOYA_PKT_LIN_DMA_CTL_WRCOMP_SHIFT 2
|
||||
#define GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK 0x00000004
|
||||
|
||||
#define GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT 6
|
||||
#define GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK 0x00000040
|
||||
|
||||
#define GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT 20
|
||||
#define GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK 0x00700000
|
||||
|
||||
struct packet_lin_dma {
|
||||
__le32 tsize;
|
||||
__le32 ctl;
|
||||
__le64 src_addr;
|
||||
__le64 dst_addr;
|
||||
};
|
||||
|
||||
struct packet_cp_dma {
|
||||
__le32 tsize;
|
||||
__le32 ctl;
|
||||
__le64 src_addr;
|
||||
};
|
||||
|
||||
#endif /* GOYA_PACKETS_H */
|
56
drivers/misc/habanalabs/include/qman_if.h
Normal file
56
drivers/misc/habanalabs/include/qman_if.h
Normal file
@ -0,0 +1,56 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Copyright 2016-2018 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef QMAN_IF_H
|
||||
#define QMAN_IF_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* PRIMARY QUEUE
|
||||
*/
|
||||
|
||||
struct hl_bd {
|
||||
__le64 ptr;
|
||||
__le32 len;
|
||||
__le32 ctl;
|
||||
};
|
||||
|
||||
#define HL_BD_SIZE sizeof(struct hl_bd)
|
||||
|
||||
/*
|
||||
* BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
|
||||
* valid. 1 means the repeat field is valid, 0 means not-valid,
|
||||
* i.e. repeat == 1
|
||||
*/
|
||||
#define BD_CTL_REPEAT_VALID_SHIFT 24
|
||||
#define BD_CTL_REPEAT_VALID_MASK 0x01000000
|
||||
|
||||
#define BD_CTL_SHADOW_INDEX_SHIFT 0
|
||||
#define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF
|
||||
|
||||
/*
|
||||
* COMPLETION QUEUE
|
||||
*/
|
||||
|
||||
struct hl_cq_entry {
|
||||
__le32 data;
|
||||
};
|
||||
|
||||
#define HL_CQ_ENTRY_SIZE sizeof(struct hl_cq_entry)
|
||||
|
||||
#define CQ_ENTRY_READY_SHIFT 31
|
||||
#define CQ_ENTRY_READY_MASK 0x80000000
|
||||
|
||||
#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT 30
|
||||
#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK 0x40000000
|
||||
|
||||
#define CQ_ENTRY_SHADOW_INDEX_SHIFT BD_CTL_SHADOW_INDEX_SHIFT
|
||||
#define CQ_ENTRY_SHADOW_INDEX_MASK BD_CTL_SHADOW_INDEX_MASK
|
||||
|
||||
|
||||
#endif /* QMAN_IF_H */
|
149
drivers/misc/habanalabs/irq.c
Normal file
149
drivers/misc/habanalabs/irq.c
Normal file
@ -0,0 +1,149 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "habanalabs.h"

#include <linux/irqreturn.h>
#include <asm/byteorder.h>
|
||||
|
||||
/*
|
||||
* hl_cq_inc_ptr - increment ci or pi of cq
|
||||
*
|
||||
* @ptr: the current ci or pi value of the completion queue
|
||||
*
|
||||
* Increment ptr by 1. If it reaches the number of completion queue
|
||||
* entries, set it to 0
|
||||
*/
|
||||
inline u32 hl_cq_inc_ptr(u32 ptr)
|
||||
{
|
||||
ptr++;
|
||||
if (unlikely(ptr == HL_CQ_LENGTH))
|
||||
ptr = 0;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_irq_handler_cq - irq handler for completion queue
|
||||
*
|
||||
* @irq: irq number
|
||||
* @arg: pointer to completion queue structure
|
||||
*
|
||||
*/
|
||||
irqreturn_t hl_irq_handler_cq(int irq, void *arg)
|
||||
{
|
||||
struct hl_cq *cq = arg;
|
||||
struct hl_device *hdev = cq->hdev;
|
||||
struct hl_hw_queue *queue;
|
||||
struct hl_cs_job *job;
|
||||
bool shadow_index_valid;
|
||||
u16 shadow_index;
|
||||
u32 *cq_entry;
|
||||
u32 *cq_base;
|
||||
|
||||
if (hdev->disabled) {
|
||||
dev_dbg(hdev->dev,
|
||||
"Device disabled but received IRQ %d for CQ %d\n",
|
||||
irq, cq->hw_queue_id);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
cq_base = (u32 *) (uintptr_t) cq->kernel_address;
|
||||
|
||||
while (1) {
|
||||
bool entry_ready = ((cq_base[cq->ci] & CQ_ENTRY_READY_MASK)
|
||||
>> CQ_ENTRY_READY_SHIFT);
|
||||
|
||||
if (!entry_ready)
|
||||
break;
|
||||
|
||||
cq_entry = (u32 *) &cq_base[cq->ci];
|
||||
|
||||
/*
|
||||
* Make sure we read CQ entry contents after we've
|
||||
* checked the ownership bit.
|
||||
*/
|
||||
dma_rmb();
|
||||
|
||||
shadow_index_valid =
|
||||
((*cq_entry & CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
|
||||
>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
|
||||
|
||||
shadow_index = (u16)
|
||||
((*cq_entry & CQ_ENTRY_SHADOW_INDEX_MASK)
|
||||
>> CQ_ENTRY_SHADOW_INDEX_SHIFT);
|
||||
|
||||
queue = &hdev->kernel_queues[cq->hw_queue_id];
|
||||
|
||||
if ((shadow_index_valid) && (!hdev->disabled)) {
|
||||
job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
|
||||
queue_work(hdev->cq_wq, &job->finish_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update ci of the context's queue. There is no
|
||||
* need to protect it with spinlock because this update is
|
||||
* done only inside IRQ and there is a different IRQ per
|
||||
* queue
|
||||
*/
|
||||
queue->ci = hl_queue_inc_ptr(queue->ci);
|
||||
|
||||
/* Clear CQ entry ready bit */
|
||||
cq_base[cq->ci] &= ~CQ_ENTRY_READY_MASK;
|
||||
|
||||
cq->ci = hl_cq_inc_ptr(cq->ci);
|
||||
|
||||
/* Increment free slots */
|
||||
atomic_inc(&cq->free_slots_cnt);
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_cq_init - main initialization function for an cq object
|
||||
*
|
||||
* @hdev: pointer to device structure
|
||||
* @q: pointer to cq structure
|
||||
* @hw_queue_id: The H/W queue ID this completion queue belongs to
|
||||
*
|
||||
* Allocate dma-able memory for the completion queue and initialize fields
|
||||
* Returns 0 on success
|
||||
*/
|
||||
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
|
||||
{
|
||||
void *p;
|
||||
|
||||
BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
|
||||
|
||||
p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
|
||||
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
q->hdev = hdev;
|
||||
q->kernel_address = (u64) (uintptr_t) p;
|
||||
q->hw_queue_id = hw_queue_id;
|
||||
q->ci = 0;
|
||||
q->pi = 0;
|
||||
|
||||
atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_cq_fini - destroy completion queue
|
||||
*
|
||||
* @hdev: pointer to device structure
|
||||
* @q: pointer to cq structure
|
||||
*
|
||||
* Free the completion queue memory
|
||||
*/
|
||||
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
|
||||
{
|
||||
hdev->asic_funcs->dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
|
||||
(void *) (uintptr_t) q->kernel_address, q->bus_address);
|
||||
}
|
@ -17,6 +17,35 @@
|
||||
*/
|
||||
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */
|
||||
|
||||
/*
|
||||
* Queue Numbering
|
||||
*
|
||||
* The external queues (DMA channels + CPU) MUST be before the internal queues
|
||||
* and each group (DMA channels + CPU and internal) must be contiguous inside
|
||||
* itself but there can be a gap between the two groups (although not
|
||||
* recommended)
|
||||
*/
|
||||
|
||||
enum goya_queue_id {
	/* External queues: the DMA channels followed by the CPU queue */
	GOYA_QUEUE_ID_DMA_0 = 0,
	GOYA_QUEUE_ID_DMA_1 = 1,
	GOYA_QUEUE_ID_DMA_2 = 2,
	GOYA_QUEUE_ID_DMA_3 = 3,
	GOYA_QUEUE_ID_DMA_4 = 4,
	GOYA_QUEUE_ID_CPU_PQ = 5,
	/* Internal queues: the MME engine and the TPC engines */
	GOYA_QUEUE_ID_MME = 6,
	GOYA_QUEUE_ID_TPC0 = 7,
	GOYA_QUEUE_ID_TPC1 = 8,
	GOYA_QUEUE_ID_TPC2 = 9,
	GOYA_QUEUE_ID_TPC3 = 10,
	GOYA_QUEUE_ID_TPC4 = 11,
	GOYA_QUEUE_ID_TPC5 = 12,
	GOYA_QUEUE_ID_TPC6 = 13,
	GOYA_QUEUE_ID_TPC7 = 14,
	/* Number of queue IDs; left implicit so it follows the list above */
	GOYA_QUEUE_ID_SIZE
};
|
||||
|
||||
|
||||
/* Opcode to create a new command buffer */
|
||||
#define HL_CB_OP_CREATE 0
|
||||
/* Opcode to destroy previously created command buffer */
|
||||
|
Loading…
Reference in New Issue
Block a user