mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-24 23:34:40 +07:00
f8c8c7d5f1
This patch adds support for doing various on-the-fly resets of Goya. The driver supports two types of resets: 1. soft-reset; 2. hard-reset. Soft-reset is done when the device detects a timeout of a command submission that was given to the device. The soft-reset process only resets the engines that are relevant for the submission of compute jobs, i.e. the DMA channels, the TPCs and the MME. The purpose is to bring the device as fast as possible to a working state. Hard-reset is done in several cases: 1. After soft-reset is done but the device is not responding; 2. When fatal errors occur inside the device, e.g. ECC error; 3. When the driver is removed. Hard-reset performs a reset of the entire chip except for the PCI controller and the PLLs. It is a much longer process than soft-reset but it helps to recover the device without the need to reboot the Host. After hard-reset, the driver will restore the max power attribute and, in case of manual power management, the frequencies that were set. This patch also adds two entries to the sysfs, which allow the root user to initiate a soft or hard reset. Reviewed-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
804 lines
28 KiB
C
804 lines
28 KiB
C
/* SPDX-License-Identifier: GPL-2.0
|
|
*
|
|
* Copyright 2016-2019 HabanaLabs, Ltd.
|
|
* All Rights Reserved.
|
|
*
|
|
*/
|
|
|
|
#ifndef HABANALABSP_H_
|
|
#define HABANALABSP_H_
|
|
|
|
#include "include/armcp_if.h"
|
|
#include "include/qman_if.h"
|
|
|
|
/*
 * Format-string prefix: pr_fmt() pastes "habanalabs: " in front of the format
 * string it is given. It is defined ahead of the <linux/...> includes that
 * follow - presumably so the kernel's pr_*() helpers pick it up; confirm
 * against linux/printk.h.
 */
#define pr_fmt(fmt) "habanalabs: " fmt
|
|
|
|
#include <linux/cdev.h>
|
|
#include <linux/iopoll.h>
|
|
#include <linux/irqreturn.h>
|
|
|
|
/* Name string of the driver. */
#define HL_NAME "habanalabs"

/*
 * Bit 63 shifted right by PAGE_SHIFT.
 * NOTE(review): presumably tags mmap offsets that refer to a command buffer -
 * confirm against the mmap code.
 */
#define HL_MMAP_CB_MASK ((1ull << 63) >> PAGE_SHIFT)

#define HL_PENDING_RESET_PER_SEC 5

/* The following durations are expressed in microseconds. */
#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */
#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */
#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */

/* Number of entries in asic_fixed_properties.hw_queues_props[]. */
#define HL_MAX_QUEUES 128
|
|
|
|
/*
 * Forward declarations: both structures are defined further down in this
 * header, but pointers to them are needed before that point.
 */
struct hl_device;
struct hl_fpriv;
|
|
|
|
/**
 * enum hl_queue_type - the kinds of queues the driver distinguishes.
 * @QUEUE_TYPE_NA: the queue is not available.
 * @QUEUE_TYPE_EXT: external queue, i.e. a DMA channel that may access the
 *                  host.
 * @QUEUE_TYPE_INT: internal queue, which performs DMA inside the device's
 *                  memories and/or operates the compute engines.
 * @QUEUE_TYPE_CPU: S/W queue used for communicating with the device's CPU.
 */
enum hl_queue_type {
	QUEUE_TYPE_NA,
	QUEUE_TYPE_EXT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_CPU
};
|
|
|
|
/**
|
|
* struct hw_queue_properties - queue information.
|
|
* @type: queue type.
|
|
* @kmd_only: true if only KMD is allowed to send a job to this queue, false
|
|
* otherwise.
|
|
*/
|
|
struct hw_queue_properties {
|
|
enum hl_queue_type type;
|
|
u8 kmd_only;
|
|
};
|
|
|
|
/**
 * enum hl_device_hw_state - state of the device's H/W. It tells whether a
 *                           reset is needed before running hw_init.
 * @HL_DEVICE_HW_STATE_CLEAN: the H/W state is clean, i.e. after a hard reset.
 * @HL_DEVICE_HW_STATE_DIRTY: the H/W state is dirty, i.e. hw_init has started
 *                            executing.
 */
enum hl_device_hw_state {
	HL_DEVICE_HW_STATE_CLEAN = 0,
	HL_DEVICE_HW_STATE_DIRTY
};
|
|
|
|
/**
|
|
* struct asic_fixed_properties - ASIC specific immutable properties.
|
|
* @hw_queues_props: H/W queues properties.
|
|
* @armcp_info: received various information from ArmCP regarding the H/W. e.g.
|
|
* available sensors.
|
|
* @uboot_ver: F/W U-boot version.
|
|
* @preboot_ver: F/W Preboot version.
|
|
* @sram_base_address: SRAM physical start address.
|
|
* @sram_end_address: SRAM physical end address.
|
|
* @sram_user_base_address - SRAM physical start address for user access.
|
|
* @dram_base_address: DRAM physical start address.
|
|
* @dram_end_address: DRAM physical end address.
|
|
* @dram_user_base_address: DRAM physical start address for user access.
|
|
* @dram_size: DRAM total size.
|
|
* @dram_pci_bar_size: size of PCI bar towards DRAM.
|
|
* @host_phys_base_address: base physical address of host memory for
|
|
* transactions that the device generates.
|
|
* @max_power_default: max power of the device after reset
|
|
* @va_space_host_start_address: base address of virtual memory range for
|
|
* mapping host memory.
|
|
* @va_space_host_end_address: end address of virtual memory range for
|
|
* mapping host memory.
|
|
* @va_space_dram_start_address: base address of virtual memory range for
|
|
* mapping DRAM memory.
|
|
* @va_space_dram_end_address: end address of virtual memory range for
|
|
* mapping DRAM memory.
|
|
* @cfg_size: configuration space size on SRAM.
|
|
* @sram_size: total size of SRAM.
|
|
* @max_asid: maximum number of open contexts (ASIDs).
|
|
* @num_of_events: number of possible internal H/W IRQs.
|
|
* @psoc_pci_pll_nr: PCI PLL NR value.
|
|
* @psoc_pci_pll_nf: PCI PLL NF value.
|
|
* @psoc_pci_pll_od: PCI PLL OD value.
|
|
* @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
|
|
* @completion_queues_count: number of completion queues.
|
|
* @high_pll: high PLL frequency used by the device.
|
|
* @cb_pool_cb_cnt: number of CBs in the CB pool.
|
|
* @cb_pool_cb_size: size of each CB in the CB pool.
|
|
* @tpc_enabled_mask: which TPCs are enabled.
|
|
*/
|
|
struct asic_fixed_properties {
|
|
struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES];
|
|
struct armcp_info armcp_info;
|
|
char uboot_ver[VERSION_MAX_LEN];
|
|
char preboot_ver[VERSION_MAX_LEN];
|
|
u64 sram_base_address;
|
|
u64 sram_end_address;
|
|
u64 sram_user_base_address;
|
|
u64 dram_base_address;
|
|
u64 dram_end_address;
|
|
u64 dram_user_base_address;
|
|
u64 dram_size;
|
|
u64 dram_pci_bar_size;
|
|
u64 host_phys_base_address;
|
|
u64 max_power_default;
|
|
u64 va_space_host_start_address;
|
|
u64 va_space_host_end_address;
|
|
u64 va_space_dram_start_address;
|
|
u64 va_space_dram_end_address;
|
|
u32 cfg_size;
|
|
u32 sram_size;
|
|
u32 max_asid;
|
|
u32 num_of_events;
|
|
u32 psoc_pci_pll_nr;
|
|
u32 psoc_pci_pll_nf;
|
|
u32 psoc_pci_pll_od;
|
|
u32 psoc_pci_pll_div_factor;
|
|
u32 high_pll;
|
|
u32 cb_pool_cb_cnt;
|
|
u32 cb_pool_cb_size;
|
|
u8 completion_queues_count;
|
|
u8 tpc_enabled_mask;
|
|
};
|
|
|
|
|
|
/*
|
|
* Command Buffers
|
|
*/
|
|
|
|
/*
 * NOTE(review): judging by the name, the upper bound on the size of a single
 * command buffer - confirm where it is enforced.
 */
#define HL_MAX_CB_SIZE 0x200000 /* 2MB */
|
|
|
|
/**
|
|
* struct hl_cb_mgr - describes a Command Buffer Manager.
|
|
* @cb_lock: protects cb_handles.
|
|
* @cb_handles: an idr to hold all command buffer handles.
|
|
*/
|
|
struct hl_cb_mgr {
|
|
spinlock_t cb_lock;
|
|
struct idr cb_handles; /* protected by cb_lock */
|
|
};
|
|
|
|
/**
|
|
* struct hl_cb - describes a Command Buffer.
|
|
* @refcount: reference counter for usage of the CB.
|
|
* @hdev: pointer to device this CB belongs to.
|
|
* @lock: spinlock to protect mmap/cs flows.
|
|
* @pool_list: node in pool list of command buffers.
|
|
* @kernel_address: Holds the CB's kernel virtual address.
|
|
* @bus_address: Holds the CB's DMA address.
|
|
* @mmap_size: Holds the CB's size that was mmaped.
|
|
* @size: holds the CB's size.
|
|
* @id: the CB's ID.
|
|
* @ctx_id: holds the ID of the owner's context.
|
|
* @mmap: true if the CB is currently mmaped to user.
|
|
* @is_pool: true if CB was acquired from the pool, false otherwise.
|
|
*/
|
|
struct hl_cb {
|
|
struct kref refcount;
|
|
struct hl_device *hdev;
|
|
spinlock_t lock;
|
|
struct list_head pool_list;
|
|
u64 kernel_address;
|
|
dma_addr_t bus_address;
|
|
u32 mmap_size;
|
|
u32 size;
|
|
u32 id;
|
|
u32 ctx_id;
|
|
u8 mmap;
|
|
u8 is_pool;
|
|
};
|
|
|
|
|
|
/*
|
|
* QUEUES
|
|
*/
|
|
|
|
/*
 * Forward declaration: struct hl_hw_queue below stores pointers to jobs
 * before struct hl_cs_job itself is defined.
 */
struct hl_cs_job;
|
|
|
|
/*
 * Two things currently limit the maximum length of a queue:
 *
 * 1. The memory footprint of the queue. The space currently allocated for
 *    the queue is PAGE_SIZE. Since every entry in the queue is HL_BD_SIZE,
 *    the queue can hold at most PAGE_SIZE / HL_BD_SIZE entries, which
 *    currently is 4096/16 = 256 entries.
 *
 *    To increase that, either the size of the BD has to shrink (difficult),
 *    or more than a single page has to be allocated (easier).
 *
 * 2. The JOB handle field in the BD CTL / completion queue is 10 bits wide,
 *    so there can be up to 1024 open jobs per hardware queue. Therefore,
 *    each queue can hold up to 1024 entries.
 *
 * HL_QUEUE_LENGTH is in units of struct hl_bd.
 * HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
 */

#define HL_PAGE_SIZE 4096 /* minimum page size */

/* Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE) */
#define HL_QUEUE_LENGTH 256
#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE)

/*
 * HL_CQ_LENGTH is in units of struct hl_cq_entry.
 * HL_CQ_LENGTH should be <= HL_PAGE_SIZE
 */
#define HL_CQ_LENGTH HL_QUEUE_LENGTH
#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)

/* Must be power of 2 (HL_PAGE_SIZE / HL_EQ_ENTRY_SIZE) */
#define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
|
|
|
|
|
|
/**
|
|
* struct hl_hw_queue - describes a H/W transport queue.
|
|
* @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
|
|
* @queue_type: type of queue.
|
|
* @kernel_address: holds the queue's kernel virtual address.
|
|
* @bus_address: holds the queue's DMA address.
|
|
* @pi: holds the queue's pi value.
|
|
* @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
|
|
* @hw_queue_id: the id of the H/W queue.
|
|
* @int_queue_len: length of internal queue (number of entries).
|
|
* @valid: is the queue valid (we have array of 32 queues, not all of them
|
|
* exists).
|
|
*/
|
|
struct hl_hw_queue {
|
|
struct hl_cs_job **shadow_queue;
|
|
enum hl_queue_type queue_type;
|
|
u64 kernel_address;
|
|
dma_addr_t bus_address;
|
|
u32 pi;
|
|
u32 ci;
|
|
u32 hw_queue_id;
|
|
u16 int_queue_len;
|
|
u8 valid;
|
|
};
|
|
|
|
/**
|
|
* struct hl_cq - describes a completion queue
|
|
* @hdev: pointer to the device structure
|
|
* @kernel_address: holds the queue's kernel virtual address
|
|
* @bus_address: holds the queue's DMA address
|
|
* @hw_queue_id: the id of the matching H/W queue
|
|
* @ci: ci inside the queue
|
|
* @pi: pi inside the queue
|
|
* @free_slots_cnt: counter of free slots in queue
|
|
*/
|
|
struct hl_cq {
|
|
struct hl_device *hdev;
|
|
u64 kernel_address;
|
|
dma_addr_t bus_address;
|
|
u32 hw_queue_id;
|
|
u32 ci;
|
|
u32 pi;
|
|
atomic_t free_slots_cnt;
|
|
};
|
|
|
|
/**
|
|
* struct hl_eq - describes the event queue (single one per device)
|
|
* @hdev: pointer to the device structure
|
|
* @kernel_address: holds the queue's kernel virtual address
|
|
* @bus_address: holds the queue's DMA address
|
|
* @ci: ci inside the queue
|
|
*/
|
|
struct hl_eq {
|
|
struct hl_device *hdev;
|
|
u64 kernel_address;
|
|
dma_addr_t bus_address;
|
|
u32 ci;
|
|
};
|
|
|
|
|
|
/*
|
|
* ASICs
|
|
*/
|
|
|
|
/**
 * enum hl_asic_type - the ASIC types that are supported.
 * @ASIC_AUTO_DETECT: the ASIC type will be set automatically.
 * @ASIC_GOYA: a Goya device.
 * @ASIC_INVALID: an invalid ASIC type.
 */
enum hl_asic_type {
	ASIC_AUTO_DETECT,
	ASIC_GOYA,
	ASIC_INVALID
};
|
|
|
|
/**
 * enum hl_pm_mng_profile - power management profile.
 * @PM_AUTO: the internal clock is set by the KMD.
 * @PM_MANUAL: the internal clock is set by the user.
 * @PM_LAST: last power management type.
 */
enum hl_pm_mng_profile {
	PM_AUTO = 1,
	PM_MANUAL,
	PM_LAST
};
|
|
|
|
/**
 * enum hl_pll_frequency - PLL frequency selection.
 * @PLL_HIGH: the high frequency.
 * @PLL_LOW: the low frequency.
 * @PLL_LAST: the last frequency values that were configured by the user.
 */
enum hl_pll_frequency {
	PLL_HIGH = 1,
	PLL_LOW,
	PLL_LAST
};
|
|
|
|
/**
|
|
* struct hl_asic_funcs - ASIC specific functions that are can be called from
|
|
* common code.
|
|
* @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
|
|
* @early_fini: tears down what was done in early_init.
|
|
* @late_init: sets up late driver/hw state (post hw_init) - Optional.
|
|
* @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
|
|
* @sw_init: sets up driver state, does not configure H/W.
|
|
* @sw_fini: tears down driver state, does not configure H/W.
|
|
* @hw_init: sets up the H/W state.
|
|
* @hw_fini: tears down the H/W state.
|
|
* @halt_engines: halt engines, needed for reset sequence. This also disables
|
|
* interrupts from the device. Should be called before
|
|
* hw_fini and before CS rollback.
|
|
* @suspend: handles IP specific H/W or SW changes for suspend.
|
|
* @resume: handles IP specific H/W or SW changes for resume.
|
|
* @mmap: mmap function, does nothing.
|
|
* @cb_mmap: maps a CB.
|
|
* @ring_doorbell: increment PI on a given QMAN.
|
|
* @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
|
|
* @dma_alloc_coherent: Allocate coherent DMA memory by calling
|
|
* dma_alloc_coherent(). This is ASIC function because its
|
|
* implementation is not trivial when the driver is loaded
|
|
* in simulation mode (not upstreamed).
|
|
* @dma_free_coherent: Free coherent DMA memory by calling dma_free_coherent().
|
|
* This is ASIC function because its implementation is not
|
|
* trivial when the driver is loaded in simulation mode
|
|
* (not upstreamed).
|
|
* @get_int_queue_base: get the internal queue base address.
|
|
* @test_queues: run simple test on all queues for sanity check.
|
|
* @dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
|
|
* size of allocation is HL_DMA_POOL_BLK_SIZE.
|
|
* @dma_pool_free: free small DMA allocation from pool.
|
|
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
|
|
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
|
|
* @update_eq_ci: update event queue CI.
|
|
* @add_device_attr: add ASIC specific device attributes.
|
|
* @handle_eqe: handle event queue entry (IRQ) from ArmCP.
|
|
* @set_pll_profile: change PLL profile (manual/automatic).
|
|
* @get_events_stat: retrieve event queue entries histogram.
|
|
* @send_heartbeat: send is-alive packet to ArmCP and verify response.
|
|
* @enable_clock_gating: enable clock gating for reducing power consumption.
|
|
* @disable_clock_gating: disable clock for accessing registers on HBW.
|
|
* @soft_reset_late_init: perform certain actions needed after soft reset.
|
|
* @hw_queues_lock: acquire H/W queues lock.
|
|
* @hw_queues_unlock: release H/W queues lock.
|
|
* @get_eeprom_data: retrieve EEPROM data from F/W.
|
|
* @send_cpu_message: send buffer to ArmCP.
|
|
* @get_hw_state: retrieve the H/W state
|
|
*/
|
|
struct hl_asic_funcs {
|
|
int (*early_init)(struct hl_device *hdev);
|
|
int (*early_fini)(struct hl_device *hdev);
|
|
int (*late_init)(struct hl_device *hdev);
|
|
void (*late_fini)(struct hl_device *hdev);
|
|
int (*sw_init)(struct hl_device *hdev);
|
|
int (*sw_fini)(struct hl_device *hdev);
|
|
int (*hw_init)(struct hl_device *hdev);
|
|
void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
|
|
void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
|
|
int (*suspend)(struct hl_device *hdev);
|
|
int (*resume)(struct hl_device *hdev);
|
|
int (*mmap)(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
|
|
int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
|
|
u64 kaddress, phys_addr_t paddress, u32 size);
|
|
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
|
|
void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
|
|
void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
|
|
dma_addr_t *dma_handle, gfp_t flag);
|
|
void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
|
|
void *cpu_addr, dma_addr_t dma_handle);
|
|
void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
|
|
dma_addr_t *dma_handle, u16 *queue_len);
|
|
int (*test_queues)(struct hl_device *hdev);
|
|
void* (*dma_pool_zalloc)(struct hl_device *hdev, size_t size,
|
|
gfp_t mem_flags, dma_addr_t *dma_handle);
|
|
void (*dma_pool_free)(struct hl_device *hdev, void *vaddr,
|
|
dma_addr_t dma_addr);
|
|
void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
|
|
size_t size, dma_addr_t *dma_handle);
|
|
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
|
|
size_t size, void *vaddr);
|
|
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
|
|
void (*add_device_attr)(struct hl_device *hdev,
|
|
struct attribute_group *dev_attr_grp);
|
|
void (*handle_eqe)(struct hl_device *hdev,
|
|
struct hl_eq_entry *eq_entry);
|
|
void (*set_pll_profile)(struct hl_device *hdev,
|
|
enum hl_pll_frequency freq);
|
|
void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
|
|
int (*send_heartbeat)(struct hl_device *hdev);
|
|
void (*enable_clock_gating)(struct hl_device *hdev);
|
|
void (*disable_clock_gating)(struct hl_device *hdev);
|
|
int (*soft_reset_late_init)(struct hl_device *hdev);
|
|
void (*hw_queues_lock)(struct hl_device *hdev);
|
|
void (*hw_queues_unlock)(struct hl_device *hdev);
|
|
int (*get_eeprom_data)(struct hl_device *hdev, void *data,
|
|
size_t max_size);
|
|
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
|
|
u16 len, u32 timeout, long *result);
|
|
enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
|
|
};
|
|
|
|
|
|
/*
|
|
* CONTEXTS
|
|
*/
|
|
|
|
/*
 * NOTE(review): presumably the ASID used by the kernel (KMD) context -
 * confirm against the context initialization code.
 */
#define HL_KERNEL_ASID_ID 0
|
|
|
|
/**
|
|
* struct hl_ctx - user/kernel context.
|
|
* @hpriv: pointer to the private (KMD) data of the process (fd).
|
|
* @hdev: pointer to the device structure.
|
|
* @refcount: reference counter for the context. Context is released only when
|
|
* this hits 0l. It is incremented on CS and CS_WAIT.
|
|
* @asid: context's unique address space ID in the device's MMU.
|
|
*/
|
|
struct hl_ctx {
|
|
struct hl_fpriv *hpriv;
|
|
struct hl_device *hdev;
|
|
struct kref refcount;
|
|
u32 asid;
|
|
};
|
|
|
|
/**
|
|
* struct hl_ctx_mgr - for handling multiple contexts.
|
|
* @ctx_lock: protects ctx_handles.
|
|
* @ctx_handles: idr to hold all ctx handles.
|
|
*/
|
|
struct hl_ctx_mgr {
|
|
struct mutex ctx_lock;
|
|
struct idr ctx_handles;
|
|
};
|
|
|
|
|
|
/**
|
|
* struct hl_cs_job - command submission job.
|
|
* @finish_work: workqueue object to run when job is completed.
|
|
* @id: the id of this job inside a CS.
|
|
*/
|
|
struct hl_cs_job {
|
|
struct work_struct finish_work;
|
|
u32 id;
|
|
};
|
|
|
|
|
|
/*
|
|
* FILE PRIVATE STRUCTURE
|
|
*/
|
|
|
|
/**
|
|
* struct hl_fpriv - process information stored in FD private data.
|
|
* @hdev: habanalabs device structure.
|
|
* @filp: pointer to the given file structure.
|
|
* @taskpid: current process ID.
|
|
* @ctx: current executing context.
|
|
* @ctx_mgr: context manager to handle multiple context for this FD.
|
|
* @cb_mgr: command buffer manager to handle multiple buffers for this FD.
|
|
* @refcount: number of related contexts.
|
|
*/
|
|
struct hl_fpriv {
|
|
struct hl_device *hdev;
|
|
struct file *filp;
|
|
struct pid *taskpid;
|
|
struct hl_ctx *ctx; /* TODO: remove for multiple ctx */
|
|
struct hl_ctx_mgr ctx_mgr;
|
|
struct hl_cb_mgr cb_mgr;
|
|
struct kref refcount;
|
|
};
|
|
|
|
|
|
/*
|
|
* DEVICES
|
|
*/
|
|
|
|
/*
 * Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
 * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
 */
#define HL_MAX_MINORS 256
|
|
|
|
/*
|
|
* Registers read & write functions.
|
|
*/
|
|
|
|
u32 hl_rreg(struct hl_device *hdev, u32 reg);
|
|
void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
|
|
|
/*
 * hl_poll_timeout - poll a device register through readl_poll_timeout().
 * @hdev: device pointer; the address polled is based on its rmmio member.
 * @addr: offset that is added to hdev->rmmio.
 * @val: passed to readl_poll_timeout() unchanged.
 * @cond: passed to readl_poll_timeout() unchanged.
 * @sleep_us: passed to readl_poll_timeout() unchanged.
 * @timeout_us: passed to readl_poll_timeout() unchanged.
 *
 * The expansion evaluates to whatever readl_poll_timeout() evaluates to; see
 * <linux/iopoll.h> for the meaning of @val, @cond, @sleep_us and @timeout_us.
 *
 * @hdev and @addr are parenthesized so that an argument built from operators
 * that bind looser than '->' or '+' (e.g. "sel ? a : b" or "base | off") is
 * still treated as a single operand when the address is computed.
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
	readl_poll_timeout((hdev)->rmmio + (addr), val, cond, sleep_us, \
				timeout_us)
|
|
|
|
/*
 * Register access shorthands. Each of them expects a variable named "hdev"
 * to be visible where the macro is used, and hands it to hl_rreg()/hl_wreg().
 *
 * RREG32() reads register @reg through hl_rreg().
 * WREG32() writes @v to register @reg through hl_wreg().
 * DREG32() reads register @reg and logs its name and value using pr_info().
 */
#define RREG32(reg) hl_rreg(hdev, (reg))
#define WREG32(reg, v) hl_wreg(hdev, (reg), (v))
#define DREG32(reg) \
	pr_info("REGISTER: " #reg " : 0x%08X\n", hl_rreg(hdev, (reg)))
|
|
|
|
/*
 * WREG32_P - read-modify-write of a register.
 * @reg: register to update (accessed through RREG32()/WREG32()).
 * @val: new bits; only the bits that are CLEAR in @mask are taken from it.
 * @mask: bits of the current register value to KEEP.
 *
 * Writes back (old & mask) | (val & ~mask). @mask is captured in a local so
 * that it is evaluated exactly once, even if the argument has side effects.
 */
#define WREG32_P(reg, val, mask) \
	do { \
		u32 tmp_ = RREG32(reg); \
		u32 mask_ = (mask); \
		\
		tmp_ &= mask_; \
		tmp_ |= ((val) & ~mask_); \
		WREG32(reg, tmp_); \
	} while (0)

/* WREG32_AND - keep only the register bits that are set in @and. */
#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)

/*
 * WREG32_OR - set the register bits that are set in @or, keep the others.
 * @or is captured in a local so that it is evaluated exactly once.
 */
#define WREG32_OR(reg, or) \
	do { \
		u32 or_ = (or); \
		\
		WREG32_P(reg, or_, ~or_); \
	} while (0)
|
|
|
|
/*
 * Helpers for updating a single field of a register.
 *
 * REG_FIELD_SHIFT() and REG_FIELD_MASK() paste together the names of the
 * <reg>_<field>_SHIFT and <reg>_<field>_MASK constants.
 *
 * WREG32_FIELD() reads register mm<reg>, clears the bits of <field> using
 * the field's mask, ORs in @val shifted to the field's position and writes
 * the result back.
 * NOTE(review): @val is not masked to the width of the field, so a value
 * that is too wide spills into the neighbouring bits - confirm that callers
 * only pass in-range values.
 */
#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
#define WREG32_FIELD(reg, field, val) \
	WREG32(mm##reg, \
		(RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
		(val) << REG_FIELD_SHIFT(reg, field))
|
|
|
|
/*
 * Forward declaration: struct hl_device below only stores a pointer to it
 * (hl_chip_info).
 */
struct hwmon_chip_info;
|
|
|
|
/**
|
|
* struct hl_device_reset_work - reset workqueue task wrapper.
|
|
* @reset_work: reset work to be done.
|
|
* @hdev: habanalabs device structure.
|
|
*/
|
|
struct hl_device_reset_work {
|
|
struct work_struct reset_work;
|
|
struct hl_device *hdev;
|
|
};
|
|
|
|
/**
|
|
* struct hl_device - habanalabs device structure.
|
|
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
|
|
* @pcie_bar: array of available PCIe bars.
|
|
* @rmmio: configuration area address on SRAM.
|
|
* @cdev: related char device.
|
|
* @dev: realted kernel basic device structure.
|
|
* @work_freq: delayed work to lower device frequency if possible.
|
|
* @work_heartbeat: delayed work for ArmCP is-alive check.
|
|
* @asic_name: ASIC specific nmae.
|
|
* @asic_type: ASIC specific type.
|
|
* @completion_queue: array of hl_cq.
|
|
* @cq_wq: work queue of completion queues for executing work in process context
|
|
* @eq_wq: work queue of event queue for executing work in process context.
|
|
* @kernel_ctx: KMD context structure.
|
|
* @kernel_queues: array of hl_hw_queue.
|
|
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
|
|
* @event_queue: event queue for IRQ from ArmCP.
|
|
* @dma_pool: DMA pool for small allocations.
|
|
* @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
|
|
* @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
|
|
* @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool.
|
|
* @asid_bitmap: holds used/available ASIDs.
|
|
* @asid_mutex: protects asid_bitmap.
|
|
* @fd_open_cnt_lock: lock for updating fd_open_cnt in hl_device_open. Although
|
|
* fd_open_cnt is atomic, we need this lock to serialize
|
|
* the open function because the driver currently supports
|
|
* only a single process at a time. In addition, we need a
|
|
* lock here so we can flush user processes which are opening
|
|
* the device while we are trying to hard reset it
|
|
* @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
|
|
* @asic_prop: ASIC specific immutable properties.
|
|
* @asic_funcs: ASIC specific functions.
|
|
* @asic_specific: ASIC specific information to use only from ASIC files.
|
|
* @hwmon_dev: H/W monitor device.
|
|
* @pm_mng_profile: current power management profile.
|
|
* @hl_chip_info: ASIC's sensors information.
|
|
* @cb_pool: list of preallocated CBs.
|
|
* @cb_pool_lock: protects the CB pool.
|
|
* @user_ctx: current user context executing.
|
|
* @in_reset: is device in reset flow.
|
|
* @curr_pll_profile: current PLL profile.
|
|
* @fd_open_cnt: number of open user processes.
|
|
* @max_power: the max power of the device, as configured by the sysadmin. This
|
|
* value is saved so in case of hard-reset, KMD will restore this
|
|
* value and update the F/W after the re-initialization
|
|
* @major: habanalabs KMD major.
|
|
* @high_pll: high PLL profile frequency.
|
|
* @soft_reset_cnt: number of soft reset since KMD loading.
|
|
* @hard_reset_cnt: number of hard reset since KMD loading.
|
|
* @id: device minor.
|
|
* @disabled: is device disabled.
|
|
* @late_init_done: is late init stage was done during initialization.
|
|
* @hwmon_initialized: is H/W monitor sensors was initialized.
|
|
* @hard_reset_pending: is there a hard reset work pending.
|
|
* @heartbeat: is heartbeat sanity check towards ArmCP enabled.
|
|
* @init_done: is the initialization of the device done.
|
|
*/
|
|
struct hl_device {
|
|
struct pci_dev *pdev;
|
|
void __iomem *pcie_bar[6];
|
|
void __iomem *rmmio;
|
|
struct cdev cdev;
|
|
struct device *dev;
|
|
struct delayed_work work_freq;
|
|
struct delayed_work work_heartbeat;
|
|
char asic_name[16];
|
|
enum hl_asic_type asic_type;
|
|
struct hl_cq *completion_queue;
|
|
struct workqueue_struct *cq_wq;
|
|
struct workqueue_struct *eq_wq;
|
|
struct hl_ctx *kernel_ctx;
|
|
struct hl_hw_queue *kernel_queues;
|
|
struct hl_cb_mgr kernel_cb_mgr;
|
|
struct hl_eq event_queue;
|
|
struct dma_pool *dma_pool;
|
|
void *cpu_accessible_dma_mem;
|
|
dma_addr_t cpu_accessible_dma_address;
|
|
struct gen_pool *cpu_accessible_dma_pool;
|
|
unsigned long *asid_bitmap;
|
|
struct mutex asid_mutex;
|
|
/* TODO: remove fd_open_cnt_lock for multiple process support */
|
|
struct mutex fd_open_cnt_lock;
|
|
struct mutex send_cpu_message_lock;
|
|
struct asic_fixed_properties asic_prop;
|
|
const struct hl_asic_funcs *asic_funcs;
|
|
void *asic_specific;
|
|
struct device *hwmon_dev;
|
|
enum hl_pm_mng_profile pm_mng_profile;
|
|
struct hwmon_chip_info *hl_chip_info;
|
|
|
|
struct list_head cb_pool;
|
|
spinlock_t cb_pool_lock;
|
|
|
|
/* TODO: remove user_ctx for multiple process support */
|
|
struct hl_ctx *user_ctx;
|
|
|
|
atomic_t in_reset;
|
|
atomic_t curr_pll_profile;
|
|
atomic_t fd_open_cnt;
|
|
u64 max_power;
|
|
u32 major;
|
|
u32 high_pll;
|
|
u32 soft_reset_cnt;
|
|
u32 hard_reset_cnt;
|
|
u16 id;
|
|
u8 disabled;
|
|
u8 late_init_done;
|
|
u8 hwmon_initialized;
|
|
u8 hard_reset_pending;
|
|
u8 heartbeat;
|
|
u8 init_done;
|
|
|
|
/* Parameters for bring-up */
|
|
u8 cpu_enable;
|
|
u8 reset_pcilink;
|
|
u8 cpu_queues_enable;
|
|
u8 fw_loading;
|
|
u8 pldm;
|
|
};
|
|
|
|
|
|
/*
|
|
* IOCTLs
|
|
*/
|
|
|
|
/**
 * typedef hl_ioctl_t - type of an ioctl handler function in the driver.
 * @hpriv: the FD's private data, which contains the state of the user
 *         process.
 * @data: the input/output arguments structure of the IOCTL.
 *
 * Return: 0 for success, negative value for error
 */
typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
|
|
|
|
/**
|
|
* struct hl_ioctl_desc - describes an IOCTL entry of the driver.
|
|
* @cmd: the IOCTL code as created by the kernel macros.
|
|
* @func: pointer to the driver's function that should be called for this IOCTL.
|
|
*/
|
|
struct hl_ioctl_desc {
|
|
unsigned int cmd;
|
|
hl_ioctl_t *func;
|
|
};
|
|
|
|
|
|
/*
|
|
* Kernel module functions that can be accessed by entire module
|
|
*/
|
|
|
|
int hl_device_open(struct inode *inode, struct file *filp);
|
|
bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
|
|
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
|
enum hl_asic_type asic_type, int minor);
|
|
void destroy_hdev(struct hl_device *hdev);
|
|
int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, u32 timeout_us,
|
|
u32 *val);
|
|
int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
|
|
u32 timeout_us, u32 *val);
|
|
int hl_hw_queues_create(struct hl_device *hdev);
|
|
void hl_hw_queues_destroy(struct hl_device *hdev);
|
|
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
|
u32 cb_size, u64 cb_ptr);
|
|
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
|
|
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
|
|
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
|
|
|
|
/* Advance a queue pointer by one entry, through hl_hw_queue_add_ptr(). */
#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)

/*
 * Mask a pi value with (HL_QUEUE_LENGTH - 1). For a non-negative pi and a
 * power-of-2 HL_QUEUE_LENGTH this equals pi modulo HL_QUEUE_LENGTH.
 */
#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1))
|
|
|
|
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
|
|
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
|
|
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
|
|
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
|
|
void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
|
|
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
|
|
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
|
|
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
|
|
int hl_asid_init(struct hl_device *hdev);
|
|
void hl_asid_fini(struct hl_device *hdev);
|
|
unsigned long hl_asid_alloc(struct hl_device *hdev);
|
|
void hl_asid_free(struct hl_device *hdev, unsigned long asid);
|
|
|
|
int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
|
|
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
|
|
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
|
|
int hl_ctx_put(struct hl_ctx *ctx);
|
|
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
|
|
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
|
|
int hl_device_init(struct hl_device *hdev, struct class *hclass);
|
|
void hl_device_fini(struct hl_device *hdev);
|
|
int hl_device_suspend(struct hl_device *hdev);
|
|
int hl_device_resume(struct hl_device *hdev);
|
|
int hl_device_reset(struct hl_device *hdev, bool hard_reset,
|
|
bool from_hard_reset_thread);
|
|
void hl_hpriv_get(struct hl_fpriv *hpriv);
|
|
void hl_hpriv_put(struct hl_fpriv *hpriv);
|
|
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
|
|
int hl_build_hwmon_channel_info(struct hl_device *hdev,
|
|
struct armcp_sensor *sensors_arr);
|
|
|
|
/* sysfs */
int hl_sysfs_init(struct hl_device *hdev);
void hl_sysfs_fini(struct hl_device *hdev);

/* H/W monitor */
int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);
|
|
|
|
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
|
|
u64 *handle, int ctx_id);
|
|
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
|
|
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
|
|
struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
|
|
u32 handle);
|
|
void hl_cb_put(struct hl_cb *cb);
|
|
void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
|
|
void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
|
|
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
|
|
int hl_cb_pool_init(struct hl_device *hdev);
|
|
int hl_cb_pool_fini(struct hl_device *hdev);
|
|
|
|
/*
 * NOTE(review): presumably installs the Goya implementation of
 * struct hl_asic_funcs into hdev->asic_funcs - confirm in the Goya code.
 */
void goya_set_asic_funcs(struct hl_device *hdev);
|
|
|
|
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
|
|
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
|
|
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
|
|
long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
|
|
long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
|
|
long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
|
|
long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
|
|
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
|
|
long value);
|
|
u64 hl_get_max_power(struct hl_device *hdev);
|
|
void hl_set_max_power(struct hl_device *hdev, u64 value);
|
|
|
|
/* IOCTL entry point and the per-IOCTL handlers (see hl_ioctl_t) */
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
|
|
|
|
#endif /* HABANALABSP_H_ */
|