#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__
#include "amdgpu_irq.h"
#include "kgd_kfd_interface.h"
#include "amdgpu_gfx.h"
#include "amdgpu_doorbell.h"
#include <linux/sched/mm.h>
/*
 * Upper bounds on the number of pipes per engine type. They size the
 * compute/gfx/sdma HQD mask arrays in struct amdgpu_mes.
 */
#define AMDGPU_MES_MAX_COMPUTE_PIPES	8
#define AMDGPU_MES_MAX_GFX_PIPES	2
#define AMDGPU_MES_MAX_SDMA_PIPES	2

/*
 * Field layout of a packed 32-bit version word, as given by the
 * shift/mask pairs:
 *   bits  0..11  version          (AMDGPU_MES_VERSION_MASK)
 *   bits 12..23  API version      (AMDGPU_MES_API_VERSION_*)
 *   bits 24..31  feature version  (AMDGPU_MES_FEAT_VERSION_*)
 */
#define AMDGPU_MES_API_VERSION_SHIFT	12
#define AMDGPU_MES_FEAT_VERSION_SHIFT	24

#define AMDGPU_MES_VERSION_MASK		0x00000fff
#define AMDGPU_MES_API_VERSION_MASK	0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK	0xff000000

/* 0x40000 bytes = 256 KiB; presumably the MES scratch buffer size - confirm at the users. */
#define AMDGPU_MES_MSCRATCH_SIZE	0x40000

/* All-ones sentinel for "no valid doorbell offset". */
#define AMDGPU_MES_INVALID_DB_OFFSET	0xffffffff
/*
 * Scheduling priority levels, lowest to highest.
 * AMDGPU_MES_PRIORITY_NUM_LEVELS is the count of real levels (5) and is
 * used as the length of amdgpu_mes::aggregated_doorbells[].
 */
enum amdgpu_mes_priority_level {
	AMDGPU_MES_PRIORITY_LEVEL_LOW       = 0,
	AMDGPU_MES_PRIORITY_LEVEL_NORMAL    = 1,
	AMDGPU_MES_PRIORITY_LEVEL_MEDIUM    = 2,
	AMDGPU_MES_PRIORITY_LEVEL_HIGH      = 3,
	AMDGPU_MES_PRIORITY_LEVEL_REALTIME  = 4,
	AMDGPU_MES_PRIORITY_NUM_LEVELS
};
/* Process and gang context sizes: 0x1000 bytes (4 KiB) each. */
#define AMDGPU_MES_PROC_CTX_SIZE	0x1000
#define AMDGPU_MES_GANG_CTX_SIZE	0x1000

/* Forward declaration; struct amdgpu_mes stores a pointer to this table. */
struct amdgpu_mes_funcs;
/*
 * MES pipe identifiers. AMDGPU_MAX_MES_PIPES is the number of pipes per
 * GC instance and doubles as an array dimension.
 */
enum amdgpu_mes_pipe {
	AMDGPU_MES_PIPE_0 = 0,
	AMDGPU_MES_PIPE_1,
	AMDGPU_MAX_MES_PIPES = 2,
};

/* Role aliases: pipe 0 is the scheduler pipe, pipe 1 is the KIQ pipe. */
#define AMDGPU_MES_SCHED_PIPE	AMDGPU_MES_PIPE_0
#define AMDGPU_MES_KIQ_PIPE	AMDGPU_MES_PIPE_1

/* Total number of (GC instance, MES pipe) slots. */
#define AMDGPU_MAX_MES_INST_PIPES \
	(AMDGPU_MAX_MES_PIPES * AMDGPU_MAX_GC_INSTANCES)

/*
 * Flatten (xcc_id, pipe_id) into an index in [0, AMDGPU_MAX_MES_INST_PIPES).
 *
 * Both arguments are parenthesized so that expression arguments such as
 * MES_PIPE_INST(base + i, pipe) or MES_PIPE_INST(xcc, p & 1) are not
 * re-associated by operator precedence inside the expansion.
 */
#define MES_PIPE_INST(xcc_id, pipe_id) \
	((xcc_id) * AMDGPU_MAX_MES_PIPES + (pipe_id))
/*
 * Per-device MES state.
 *
 * Arrays dimensioned AMDGPU_MAX_MES_INST_PIPES have one slot per
 * (GC instance, MES pipe) pair - the index space MES_PIPE_INST() produces.
 * Arrays dimensioned AMDGPU_MAX_MES_PIPES have one slot per MES pipe only.
 *
 * Member order is unchanged; the group comments below are derived from the
 * member names and types only.
 */
struct amdgpu_mes {
	struct amdgpu_device		*adev;

	/* Acquired/released through amdgpu_mes_lock()/amdgpu_mes_unlock(). */
	struct mutex			mutex_hidden;

	struct ida			doorbell_ida;
	spinlock_t			queue_id_lock;

	/* Version words; see the AMDGPU_MES_*_VERSION_{MASK,SHIFT} macros. */
	uint32_t			sched_version;
	uint32_t			kiq_version;
	uint32_t			fw_version[AMDGPU_MAX_MES_PIPES];

	bool				enable_legacy_queue_map;

	uint32_t			total_max_queue;
	uint32_t			max_doorbell_slices;

	uint64_t			default_process_quantum;
	uint64_t			default_gang_quantum;

	/* One ring and one ring lock per (GC instance, pipe) slot. */
	struct amdgpu_ring		ring[AMDGPU_MAX_MES_INST_PIPES];
	spinlock_t			ring_lock[AMDGPU_MAX_MES_INST_PIPES];

	const struct firmware		*fw[AMDGPU_MAX_MES_PIPES];

	/* ucode buffer: BO, GPU address, CPU pointer, start address. */
	struct amdgpu_bo		*ucode_fw_obj[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			ucode_fw_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
	uint32_t			*ucode_fw_ptr[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			uc_start_addr[AMDGPU_MAX_MES_PIPES];

	/* data buffer: BO, GPU address, CPU pointer, start address. */
	struct amdgpu_bo		*data_fw_obj[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			data_fw_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
	uint32_t			*data_fw_ptr[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			data_start_addr[AMDGPU_MAX_MES_PIPES];

	/* EOP buffer: BO and GPU address. */
	struct amdgpu_bo		*eop_gpu_obj[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			eop_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];

	void				*mqd_backup[AMDGPU_MAX_MES_INST_PIPES];
	struct amdgpu_irq_src		irq[AMDGPU_MAX_MES_INST_PIPES];

	/* Resource masks (VMIDs per hub, HQDs per engine pipe). */
	uint32_t			vmid_mask_gfxhub;
	uint32_t			vmid_mask_mmhub;
	uint32_t			gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
	uint32_t			compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
	uint32_t			sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];

	/* One entry per enum amdgpu_mes_priority_level value. */
	uint32_t			aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];

	/* Scheduler context: offset, GPU address, CPU pointer. */
	uint32_t			sch_ctx_offs[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			sch_ctx_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			*sch_ctx_ptr[AMDGPU_MAX_MES_INST_PIPES];

	/* Query-status fence: offset, GPU address, CPU pointer. */
	uint32_t			query_status_fence_offs[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			query_status_fence_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			*query_status_fence_ptr[AMDGPU_MAX_MES_INST_PIPES];

	/*
	 * Written by amdgpu_mes_lock() with the value returned by
	 * memalloc_noreclaim_save(); consumed by amdgpu_mes_unlock().
	 */
	uint32_t			saved_flags;

	/* Invoked via the amdgpu_mes_kiq_hw_init()/_fini() macros. */
	int (*kiq_hw_init)(struct amdgpu_device *adev, uint32_t xcc_id);
	int (*kiq_hw_fini)(struct amdgpu_device *adev, uint32_t xcc_id);

	/* Doorbell bookkeeping. */
	uint32_t			db_start_dw_offset;
	uint32_t			num_mes_dbs;
	unsigned long			*doorbell_bitmap;

	/* Event log buffer: size, BO, GPU address, CPU address. */
	uint32_t			event_log_size;
	struct amdgpu_bo		*event_log_gpu_obj;
	uint64_t			event_log_gpu_addr;
	void				*event_log_cpu_addr;

	/* Backend operations table. */
	const struct amdgpu_mes_funcs	*funcs;

	/* "resource_1" buffer per MES pipe: BO, GPU address, CPU address. */
	struct amdgpu_bo		*resource_1[AMDGPU_MAX_MES_PIPES];
	uint64_t			resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
	void				*resource_1_addr[AMDGPU_MAX_MES_PIPES];

	/* Hung-queue doorbell array: sizing info plus per-pipe buffer. */
	int				hung_queue_db_array_size;
	int				hung_queue_hqd_info_offset;
	struct amdgpu_bo		*hung_queue_db_array_gpu_obj[AMDGPU_MAX_MES_PIPES];
	uint64_t			hung_queue_db_array_gpu_addr[AMDGPU_MAX_MES_PIPES];
	void				*hung_queue_db_array_cpu_addr[AMDGPU_MAX_MES_PIPES];

	/* Cooperative-mode state and shared command buffers. */
	bool				enable_coop_mode;
	int				master_xcc_ids[AMDGPU_MAX_MES_INST_PIPES];
	struct amdgpu_bo		*shared_cmd_buf_obj[AMDGPU_MAX_MES_INST_PIPES];
	uint64_t			shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
};
/*
 * A gang: owns a list of queues (queue_list), links back to its owning
 * process, and can itself be linked into a list through 'list'.
 */
struct amdgpu_mes_gang {
	int				gang_id;
	int				priority;
	int				inprocess_gang_priority;
	int				global_priority_level;

	struct list_head		list;		/* this gang's own list linkage */
	struct amdgpu_mes_process	*process;

	/* Gang context buffer: BO, GPU address, CPU pointer. */
	struct amdgpu_bo		*gang_ctx_bo;
	uint64_t			gang_ctx_gpu_addr;
	void				*gang_ctx_cpu_ptr;

	uint64_t			gang_quantum;
	struct list_head		queue_list;	/* head of this gang's queues */
};
/*
 * A single queue. 'list' is the linkage node (presumably into
 * amdgpu_mes_gang::queue_list - confirm at the users) and 'gang' points
 * back to the owning gang.
 */
struct amdgpu_mes_queue {
	struct list_head	list;
	struct amdgpu_mes_gang	*gang;

	int			queue_id;
	uint64_t		doorbell_off;

	/* MQD buffer: BO, CPU pointer, GPU address. */
	struct amdgpu_bo	*mqd_obj;
	void			*mqd_cpu_ptr;
	uint64_t		mqd_gpu_addr;

	uint64_t		wptr_gpu_addr;
	int			queue_type;
	int			paging;
	struct amdgpu_ring	*ring;
};
/* Parameter bundle describing a queue (type, addresses, size, priorities). */
struct amdgpu_mes_queue_properties {
	int			queue_type;

	/* Addresses associated with the queue. */
	uint64_t		hqd_base_gpu_addr;
	uint64_t		rptr_gpu_addr;
	uint64_t		wptr_gpu_addr;
	uint64_t		wptr_mc_addr;

	uint32_t		queue_size;
	uint64_t		eop_gpu_addr;

	uint32_t		hqd_pipe_priority;
	uint32_t		hqd_queue_priority;

	bool			paging;
	struct amdgpu_ring	*ring;
	uint64_t		doorbell_off;
};
/* Parameter bundle describing a gang's priority and quantum settings. */
struct amdgpu_mes_gang_properties {
	uint32_t	priority;
	uint32_t	gang_quantum;
	uint32_t	inprocess_gang_priority;
	uint32_t	priority_level;
	int		global_priority_level;	/* signed, unlike the fields above */
};
/* Argument block for amdgpu_mes_funcs::add_hw_queue(). */
struct mes_add_queue_input {
	uint32_t	xcc_id;

	/* Process-level parameters. */
	uint32_t	process_id;
	uint64_t	page_table_base_addr;
	uint64_t	process_va_start;
	uint64_t	process_va_end;
	uint64_t	process_quantum;
	uint64_t	process_context_addr;

	/* Gang-level parameters. */
	uint64_t	gang_quantum;
	uint64_t	gang_context_addr;
	uint32_t	inprocess_gang_priority;
	uint32_t	gang_global_priority_level;

	/* Queue-level parameters. */
	uint32_t	doorbell_offset;
	uint64_t	mqd_addr;
	uint64_t	wptr_addr;
	uint64_t	wptr_mc_addr;
	uint32_t	queue_type;
	uint32_t	paging;

	uint32_t	gws_base;
	uint32_t	gws_size;

	/* Trap handler settings. */
	uint64_t	tba_addr;
	uint64_t	tma_addr;
	uint32_t	trap_en;

	/* Flags carried as 32-bit words rather than bool. */
	uint32_t	skip_process_ctx_clear;
	uint32_t	is_kfd_process;
	uint32_t	is_aql_queue;

	uint32_t	queue_size;
	uint32_t	exclusively_scheduled;
	uint32_t	sh_mem_config_data;
	uint32_t	vm_cntx_cntl;
};
/* Argument block for amdgpu_mes_funcs::remove_hw_queue(). */
struct mes_remove_queue_input {
	uint32_t	xcc_id;
	uint32_t	doorbell_offset;
	uint64_t	gang_context_addr;
	bool		remove_queue_after_reset;
};
/* Argument block for amdgpu_mes_funcs::map_legacy_queue(). */
struct mes_map_legacy_queue_input {
	uint32_t	xcc_id;
	uint32_t	queue_type;
	uint32_t	doorbell_offset;
	uint32_t	pipe_id;
	uint32_t	queue_id;
	uint64_t	mqd_addr;
	uint64_t	wptr_addr;
};
/* Argument block for amdgpu_mes_funcs::unmap_legacy_queue(). */
struct mes_unmap_legacy_queue_input {
	uint32_t				xcc_id;
	enum amdgpu_unmap_queues_action		action;
	uint32_t				queue_type;
	uint32_t				doorbell_offset;
	uint32_t				pipe_id;
	uint32_t				queue_id;

	/* Trailing fence: address and the data value paired with it. */
	uint64_t				trail_fence_addr;
	uint64_t				trail_fence_data;
};
/* Argument block for amdgpu_mes_funcs::suspend_gang(). */
struct mes_suspend_gang_input {
	uint32_t	xcc_id;
	bool		suspend_all_gangs;
	uint64_t	gang_context_addr;

	/* Suspend fence: address and the 32-bit value paired with it. */
	uint64_t	suspend_fence_addr;
	uint32_t	suspend_fence_value;
};
/* Argument block for amdgpu_mes_funcs::resume_gang(). */
struct mes_resume_gang_input {
	uint32_t	xcc_id;
	bool		resume_all_gangs;
	uint64_t	gang_context_addr;
};
/* Argument block for amdgpu_mes_funcs::reset_hw_queue(). */
struct mes_reset_queue_input {
	uint32_t	xcc_id;
	uint32_t	queue_type;
	uint32_t	doorbell_offset;

	bool		use_mmio;

	/* me / pipe / queue identifiers of the target queue. */
	uint32_t	me_id;
	uint32_t	pipe_id;
	uint32_t	queue_id;

	uint64_t	mqd_addr;
	uint64_t	wptr_addr;
	uint32_t	vmid;

	bool		legacy_gfx;
	bool		is_kq;
};
/* Argument block for amdgpu_mes_funcs::detect_and_reset_hung_queues(). */
struct mes_detect_and_reset_queue_input {
	uint32_t	queue_type;
	bool		detect_only;
};
/* Argument block for amdgpu_mes_funcs::invalidate_tlbs_pasid(). */
struct mes_inv_tlbs_pasid_input {
	uint32_t	xcc_id;
	uint16_t	pasid;
	uint8_t		hub_id;
	uint8_t		flush_type;
};
/*
 * Operation selector stored in mes_misc_op_input::op.
 * Values are implicit and start at 0, in declaration order.
 */
enum mes_misc_opcode {
	MES_MISC_OP_WRITE_REG,
	MES_MISC_OP_READ_REG,
	MES_MISC_OP_WRM_REG_WAIT,
	MES_MISC_OP_WRM_REG_WR_WAIT,
	MES_MISC_OP_SET_SHADER_DEBUGGER,
	MES_MISC_OP_CHANGE_CONFIG,
};
/*
 * Argument block for amdgpu_mes_funcs::misc_op().
 *
 * 'op' selects the operation; the anonymous union carries its payload.
 * Going by the member names, the pairing is presumably:
 *   read_reg            <-> MES_MISC_OP_READ_REG
 *   write_reg           <-> MES_MISC_OP_WRITE_REG
 *   wrm_reg             <-> MES_MISC_OP_WRM_REG_WAIT / _WRM_REG_WR_WAIT
 *   set_shader_debugger <-> MES_MISC_OP_SET_SHADER_DEBUGGER
 *   change_config       <-> MES_MISC_OP_CHANGE_CONFIG
 * (confirm against the misc_op implementations).
 */
struct mes_misc_op_input {
	uint32_t		xcc_id;
	enum mes_misc_opcode	op;

	union {
		struct {
			uint32_t	reg_offset;
			uint64_t	buffer_addr;
		} read_reg;

		struct {
			uint32_t	reg_offset;
			uint32_t	reg_value;
		} write_reg;

		struct {
			uint32_t	ref;
			uint32_t	mask;
			uint32_t	reg0;
			uint32_t	reg1;
		} wrm_reg;

		struct {
			uint64_t	process_context_addr;
			/* 1 + 1 + 29 + 1 = 32 flag bits, overlaid by u32all. */
			union {
				struct {
					uint32_t single_memop : 1;
					uint32_t single_alu_op : 1;
					uint32_t reserved: 29;
					uint32_t process_ctx_flush: 1;
				};
				uint32_t u32all;
			} flags;
			uint32_t	spi_gdbg_per_vmid_cntl;
			uint32_t	tcp_watch_cntl[4];
			uint32_t	trap_en;
		} set_shader_debugger;

		struct {
			/* 1 + 1 + 30 = 32 option bits, overlaid by 'all'. */
			union {
				struct {
					uint32_t limit_single_process : 1;
					uint32_t enable_hws_logging_buffer : 1;
					uint32_t reserved : 30;
				};
				uint32_t all;
			} option;
			struct {
				uint32_t tdr_level;
				uint32_t tdr_delay;
			} tdr_config;
		} change_config;
	};
};
/*
 * Backend operations table, reached through amdgpu_mes::funcs.
 *
 * Every callback takes the MES instance plus a dedicated input structure
 * and returns int (presumably 0 / negative errno - confirm in the
 * implementations).
 */
struct amdgpu_mes_funcs {
	/* Hardware queue add / remove. */
	int (*add_hw_queue)(struct amdgpu_mes *mes,
			    struct mes_add_queue_input *input);
	int (*remove_hw_queue)(struct amdgpu_mes *mes,
			       struct mes_remove_queue_input *input);

	/* Legacy queue map / unmap. */
	int (*map_legacy_queue)(struct amdgpu_mes *mes,
				struct mes_map_legacy_queue_input *input);
	int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
				  struct mes_unmap_legacy_queue_input *input);

	/* Gang suspend / resume. */
	int (*suspend_gang)(struct amdgpu_mes *mes,
			    struct mes_suspend_gang_input *input);
	int (*resume_gang)(struct amdgpu_mes *mes,
			   struct mes_resume_gang_input *input);

	/* Miscellaneous operation selected by input->op. */
	int (*misc_op)(struct amdgpu_mes *mes,
		       struct mes_misc_op_input *input);

	/* Queue reset and hung-queue detection. */
	int (*reset_hw_queue)(struct amdgpu_mes *mes,
			      struct mes_reset_queue_input *input);
	int (*detect_and_reset_hung_queues)(struct amdgpu_mes *mes,
					    struct mes_detect_and_reset_queue_input *input);

	/* TLB invalidation by PASID. */
	int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
				     struct mes_inv_tlbs_pasid_input *input);
};
/*
 * Convenience wrappers that call the kiq_hw_init / kiq_hw_fini callbacks
 * stored in adev->mes. Note that 'adev' is evaluated twice, so pass an
 * expression without side effects.
 */
#define amdgpu_mes_kiq_hw_init(adev, xcc_id) \
	((adev)->mes.kiq_hw_init((adev), (xcc_id)))
#define amdgpu_mes_kiq_hw_fini(adev, xcc_id) \
	((adev)->mes.kiq_hw_fini((adev), (xcc_id)))
/*
 * Lifecycle entry points: microcode setup for one pipe, init/fini, and
 * suspend/resume. Defined outside this header; the int results are
 * presumably 0 / negative errno - confirm against the definitions.
 */
int  amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int  amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);
int  amdgpu_mes_suspend(struct amdgpu_device *adev);
int  amdgpu_mes_resume(struct amdgpu_device *adev);
/*
 * Legacy-queue helpers operating on an amdgpu_ring. Each takes the target
 * xcc_id as its last parameter. Defined outside this header.
 */
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
				struct amdgpu_ring *ring,
				uint32_t xcc_id);
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  enum amdgpu_unmap_queues_action action,
				  u64 gpu_addr,
				  u64 seq,
				  uint32_t xcc_id);
int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  unsigned int vmid,
				  bool use_mmio,
				  uint32_t xcc_id);

/*
 * Hung-queue detection. hung_db_num and hung_db_array are output-style
 * pointer parameters; presumably the array must hold at least
 * amdgpu_mes_get_hung_queue_db_array_size() entries - confirm at the
 * definition.
 */
int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev);
int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
					    int queue_type,
					    bool detect_only,
					    unsigned int *hung_db_num,
					    u32 *hung_db_array,
					    uint32_t xcc_id);
/*
 * Register access helpers. amdgpu_mes_rreg() returns a 32-bit value;
 * the others return int. Defined outside this header.
 */
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev,
			 uint32_t reg,
			 uint32_t xcc_id);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
		    uint32_t reg,
		    uint32_t val,
		    uint32_t xcc_id);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0,
				  uint32_t reg1,
				  uint32_t ref,
				  uint32_t mask,
				  uint32_t xcc_id);
int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
/*
 * Shader debugger control for the process identified by
 * process_context_addr. The parameters mirror the members of
 * mes_misc_op_input::set_shader_debugger; tcp_watch_cntl presumably points
 * at 4 entries to match that member - confirm at the definition.
 */
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				   uint64_t process_context_addr,
				   uint32_t spi_gdbg_per_vmid_cntl,
				   const uint32_t *tcp_watch_cntl,
				   uint32_t flags,
				   bool trap_en,
				   uint32_t xcc_id);
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				     uint64_t process_context_addr,
				     uint32_t xcc_id);
/* Doorbell helpers; defined outside this header. */
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
						  enum amdgpu_mes_priority_level prio);
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
/*
 * Take the MES mutex, then enter a memalloc_noreclaim section.
 *
 * The value returned by memalloc_noreclaim_save() is parked in
 * mes->saved_flags so that amdgpu_mes_unlock() can hand it back to
 * memalloc_noreclaim_restore(). The store happens only after the mutex is
 * held, so the shared field is written under the lock.
 */
static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
{
	struct mutex *lock = &mes->mutex_hidden;

	mutex_lock(lock);
	mes->saved_flags = memalloc_noreclaim_save();
}
/*
 * Counterpart of amdgpu_mes_lock(): restore the allocation-context flags
 * saved at lock time, then drop the MES mutex. mes->saved_flags is read
 * while the mutex is still held.
 */
static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
{
	uint32_t flags = mes->saved_flags;

	memalloc_noreclaim_restore(flags);
	mutex_unlock(&mes->mutex_hidden);
}
/*
 * Capability query and enforce-isolation update; both are defined outside
 * this header.
 */
bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
int  amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
#endif