#ifndef __RAS_CMD_H__
#define __RAS_CMD_H__
#include "ras.h"
#include "ras_eeprom.h"
#include "ras_log_ring.h"
#include "ras_cper.h"
/* Constants shared by the RAS command structures declared in this header. */
#define RAS_CMD_DEV_HANDLE_MAGIC		0xFEEDAD00UL

/* Size in bytes of ras_cmd_ctx.input_buff_raw. */
#define RAS_CMD_MAX_IN_SIZE			256
/* Capacity of ras_cmd_devices_info_rsp.devs. */
#define RAS_CMD_MAX_GPU_NUM			32
/* Capacity of ras_cmd_bad_pages_info_rsp.records. */
#define RAS_CMD_MAX_BAD_PAGES_PER_GROUP		32

/* The mask selects bits 12..31, i.e. everything at or above the shift. */
#define RAS_TA_INST_MASK			0xFFFFF000
#define RAS_TA_INST_SHIFT			12
/*
 * Identifies which kind of interface is serving RAS commands.
 * Values are spelled out so the numbering is visible at a glance.
 */
enum ras_cmd_interface_type {
	RAS_CMD_INTERFACE_TYPE_NONE	= 0,
	RAS_CMD_INTERFACE_TYPE_AMDGPU	= 1,
	RAS_CMD_INTERFACE_TYPE_VF	= 2,
	RAS_CMD_INTERFACE_TYPE_PF	= 3,
};
/*
 * Command-id number space, split into four consecutive 0x10000-wide
 * windows. Each *_END equals the following *_START, so the END values
 * are presumably exclusive upper bounds (confirm against the dispatcher).
 */
enum ras_cmd_id_range {
	/* Commands common to every interface. */
	RAS_CMD_ID_COMMON_START		= 0x00000,
	RAS_CMD_ID_COMMON_END		= 0x10000,

	/* AMDGPU window. */
	RAS_CMD_ID_AMDGPU_START		= RAS_CMD_ID_COMMON_END,
	RAS_CMD_ID_AMDGPU_END		= 0x20000,

	/* MxGPU window. */
	RAS_CMD_ID_MXGPU_START		= RAS_CMD_ID_AMDGPU_END,
	RAS_CMD_ID_MXGPU_END		= 0x30000,

	/* MxGPU VF window. */
	RAS_CMD_ID_MXGPU_VF_START	= RAS_CMD_ID_MXGPU_END,
	RAS_CMD_ID_MXGPU_VF_END		= 0x40000,
};
/*
 * Command identifiers in the common window.
 *
 * Numbering starts at RAS_CMD_ID_COMMON_START and increments implicitly,
 * so the order of the enumerators defines their values: new commands must
 * be appended just before RAS_CMD__SUPPORTED_MAX to keep existing ids
 * stable.
 */
enum ras_cmd_id {
	RAS_CMD__BEGIN = RAS_CMD_ID_COMMON_START,

	RAS_CMD__QUERY_INTERFACE_INFO,
	RAS_CMD__GET_DEVICES_INFO,
	RAS_CMD__GET_BLOCK_ECC_STATUS,
	RAS_CMD__INJECT_ERROR,
	RAS_CMD__GET_BAD_PAGES,
	RAS_CMD__CLEAR_BAD_PAGE_INFO,
	RAS_CMD__RESET_ALL_ERROR_COUNTS,
	RAS_CMD__GET_SAFE_FB_ADDRESS_RANGES,
	RAS_CMD__TRANSLATE_FB_ADDRESS,
	RAS_CMD__GET_LINK_TOPOLOGY,
	RAS_CMD__GET_CPER_SNAPSHOT,
	RAS_CMD__GET_CPER_RECORD,
	RAS_CMD__GET_BATCH_TRACE_SNAPSHOT,
	RAS_CMD__GET_BATCH_TRACE_RECORD,
	RAS_CMD__GET_ALL_BLOCK_ECC_STATUS,
	RAS_CMD__SET_CMD_AUTO_UPDATE,

	/* Pinned to the end of the common window, not to the last command. */
	RAS_CMD__SUPPORTED_MAX = RAS_CMD_ID_COMMON_END,
};
/*
 * Result codes for a RAS command (the type of value ras_cmd_ctx.cmd_res
 * is presumably meant to carry; confirm against the handler).
 *
 * The spellings "EXEED" and "UKNOWN" are part of the existing public
 * names and are kept unchanged so that current users keep compiling.
 */
enum ras_cmd_response {
	RAS_CMD__SUCCESS			= 0,
	RAS_CMD__SUCCESS_EXEED_BUFFER		= 1,
	RAS_CMD__ERROR_UKNOWN_CMD		= 2,
	RAS_CMD__ERROR_INVALID_CMD		= 3,
	RAS_CMD__ERROR_VERSION			= 4,
	RAS_CMD__ERROR_INVALID_INPUT_SIZE	= 5,
	RAS_CMD__ERROR_INVALID_INPUT_DATA	= 6,
	RAS_CMD__ERROR_DRV_INIT_FAIL		= 7,
	RAS_CMD__ERROR_ACCESS_DENIED		= 8,
	RAS_CMD__ERROR_GENERIC			= 9,
	RAS_CMD__ERROR_TIMEOUT			= 10,
};
/*
 * RAS error categories. Every non-zero value occupies its own bit, so
 * the values can be told apart even when OR-ed together.
 */
enum ras_error_type {
	RAS_TYPE_ERROR__NONE			= 0,
	RAS_TYPE_ERROR__PARITY			= 1 << 0,
	RAS_TYPE_ERROR__SINGLE_CORRECTABLE	= 1 << 1,
	RAS_TYPE_ERROR__MULTI_UNCORRECTABLE	= 1 << 2,
	RAS_TYPE_ERROR__POISON			= 1 << 3,
};
/*
 * Forward declarations: the structures that follow only hold or take
 * pointers to these types, so incomplete declarations are sufficient here.
 */
struct ras_core_context;
struct ras_cmd_ctx;
/* Per-core command manager state. */
struct ras_cmd_mgr {
	struct list_head	head;		/* list linkage / list anchor */
	struct ras_core_context	*ras_core;	/* associated RAS core context */
	uint64_t		dev_handle;	/* handle value for this device */
};
/* One dispatch entry: pairs a command id with the function handling it. */
struct ras_cmd_func_map {
	uint32_t cmd_id;

	/* Handler callback; the meaning of @data is defined by the caller. */
	int (*func)(struct ras_core_context *ras_core,
		    struct ras_cmd_ctx *cmd,
		    void *data);
};
/*
 * Device location split into domain/bus/device/function bit-fields
 * (16 + 8 + 5 + 3 = 32 bits), overlaid with a single 32-bit word so the
 * whole value can be read or written at once through u32_all.
 */
struct ras_device_bdf {
	union {
		struct {
			uint32_t function	: 3;
			uint32_t device		: 5;
			uint32_t bus		: 8;
			uint32_t domain		: 16;
		};
		uint32_t u32_all;	/* all four fields as one word */
	};
};
/* Extra parameters accompanying a command: a VF index and an opaque pointer. */
struct ras_cmd_param {
	uint32_t	idx_vf;
	void		*data;
};
#pragma pack(push, 8)
/*
 * Command envelope: a fixed header, a fixed-size input area and a
 * variable-length output area (flexible array member) at the tail.
 */
struct ras_cmd_ctx {
	uint32_t magic;

	/* Command-interface version: 10-bit minor first, then 6-bit major. */
	union {
		struct {
			uint16_t ras_cmd_minor_ver : 10;
			uint16_t ras_cmd_major_ver : 6;
		};
		uint16_t ras_cmd_ver;
	};

	/*
	 * Platform version.
	 * NOTE(review): here the 10-bit field is the *major* and the 6-bit
	 * field the *minor*, the reverse of the ras_cmd_* pair above.
	 * Confirm this asymmetry is intentional; it is part of the binary
	 * layout and cannot be changed casually.
	 */
	union {
		struct {
			uint16_t plat_major_ver : 10;
			uint16_t plat_minor_ver : 6;
		};
		uint16_t plat_ver;
	};

	uint32_t cmd_id;
	uint32_t cmd_res;
	uint32_t input_size;
	uint32_t output_size;
	uint32_t output_buf_size;
	uint32_t reserved[5];

	uint8_t input_buff_raw[RAS_CMD_MAX_IN_SIZE];	/* fixed-size input area */
	uint8_t output_buff_raw[];			/* sized by the allocator */
};
/* Wrapper around the 64-bit device handle embedded in request payloads. */
struct ras_cmd_dev_handle {
	uint64_t dev_handle;
};
/* Request: ECC information for one block/sub-block of a device. */
struct ras_cmd_block_ecc_info_req {
	struct ras_cmd_dev_handle	dev;
	uint32_t			block_id;
	uint32_t			subblock_id;
	uint32_t			reserved[4];
};
/* Response: three error counters for a block, plus version and padding. */
struct ras_cmd_block_ecc_info_rsp {
	uint32_t version;
	uint32_t ce_count;	/* presumably correctable errors */
	uint32_t ue_count;	/* presumably uncorrectable errors */
	uint32_t de_count;	/* presumably deferred errors */
	uint32_t reserved[6];
};
/* Request: inject an error into a block of a device. */
struct ras_cmd_inject_error_req {
	struct ras_cmd_dev_handle	dev;
	uint32_t			block_id;
	uint32_t			subblock_id;
	uint64_t			address;
	uint32_t			error_type;
	uint32_t			instance_mask;

	/* Packed options; `value` gives access to all 64 bits at once. */
	union {
		struct {
			uint64_t vf_idx	: 6;
			uint64_t method	: 10;
			uint64_t rsv	: 48;
		};
		uint64_t value;
	};

	uint32_t			reserved[8];
};
/* Response to an error-injection request. */
struct ras_cmd_inject_error_rsp {
	uint32_t version;
	uint32_t reserved[5];
	uint64_t address;
};
/* Description of a single device as reported to the command client. */
struct ras_cmd_dev_info {
	uint64_t dev_handle;
	uint32_t location_id;
	uint32_t ecc_enabled;
	uint32_t ecc_supported;
	uint32_t vf_num;
	uint32_t asic_type;
	uint32_t oam_id;
	uint32_t reserved[8];
};
/*
 * Response: table of device descriptions. The array always has
 * RAS_CMD_MAX_GPU_NUM slots; dev_num presumably tells how many are
 * filled in (confirm against the handler).
 */
struct ras_cmd_devices_info_rsp {
	uint32_t		version;
	uint32_t		dev_num;
	uint32_t		reserved[6];
	struct ras_cmd_dev_info	devs[RAS_CMD_MAX_GPU_NUM];
};
/* One bad-page entry. */
struct ras_cmd_bad_page_record {
	/* Two names for the same 64-bit slot. */
	union {
		uint64_t address;
		uint64_t offset;
	};
	uint64_t	retired_page;
	uint64_t	ts;
	uint32_t	err_type;

	/* Two names for the same byte. */
	union {
		unsigned char bank;
		unsigned char cu;
	};
	unsigned char	mem_channel;
	unsigned char	mcumc_id;
	unsigned char	valid;
	unsigned char	reserved[8];
};
/* Request: one group of bad-page records for a device. */
struct ras_cmd_bad_pages_info_req {
	struct ras_cmd_dev_handle	device;
	uint32_t			group_index;
	uint32_t			reserved[5];
};
/*
 * Response: one group of bad-page records. The array always has
 * RAS_CMD_MAX_BAD_PAGES_PER_GROUP slots; bp_in_group presumably gives
 * the number of valid entries (confirm against the handler).
 */
struct ras_cmd_bad_pages_info_rsp {
	uint32_t			version;
	uint32_t			group_index;
	uint32_t			bp_in_group;
	uint32_t			bp_total_cnt;
	uint32_t			reserved[4];
	struct ras_cmd_bad_page_record	records[RAS_CMD_MAX_BAD_PAGES_PER_GROUP];
};
/* Request: interface query. Carries no fields besides reserved space. */
struct ras_query_interface_info_req {
	uint32_t reserved[8];
};
/* Response: version numbers and type of the command interface. */
struct ras_query_interface_info_rsp {
	uint32_t	version;
	uint32_t	ras_cmd_major_ver;
	uint32_t	ras_cmd_minor_ver;
	uint32_t	plat_major_ver;
	uint32_t	plat_minor_ver;
	uint8_t		interface_type;	/* presumably an enum ras_cmd_interface_type value */
	uint8_t		rsv[3];		/* fills the word up to 32 bits */
	uint32_t	reserved[8];
};
/* Number of slots in ras_cmd_ras_safe_fb_address_ranges_rsp.range. */
#define RAS_MAX_NUM_SAFE_RANGES		64

/*
 * Response: table of framebuffer address ranges. The table always has
 * RAS_MAX_NUM_SAFE_RANGES slots; num_ranges presumably gives the number
 * in use (confirm against the handler).
 */
struct ras_cmd_ras_safe_fb_address_ranges_rsp {
	uint32_t version;
	uint32_t num_ranges;
	uint32_t reserved[4];

	struct {
		uint64_t start;
		uint64_t size;
		uint32_t idx;
		uint32_t reserved[3];
	} range[RAS_MAX_NUM_SAFE_RANGES];
};
/* Kinds of framebuffer address understood by the translate command. */
enum ras_fb_addr_type {
	RAS_FB_ADDR_SOC_PHY	= 0,
	RAS_FB_ADDR_BANK	= 1,
	RAS_FB_ADDR_VF_PHY	= 2,
	RAS_FB_ADDR_UNKNOWN	= 3
};
/* A framebuffer location expressed as memory-topology coordinates. */
struct ras_fb_bank_addr {
	uint32_t stack_id;
	uint32_t bank_group;
	uint32_t bank;
	uint32_t row;
	uint32_t column;
	uint32_t channel;
	uint32_t subchannel;
	uint32_t reserved[3];
};
/* An address paired with the index of the VF it belongs to. */
struct ras_fb_vf_phy_addr {
	uint32_t vf_idx;
	uint32_t reserved;	/* keeps addr on an 8-byte boundary */
	uint64_t addr;
};
/*
 * Storage for one framebuffer address in any of the supported forms.
 * Which member is meaningful is presumably selected by an accompanying
 * enum ras_fb_addr_type value (confirm against the translate handler).
 */
union ras_translate_fb_address {
	struct ras_fb_bank_addr		bank_addr;
	uint64_t			soc_phy_addr;
	struct ras_fb_vf_phy_addr	vf_phy_addr;
};
/*
 * Request: translate an address from one form to another.
 *
 * NOTE(review): the two type selectors are declared as enums, whose
 * storage size is chosen by the compiler. Confirm that every producer
 * and consumer of this structure agrees on that size.
 */
struct ras_cmd_translate_fb_address_req {
	struct ras_cmd_dev_handle	dev;
	enum ras_fb_addr_type		src_addr_type;
	enum ras_fb_addr_type		dest_addr_type;
	union ras_translate_fb_address	trans_addr;
};
/* Response: the address produced by a translate request. */
struct ras_cmd_translate_fb_address_rsp {
	uint32_t			version;
	uint32_t			reserved[5];
	union ras_translate_fb_address	trans_addr;
};
/* Request: link topology between a source and a destination device. */
struct ras_dev_link_topology_req {
	struct ras_cmd_dev_handle	src;
	struct ras_cmd_dev_handle	dst;
};
/* Response: link description between the two requested devices. */
struct ras_dev_link_topology_rsp {
	uint32_t version;
	uint32_t link_status;
	uint32_t link_type;
	uint32_t num_hops;
	uint32_t reserved[8];
};
/* Request: CPER snapshot for a device. */
struct ras_cmd_cper_snapshot_req {
	struct ras_cmd_dev_handle	dev;
};
/* Response: CPER record count together with the first and latest ids. */
struct ras_cmd_cper_snapshot_rsp {
	uint32_t version;
	uint32_t reserved[4];
	uint32_t total_cper_num;
	uint64_t start_cper_id;
	uint64_t latest_cper_id;
};
/* Request: a run of CPER records beginning at a given id. */
struct ras_cmd_cper_record_req {
	struct ras_cmd_dev_handle	dev;
	uint64_t			cper_start_id;
	uint32_t			cper_num;
	uint32_t			buf_size;
	uint64_t			buf_ptr;	/* buffer address held as a 64-bit integer */
	uint32_t			reserved[4];
};
/* Response: sizes and counts describing a CPER record fetch. */
struct ras_cmd_cper_record_rsp {
	uint32_t version;
	uint32_t real_data_size;
	uint32_t real_cper_num;
	uint32_t remain_num;
	uint32_t reserved[4];
};
/* Request: batch-trace snapshot for a device. */
struct ras_cmd_batch_trace_snapshot_req {
	struct ras_cmd_dev_handle	dev;
};
/* Response: batch count together with the first and latest batch ids. */
struct ras_cmd_batch_trace_snapshot_rsp {
	uint32_t version;
	uint32_t reserved[4];
	uint32_t total_batch_num;
	uint64_t start_batch_id;
	uint64_t latest_batch_id;
};
/* Request: a run of trace batches beginning at a given batch id. */
struct ras_cmd_batch_trace_record_req {
	struct ras_cmd_dev_handle	dev;
	uint64_t			start_batch_id;
	uint32_t			batch_num;
	uint32_t			reserved[5];
};
/*
 * Descriptor of one trace batch. `offset` and `trace_num` presumably
 * locate the batch's entries inside a separate record table (confirm
 * against the code that fills ras_cmd_batch_trace_record_rsp).
 */
struct batch_ras_trace_info {
	uint64_t	batch_id;
	uint16_t	offset;
	uint8_t		trace_num;
	uint8_t		rsv;
	uint32_t	reserved;
};
/* Slot counts of the two tables in ras_cmd_batch_trace_record_rsp. */
#define RAS_CMD_MAX_BATCH_NUM		300
#define RAS_CMD_MAX_TRACE_NUM		300

/*
 * Response: a table of batch descriptors followed by a table of trace
 * records. Both tables are always full-sized; real_batch_num presumably
 * gives the number of valid descriptors (confirm against the handler).
 */
struct ras_cmd_batch_trace_record_rsp {
	uint32_t			version;
	uint16_t			real_batch_num;
	uint16_t			remain_num;
	uint64_t			start_batch_id;
	uint32_t			reserved[2];
	struct batch_ras_trace_info	batchs[RAS_CMD_MAX_BATCH_NUM];
	struct ras_log_info		records[RAS_CMD_MAX_TRACE_NUM];
};
/* Request: configure automatic update for a given command id. */
struct ras_cmd_auto_update_req {
	struct ras_cmd_dev_handle	dev;
	uint32_t			mode;
	uint32_t			cmd_id;
	uint64_t			addr;
	uint32_t			len;
	uint32_t			reserved[5];
};
/* Response to an auto-update request: version plus reserved space only. */
struct ras_cmd_auto_update_rsp {
	uint32_t version;
	uint32_t reserved[4];
};
/* Request: ECC counters of all blocks of a device. */
struct ras_cmd_blocks_ecc_req {
	struct ras_cmd_dev_handle	dev;
};
/* Error counters of a single block. */
struct ras_cmd_block_ecc {
	uint32_t ce_count;	/* presumably correctable errors */
	uint32_t ue_count;	/* presumably uncorrectable errors */
	uint32_t de_count;	/* presumably deferred errors */
};
/* Number of slots in ras_cmd_blocks_ecc_rsp.blocks. */
#define MAX_RAS_BLOCK_NUM		20

/* Response: one set of error counters per block slot. */
struct ras_cmd_blocks_ecc_rsp {
	uint32_t			version;
	uint32_t			reserved[5];
	struct ras_cmd_block_ecc	blocks[MAX_RAS_BLOCK_NUM];
};
#pragma pack(pop)
/*
 * Command-layer entry points. The implementations live outside this
 * header; the int return conventions are presumably 0 on success and a
 * non-zero/negative code on failure (confirm against the definitions).
 */

/* Set-up and tear-down of the command layer for one RAS core. */
int ras_cmd_init(struct ras_core_context *ras_core);
int ras_cmd_fini(struct ras_core_context *ras_core);

/* Process the command described by @cmd; @data is passed through opaquely. */
int rascore_handle_cmd(struct ras_core_context *ras_core,
		       struct ras_cmd_ctx *cmd,
		       void *data);

/* Return the 64-bit device handle associated with @ras_core. */
uint64_t ras_cmd_get_dev_handle(struct ras_core_context *ras_core);

/* Fill @rsp with the interface information. */
int ras_cmd_query_interface_info(struct ras_core_context *ras_core,
				 struct ras_query_interface_info_rsp *rsp);

/* Address translation helpers; the result is written through the last argument. */
int ras_cmd_translate_soc_pa_to_bank(struct ras_core_context *ras_core,
				     uint64_t soc_pa,
				     struct ras_fb_bank_addr *bank_addr);
int ras_cmd_translate_bank_to_soc_pa(struct ras_core_context *ras_core,
				     struct ras_fb_bank_addr bank_addr,
				     uint64_t *soc_pa);
#endif