root/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __RAS_CMD_H__
#define __RAS_CMD_H__
#include "ras.h"
#include "ras_eeprom.h"
#include "ras_log_ring.h"
#include "ras_cper.h"

/* Magic constant associated with RAS command device handles.
 * NOTE(review): not referenced anywhere in this header; presumably used by
 * the implementation when building/validating dev_handle values -- confirm.
 */
#define RAS_CMD_DEV_HANDLE_MAGIC 0xFEEDAD00UL

/* Size in bytes of input_buff_raw[] in struct ras_cmd_ctx */
#define RAS_CMD_MAX_IN_SIZE 256
/* Number of entries in devs[] of struct ras_cmd_devices_info_rsp */
#define RAS_CMD_MAX_GPU_NUM 32
/* Number of entries in records[] of struct ras_cmd_bad_pages_info_rsp */
#define RAS_CMD_MAX_BAD_PAGES_PER_GROUP 32

/* Position of the instance value in sub_block_index of
 * ta_ras_trigger_error_input. The sub block uses the lower 12 bits, so the
 * instance occupies bits 12..31 (mask 0xfffff000, shift 12).
 */
#define RAS_TA_INST_MASK 0xfffff000
#define RAS_TA_INST_SHIFT 0xc

/* Kind of interface that services RAS commands. The numeric values are
 * pinned explicitly so that they cannot shift if entries are reordered.
 */
enum ras_cmd_interface_type {
        RAS_CMD_INTERFACE_TYPE_NONE   = 0,
        RAS_CMD_INTERFACE_TYPE_AMDGPU = 1,
        RAS_CMD_INTERFACE_TYPE_VF     = 2,
        RAS_CMD_INTERFACE_TYPE_PF     = 3,
};

/* Partition of the command id space into four consecutive windows of
 * 0x10000 ids each. Every *_START aliases the previous window's *_END, so
 * *_END values are exclusive upper bounds.
 */
enum ras_cmd_id_range {
        /* common commands: [0x00000, 0x10000) */
        RAS_CMD_ID_COMMON_START   = 0x00000,
        RAS_CMD_ID_COMMON_END     = RAS_CMD_ID_COMMON_START + 0x10000,

        /* amdgpu commands: [0x10000, 0x20000) */
        RAS_CMD_ID_AMDGPU_START   = RAS_CMD_ID_COMMON_END,
        RAS_CMD_ID_AMDGPU_END     = RAS_CMD_ID_AMDGPU_START + 0x10000,

        /* mxgpu commands: [0x20000, 0x30000) */
        RAS_CMD_ID_MXGPU_START    = RAS_CMD_ID_AMDGPU_END,
        RAS_CMD_ID_MXGPU_END      = RAS_CMD_ID_MXGPU_START + 0x10000,

        /* mxgpu VF commands: [0x30000, 0x40000) */
        RAS_CMD_ID_MXGPU_VF_START = RAS_CMD_ID_MXGPU_END,
        RAS_CMD_ID_MXGPU_VF_END   = RAS_CMD_ID_MXGPU_VF_START + 0x10000,
};

/* Command identifiers in the common id window. Each id is written as an
 * explicit offset from RAS_CMD__BEGIN so the value of every command is
 * visible at a glance and cannot shift when the list is edited.
 */
enum ras_cmd_id {
        RAS_CMD__BEGIN                      = RAS_CMD_ID_COMMON_START,
        RAS_CMD__QUERY_INTERFACE_INFO       = RAS_CMD__BEGIN + 1,
        RAS_CMD__GET_DEVICES_INFO           = RAS_CMD__BEGIN + 2,
        RAS_CMD__GET_BLOCK_ECC_STATUS       = RAS_CMD__BEGIN + 3,
        RAS_CMD__INJECT_ERROR               = RAS_CMD__BEGIN + 4,
        RAS_CMD__GET_BAD_PAGES              = RAS_CMD__BEGIN + 5,
        RAS_CMD__CLEAR_BAD_PAGE_INFO        = RAS_CMD__BEGIN + 6,
        RAS_CMD__RESET_ALL_ERROR_COUNTS     = RAS_CMD__BEGIN + 7,
        RAS_CMD__GET_SAFE_FB_ADDRESS_RANGES = RAS_CMD__BEGIN + 8,
        RAS_CMD__TRANSLATE_FB_ADDRESS       = RAS_CMD__BEGIN + 9,
        RAS_CMD__GET_LINK_TOPOLOGY          = RAS_CMD__BEGIN + 10,
        RAS_CMD__GET_CPER_SNAPSHOT          = RAS_CMD__BEGIN + 11,
        RAS_CMD__GET_CPER_RECORD            = RAS_CMD__BEGIN + 12,
        RAS_CMD__GET_BATCH_TRACE_SNAPSHOT   = RAS_CMD__BEGIN + 13,
        RAS_CMD__GET_BATCH_TRACE_RECORD     = RAS_CMD__BEGIN + 14,
        RAS_CMD__GET_ALL_BLOCK_ECC_STATUS   = RAS_CMD__BEGIN + 15,
        RAS_CMD__SET_CMD_AUTO_UPDATE        = RAS_CMD__BEGIN + 16,
        /* exclusive upper bound of the common command window */
        RAS_CMD__SUPPORTED_MAX              = RAS_CMD_ID_COMMON_END,
};

/* Result codes for a RAS command (see cmd_res in struct ras_cmd_ctx --
 * presumably where these are reported; confirm in the implementation).
 * Values are pinned explicitly. The spellings "EXEED" and "UKNOWN" are
 * part of the existing interface and are kept as-is.
 */
enum ras_cmd_response {
        RAS_CMD__SUCCESS                  = 0,
        RAS_CMD__SUCCESS_EXEED_BUFFER     = 1,
        RAS_CMD__ERROR_UKNOWN_CMD         = 2,
        RAS_CMD__ERROR_INVALID_CMD        = 3,
        RAS_CMD__ERROR_VERSION            = 4,
        RAS_CMD__ERROR_INVALID_INPUT_SIZE = 5,
        RAS_CMD__ERROR_INVALID_INPUT_DATA = 6,
        RAS_CMD__ERROR_DRV_INIT_FAIL      = 7,
        RAS_CMD__ERROR_ACCESS_DENIED      = 8,
        RAS_CMD__ERROR_GENERIC            = 9,
        RAS_CMD__ERROR_TIMEOUT            = 10,
};

/* RAS error types. Every non-zero value is a distinct single bit, written
 * here in shift form to make that explicit.
 */
enum ras_error_type {
        RAS_TYPE_ERROR__NONE                = 0,
        RAS_TYPE_ERROR__PARITY              = 1 << 0,
        RAS_TYPE_ERROR__SINGLE_CORRECTABLE  = 1 << 1,
        RAS_TYPE_ERROR__MULTI_UNCORRECTABLE = 1 << 2,
        RAS_TYPE_ERROR__POISON              = 1 << 3,
};

/* Forward declarations: the definitions below refer to these types through
 * pointers before (or without) a full definition being visible.
 * struct ras_cmd_ctx is defined further down in this header.
 */
struct ras_core_context;
struct ras_cmd_ctx;

/* RAS command manager state.
 * NOTE(review): how 'head' is linked and how 'dev_handle' is produced is not
 * visible in this header -- see the implementation.
 */
struct ras_cmd_mgr {
        struct list_head head;                  /* list linkage */
        struct ras_core_context *ras_core;      /* associated RAS core context */
        uint64_t dev_handle;                    /* 64-bit device handle */
};

/* Associates a command id with the function that handles it.
 * The handler receives the RAS core context, the command context and an
 * opaque data pointer, and returns an int status (semantics defined by the
 * implementation).
 */
struct ras_cmd_func_map {
        uint32_t cmd_id;        /* command identifier */
        int (*func)(struct ras_core_context *ras_core,
                        struct ras_cmd_ctx *cmd, void *data);
};

/* Domain/bus/device/function identifier packed into 32 bits.
 * u32_all overlays the bit-fields (3 + 5 + 8 + 16 = 32 bits). Note that the
 * placement of bit-fields within the word is implementation-defined in C, so
 * the mapping between u32_all and the named fields depends on the compiler/ABI.
 */
struct ras_device_bdf {
        union {
                struct {
                        uint32_t function : 3;
                        uint32_t device : 5;
                        uint32_t bus : 8;
                        uint32_t domain : 16;
                };
                uint32_t u32_all;       /* all fields as one 32-bit value */
        };
};

/* Extra parameters accompanying a command.
 * NOTE(review): presumably passed as the 'data' argument of a handler;
 * not established by this header -- confirm against callers.
 */
struct ras_cmd_param {
        uint32_t idx_vf;        /* VF index */
        void *data;             /* opaque payload pointer */
};

#pragma pack(push, 8)
/* Command context exchanged with the RAS command handler: a fixed header,
 * a fixed-size raw input buffer and a variable-length raw output buffer.
 * Declared inside the #pragma pack(push, 8) region of this header.
 *
 * NOTE(review): the two version words are laid out differently --
 * ras_cmd_ver declares minor (10 bits) before major (6 bits), while
 * plat_ver declares major (10 bits) before minor (6 bits). Confirm the
 * asymmetry is intentional; it cannot be changed without breaking the
 * binary layout.
 */
struct ras_cmd_ctx {
        uint32_t magic;
        /* RAS command interface version; ras_cmd_ver overlays both fields */
        union {
                struct {
                        uint16_t ras_cmd_minor_ver : 10;
                        uint16_t ras_cmd_major_ver : 6;
                };
                uint16_t ras_cmd_ver;
        };
        /* platform version; plat_ver overlays both fields */
        union {
                struct {
                        uint16_t plat_major_ver : 10;
                        uint16_t plat_minor_ver : 6;
                };
                uint16_t plat_ver;
        };
        uint32_t cmd_id;                /* command identifier */
        uint32_t cmd_res;               /* command result */
        uint32_t input_size;            /* size of the data in input_buff_raw */
        uint32_t output_size;           /* size of the data in output_buff_raw */
        uint32_t output_buf_size;       /* presumably capacity of output_buff_raw -- confirm */
        uint32_t reserved[5];
        uint8_t  input_buff_raw[RAS_CMD_MAX_IN_SIZE];   /* fixed-size raw input */
        uint8_t  output_buff_raw[];     /* flexible array member: raw output */
};

/* Wrapper around a 64-bit device handle; embedded as the first member of
 * most request structures in this header.
 */
struct ras_cmd_dev_handle {
        uint64_t dev_handle;
};

/* Request for the ECC information of one block/sub-block of a device.
 * Presumably the input of RAS_CMD__GET_BLOCK_ECC_STATUS -- confirm against
 * the handler table.
 */
struct ras_cmd_block_ecc_info_req {
        struct ras_cmd_dev_handle dev;  /* target device */
        uint32_t block_id;
        uint32_t subblock_id;
        uint32_t reserved[4];
};

/* Response carrying three error counters for a block.
 * ce/ue/de presumably stand for correctable / uncorrectable / deferred
 * errors -- TODO confirm.
 */
struct ras_cmd_block_ecc_info_rsp {
        uint32_t version;
        uint32_t ce_count;
        uint32_t ue_count;
        uint32_t de_count;
        uint32_t reserved[6];
};

/* Request describing an error injection: target device, block/sub-block,
 * address, error type and instance mask, plus a 64-bit word of extra
 * options that can be accessed either as bit-fields or as 'value'.
 */
struct ras_cmd_inject_error_req {
        struct ras_cmd_dev_handle dev;  /* target device */
        uint32_t block_id;
        uint32_t subblock_id;
        uint64_t address;
        uint32_t error_type;    /* presumably an enum ras_error_type value -- confirm */
        uint32_t instance_mask;
        /* 6 + 10 + 48 = 64 bits, overlaid by 'value' */
        union {
                struct {
                        /* vf index */
                        uint64_t vf_idx : 6;
                        /* method of error injection. i.e persistent, coherent etc */
                        uint64_t method : 10;
                        uint64_t rsv    : 48;
                };
                uint64_t value;
        };
        uint32_t reserved[8];
};

/* Response to an error injection request.
 * NOTE(review): 'address' is presumably the address the error was actually
 * injected at -- confirm in the handler.
 */
struct ras_cmd_inject_error_rsp {
        uint32_t version;
        uint32_t reserved[5];
        uint64_t address;
};

/* Description of a single device; array element of
 * struct ras_cmd_devices_info_rsp.
 */
struct ras_cmd_dev_info {
        uint64_t dev_handle;    /* handle identifying the device */
        uint32_t location_id;
        uint32_t ecc_enabled;
        uint32_t ecc_supported;
        uint32_t vf_num;        /* presumably the number of VFs -- confirm */
        uint32_t asic_type;
        uint32_t oam_id;
        uint32_t reserved[8];
};

/* Response listing devices; devs[] has room for RAS_CMD_MAX_GPU_NUM entries.
 * dev_num presumably gives the number of valid entries -- confirm.
 */
struct ras_cmd_devices_info_rsp {
        uint32_t version;
        uint32_t dev_num;
        uint32_t reserved[6];
        struct ras_cmd_dev_info devs[RAS_CMD_MAX_GPU_NUM];
};

/* One bad-page record; array element of struct ras_cmd_bad_pages_info_rsp.
 * Two anonymous unions give alternative names to the same storage:
 * address/offset and bank/cu.
 */
struct ras_cmd_bad_page_record {
        union {
                uint64_t address;
                uint64_t offset;
        };
        uint64_t retired_page;
        uint64_t ts;            /* presumably a timestamp -- confirm units with producer */

        uint32_t err_type;

        union {
                unsigned char bank;
                unsigned char cu;
        };

        unsigned char mem_channel;
        unsigned char mcumc_id;

        unsigned char valid;
        unsigned char reserved[8];
};

/* Request for one group of bad-page records of a device.
 * Presumably the input of RAS_CMD__GET_BAD_PAGES -- confirm.
 */
struct ras_cmd_bad_pages_info_req {
        struct ras_cmd_dev_handle device;       /* target device */
        uint32_t group_index;                   /* which group of records to return */
        uint32_t reserved[5];
};

/* Response carrying one group of bad-page records; records[] has room for
 * RAS_CMD_MAX_BAD_PAGES_PER_GROUP entries.
 * bp_in_group / bp_total_cnt presumably give the number of valid records in
 * this group and the total number of bad pages -- confirm.
 */
struct ras_cmd_bad_pages_info_rsp {
        uint32_t version;
        uint32_t group_index;
        uint32_t bp_in_group;
        uint32_t bp_total_cnt;
        uint32_t reserved[4];
        struct ras_cmd_bad_page_record records[RAS_CMD_MAX_BAD_PAGES_PER_GROUP];
};

/* Interface-info query request; currently contains only reserved words. */
struct ras_query_interface_info_req {
        uint32_t reserved[8];
};

/* Interface-info query response; filled in by ras_cmd_query_interface_info().
 * Carries the command and platform versions as separate 32-bit major/minor
 * numbers plus the interface type.
 */
struct ras_query_interface_info_rsp {
        uint32_t version;
        uint32_t ras_cmd_major_ver;
        uint32_t ras_cmd_minor_ver;
        uint32_t plat_major_ver;
        uint32_t plat_minor_ver;
        uint8_t  interface_type;        /* presumably an enum ras_cmd_interface_type value -- confirm */
        uint8_t  rsv[3];
        uint32_t reserved[8];
};

/* Number of entries in range[] of the response below */
#define RAS_MAX_NUM_SAFE_RANGES 64
/* Response listing safe frame-buffer address ranges as start/size pairs.
 * num_ranges presumably gives the number of valid entries in range[] --
 * confirm.
 */
struct ras_cmd_ras_safe_fb_address_ranges_rsp {
        uint32_t version;
        uint32_t num_ranges;
        uint32_t reserved[4];
        struct {
                uint64_t start;
                uint64_t size;
                uint32_t idx;
                uint32_t reserved[3];
        } range[RAS_MAX_NUM_SAFE_RANGES];
};

/* Address representations accepted by the frame-buffer address translation
 * request (see src_addr_type / dest_addr_type). Values pinned explicitly.
 */
enum ras_fb_addr_type {
        RAS_FB_ADDR_SOC_PHY = 0,        /* SPA */
        RAS_FB_ADDR_BANK    = 1,
        RAS_FB_ADDR_VF_PHY  = 2,        /* GPA */
        RAS_FB_ADDR_UNKNOWN = 3
};

/* Frame-buffer address expressed as memory-bank coordinates; one member of
 * union ras_translate_fb_address and the bank-side type of the
 * ras_cmd_translate_* helpers declared in this header.
 */
struct ras_fb_bank_addr {
        uint32_t stack_id; /* SID */
        uint32_t bank_group;
        uint32_t bank;
        uint32_t row;
        uint32_t column;
        uint32_t channel;
        uint32_t subchannel; /* Also called Pseudochannel (PC) */
        uint32_t reserved[3];
};

/* Frame-buffer address qualified by a VF index; one member of
 * union ras_translate_fb_address.
 */
struct ras_fb_vf_phy_addr {
        uint32_t vf_idx;
        uint32_t reserved;
        uint64_t addr;
};

/* One frame-buffer address in any of three representations sharing storage.
 * Which member is meaningful is presumably selected by the accompanying
 * enum ras_fb_addr_type value -- confirm in the handler.
 */
union ras_translate_fb_address {
        struct ras_fb_bank_addr bank_addr;
        uint64_t soc_phy_addr;
        struct ras_fb_vf_phy_addr vf_phy_addr;
};

/* Request to translate a frame-buffer address from one representation to
 * another. Presumably the input of RAS_CMD__TRANSLATE_FB_ADDRESS -- confirm.
 */
struct ras_cmd_translate_fb_address_req {
        struct ras_cmd_dev_handle dev;                  /* target device */
        enum ras_fb_addr_type src_addr_type;            /* representation of the input */
        enum ras_fb_addr_type dest_addr_type;           /* representation requested */
        union ras_translate_fb_address trans_addr;      /* address to translate */
};

/* Response to an address translation request; trans_addr presumably holds
 * the translated address in the requested representation -- confirm.
 */
struct ras_cmd_translate_fb_address_rsp {
        uint32_t version;
        uint32_t reserved[5];
        union ras_translate_fb_address trans_addr;
};

/* Link topology query between two devices identified by their handles. */
struct ras_dev_link_topology_req {
        struct ras_cmd_dev_handle src;  /* source device */
        struct ras_cmd_dev_handle dst;  /* destination device */
};

/* Link topology query response: status, type and hop count of the link. */
struct ras_dev_link_topology_rsp {
        uint32_t  version;
        uint32_t  link_status;  /* HW status of the link */
        uint32_t  link_type;    /* type of the link */
        uint32_t  num_hops;     /* number of hops */
        uint32_t reserved[8];
};

/* CPER snapshot request; carries only the target device handle. */
struct ras_cmd_cper_snapshot_req {
        struct ras_cmd_dev_handle dev;
};

/* CPER snapshot response: a record count and a pair of 64-bit record ids
 * (presumably the oldest and newest available records -- confirm).
 */
struct ras_cmd_cper_snapshot_rsp {
        uint32_t version;
        uint32_t reserved[4];
        uint32_t total_cper_num;
        uint64_t start_cper_id;
        uint64_t latest_cper_id;
};

/* Request for a run of CPER records starting at a given record id.
 * NOTE(review): buf_ptr is a 64-bit integer that presumably holds the
 * address of a caller-provided buffer of buf_size bytes -- confirm how the
 * handler validates and accesses it.
 */
struct ras_cmd_cper_record_req {
        struct ras_cmd_dev_handle dev;  /* target device */
        uint64_t cper_start_id;         /* id of the first record requested */
        uint32_t cper_num;              /* number of records requested */
        uint32_t buf_size;
        uint64_t buf_ptr;
        uint32_t reserved[4];
};

/* Response to a CPER record request. The field names suggest the amount of
 * data and number of records actually returned and the number still
 * remaining -- TODO confirm exact semantics in the handler.
 */
struct ras_cmd_cper_record_rsp {
        uint32_t version;
        uint32_t real_data_size;
        uint32_t real_cper_num;
        uint32_t remain_num;
        uint32_t reserved[4];
};

/* Batch-trace snapshot request; carries only the target device handle. */
struct ras_cmd_batch_trace_snapshot_req {
        struct ras_cmd_dev_handle dev;
};

/* Batch-trace snapshot response: a batch count and a pair of 64-bit batch
 * ids (presumably the oldest and newest available batches -- confirm).
 * Same shape as struct ras_cmd_cper_snapshot_rsp.
 */
struct ras_cmd_batch_trace_snapshot_rsp {
        uint32_t version;
        uint32_t reserved[4];
        uint32_t total_batch_num;
        uint64_t start_batch_id;
        uint64_t latest_batch_id;
};

/* Request for a run of trace batches starting at a given batch id. */
struct ras_cmd_batch_trace_record_req {
        struct ras_cmd_dev_handle dev;  /* target device */
        uint64_t start_batch_id;        /* id of the first batch requested */
        uint32_t batch_num;             /* number of batches requested */
        uint32_t reserved[5];
};

/* Descriptor of one trace batch; array element of
 * struct ras_cmd_batch_trace_record_rsp.
 * NOTE(review): offset/trace_num presumably locate this batch's entries
 * within the records[] array of that response -- confirm in the handler.
 */
struct batch_ras_trace_info {
        uint64_t batch_id;
        uint16_t offset;
        uint8_t  trace_num;
        uint8_t  rsv;
        uint32_t reserved;
};

/* Capacities of batchs[] and records[] in the response below */
#define RAS_CMD_MAX_BATCH_NUM  300
#define RAS_CMD_MAX_TRACE_NUM  300
/* Response to a batch-trace record request: up to RAS_CMD_MAX_BATCH_NUM
 * batch descriptors and up to RAS_CMD_MAX_TRACE_NUM log records.
 * struct ras_log_info is not defined in this header (presumably it comes
 * from ras_log_ring.h).
 */
struct ras_cmd_batch_trace_record_rsp {
        uint32_t version;
        uint16_t real_batch_num;        /* presumably number of valid batchs[] entries -- confirm */
        uint16_t remain_num;
        uint64_t start_batch_id;
        uint32_t reserved[2];
        struct batch_ras_trace_info batchs[RAS_CMD_MAX_BATCH_NUM];
        struct ras_log_info records[RAS_CMD_MAX_TRACE_NUM];
};

/* Auto-update configuration request for a command.
 * NOTE(review): the meaning of mode/addr/len is not established by this
 * header; presumably they select an update mode and describe a buffer for
 * the command identified by cmd_id -- confirm in the
 * RAS_CMD__SET_CMD_AUTO_UPDATE handler.
 */
struct ras_cmd_auto_update_req {
        struct ras_cmd_dev_handle dev;  /* target device */
        uint32_t mode;
        uint32_t cmd_id;
        uint64_t addr;
        uint32_t len;
        uint32_t reserved[5];
};

/* Auto-update response; carries only a version word and reserved space. */
struct ras_cmd_auto_update_rsp {
        uint32_t version;
        uint32_t reserved[4];
};

/* All-blocks ECC request; carries only the target device handle. */
struct ras_cmd_blocks_ecc_req {
        struct ras_cmd_dev_handle dev;
};

/* Error counters of one block; array element of
 * struct ras_cmd_blocks_ecc_rsp. ce/ue/de presumably stand for
 * correctable / uncorrectable / deferred errors -- TODO confirm.
 */
struct ras_cmd_block_ecc {
        uint32_t ce_count;
        uint32_t ue_count;
        uint32_t de_count;
};

/* Number of entries in blocks[] of the response below */
#define MAX_RAS_BLOCK_NUM  20
/* All-blocks ECC response: one counter set per block.
 * NOTE(review): how array indices map to block ids is not shown in this
 * header -- confirm in the handler.
 */
struct ras_cmd_blocks_ecc_rsp {
        uint32_t version;
        uint32_t reserved[5];
        struct ras_cmd_block_ecc blocks[MAX_RAS_BLOCK_NUM];
};

#pragma pack(pop)

/*
 * RAS command API. All functions operate on a RAS core context. The int
 * return values are status codes whose exact convention is defined by the
 * implementation (presumably 0 on success -- confirm).
 */

/* Set up / tear down the command handling state of @ras_core. */
int ras_cmd_init(struct ras_core_context *ras_core);
int ras_cmd_fini(struct ras_core_context *ras_core);
/* Handle the command described by @cmd; @data is an opaque pointer that has
 * the same position as the 'data' argument of ras_cmd_func_map handlers.
 */
int rascore_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd, void *data);
/* Return the 64-bit device handle associated with @ras_core. */
uint64_t ras_cmd_get_dev_handle(struct ras_core_context *ras_core);
/* Fill @rsp with interface information. */
int ras_cmd_query_interface_info(struct ras_core_context *ras_core,
        struct ras_query_interface_info_rsp *rsp);
/* Translate SoC physical address @soc_pa into bank coordinates, written to
 * @bank_addr.
 */
int ras_cmd_translate_soc_pa_to_bank(struct ras_core_context *ras_core,
                        uint64_t soc_pa, struct ras_fb_bank_addr *bank_addr);
/* Translate bank coordinates @bank_addr (passed by value) into a SoC
 * physical address, written to @soc_pa.
 */
int ras_cmd_translate_bank_to_soc_pa(struct ras_core_context *ras_core,
                        struct ras_fb_bank_addr bank_addr, uint64_t *soc_pa);
#endif