/* drivers/accel/amdxdna/aie2_pci.h */
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AIE2_PCI_H_
#define _AIE2_PCI_H_

#include <drm/amdxdna_accel.h>
#include <linux/build_bug.h>
#include <linux/semaphore.h>

#include "amdxdna_mailbox.h"

/* Polling interval and timeout used when waiting on device state */
#define AIE2_INTERVAL   20000   /* us */
#define AIE2_TIMEOUT    1000000 /* us */

/* Firmware determines device memory base address and size */
#define AIE2_DEVM_BASE  0x4000000
#define AIE2_DEVM_SIZE  SZ_64M

/* Resolve the struct pci_dev backing a device handle */
#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))

/* Translate a device-visible address into an offset within the SRAM/mailbox BAR */
#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)

/* Look up BAR index / register offset from the per-chip tables in priv */
#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)

/*
 * Compute the mapped address of an SMU register / SRAM location.  The
 * handle is copied into a local so it is evaluated exactly once; the
 * (ndev) argument is parenthesized for macro hygiene, matching the
 * MBOX_SIZE() macro below.
 */
#define SMU_REG(ndev, idx) \
({ \
        typeof(ndev) _ndev = (ndev); \
        ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
})
#define SRAM_GET_ADDR(ndev, idx) \
({ \
        typeof(ndev) _ndev = (ndev); \
        ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
})

/* Mailbox channel slot size */
#define CHAN_SLOT_SZ SZ_8K
/*
 * Usable mailbox size: value from the per-chip priv data, or the full
 * mailbox BAR length when priv->mbox_size is 0.
 */
#define MBOX_SIZE(ndev) \
({ \
        typeof(ndev) _ndev = (ndev); \
        ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
        pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})

/* Indices into amdxdna_dev_priv.smu_regs_off[] */
enum aie2_smu_reg_idx {
        SMU_CMD_REG = 0,
        SMU_ARG_REG,
        SMU_INTR_REG,
        SMU_RESP_REG,
        SMU_OUT_REG,
        SMU_MAX_REGS /* Keep this at the end */
};

/* Indices into amdxdna_dev_priv.sram_offs[] */
enum aie2_sram_reg_idx {
        MBOX_CHANN_OFF = 0, /* mailbox channel region */
        FW_ALIVE_OFF,       /* firmware-alive indicator */
        SRAM_MAX_INDEX /* Keep this at the end */
};

/*
 * Indices into amdxdna_dev_priv.psp_regs_off[].  Entries before
 * PSP_NUM_IN_REGS are input (command/argument) registers; the rest are
 * interrupt/status/response registers.
 */
enum psp_reg_idx {
        PSP_CMD_REG = 0,
        PSP_ARG0_REG,
        PSP_ARG1_REG,
        PSP_ARG2_REG,
        PSP_NUM_IN_REGS, /* number of input registers */
        PSP_INTR_REG = PSP_NUM_IN_REGS,
        PSP_STATUS_REG,
        PSP_RESP_REG,
        PSP_PWAITMODE_REG,
        PSP_MAX_REGS /* Keep this at the end */
};

/* Forward declarations; full definitions live in other amdxdna headers */
struct amdxdna_client;
struct amdxdna_fw_ver;
struct amdxdna_hwctx;
struct amdxdna_sched_job;

/* Configuration handed to aie2m_psp_create() */
struct psp_config {
        const void      *fw_buf;        /* firmware image to load */
        u32             fw_size;        /* firmware image size in bytes */
        void __iomem    *psp_regs[PSP_MAX_REGS]; /* mapped PSP registers */
};

/* AIE array version, filled by aie2_query_aie_version() */
struct aie_version {
        u16 major;
        u16 minor;
};

/* Per-tile-type resource counts within the AIE array */
struct aie_tile_metadata {
        u16 row_count;
        u16 row_start;
        u16 dma_channel_count;
        u16 lock_count;
        u16 event_reg_count;
};

/* AIE array geometry and tile metadata, filled by aie2_query_aie_metadata() */
struct aie_metadata {
        u32 size;
        u16 cols;
        u16 rows;
        struct aie_version version;
        struct aie_tile_metadata core; /* compute tiles */
        struct aie_tile_metadata mem;  /* memory tiles */
        struct aie_tile_metadata shim; /* shim tiles */
};

/* Selects which rt_config entries apply; see aie2_runtime_cfg() */
enum rt_config_category {
        AIE2_RT_CFG_INIT,
        AIE2_RT_CFG_CLK_GATING,
        AIE2_RT_CFG_FORCE_PREEMPT,
        AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
};

/* One runtime configuration entry; (type, value) matches aie2_set_runtime_cfg() */
struct rt_config {
        u32     type;
        u32     value;
        u32     category;   /* enum rt_config_category */
        /* NOTE(review): presumably aie2_fw_feature bits gating this entry — confirm */
        unsigned long feature_mask;
};

/* Clock frequencies for one DPM level (units per aie2_smu.c tables — TODO confirm) */
struct dpm_clk_freq {
        u32     npuclk;
        u32     hclk;
};

/*
 * Define the maximum number of pending commands in a hardware context.
 * Must be power of 2: get_job_idx() relies on it to reduce a sequence
 * number to a cmd_buf[] slot with a cheap mask instead of a modulo.
 */
#define HWCTX_MAX_CMDS          4
static_assert((HWCTX_MAX_CMDS & (HWCTX_MAX_CMDS - 1)) == 0,
              "HWCTX_MAX_CMDS must be a power of 2");
/* Map a monotonically increasing sequence number to a cmd_buf[] slot */
#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
/* Per-hardware-context private state, managed by aie2_hwctx.c */
struct amdxdna_hwctx_priv {
        struct amdxdna_gem_obj          *heap;       /* heap BO backing device memory */
        void                            *mbox_chann; /* mailbox channel handle */

        struct drm_gpu_scheduler        sched;  /* DRM scheduler for this context */
        struct drm_sched_entity         entity; /* entity jobs are queued on */

        struct mutex                    io_lock; /* protect seq and cmd order */
        struct wait_queue_head          job_free_wq; /* waiters for a free job slot */
        u32                             num_pending; /* commands submitted, not yet retired */
        u64                             seq;         /* next job sequence number */
        struct semaphore                job_sem;     /* throttles in-flight jobs */
        bool                            job_done;

        /* Completed job counter */
        u64                             completed;

        /* Command slot BOs, indexed by get_job_idx(seq) */
        struct amdxdna_gem_obj          *cmd_buf[HWCTX_MAX_CMDS];
        struct drm_syncobj              *syncobj;
};

/* Device bring-up state, tracked in amdxdna_dev_hdl.dev_status */
enum aie2_dev_status {
        AIE2_DEV_UNINIT,
        AIE2_DEV_INIT,
        AIE2_DEV_START,
};

/*
 * Callbacks for building execution request messages and command-list
 * slots from command BOs.  NOTE(review): the installed table presumably
 * depends on firmware capabilities — confirm in aie2_message.c.
 */
struct aie2_exec_msg_ops {
        int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
                           size_t *size, u32 *msg_op);
        int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
                            size_t *size, u32 *msg_op);
        void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
        int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
        int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
        int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
        int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
        u32 (*get_chain_msg_op)(u32 cmd_op);
};

/* Runtime state for one AIE2 device instance */
struct amdxdna_dev_hdl {
        struct amdxdna_dev              *xdna;  /* owning amdxdna device */
        const struct amdxdna_dev_priv   *priv;  /* static per-chip description */
        void                    __iomem *sram_base; /* mapped SRAM BAR */
        void                    __iomem *smu_base;  /* mapped SMU registers */
        void                    __iomem *mbox_base; /* mapped mailbox BAR */
        struct psp_device               *psp_hdl;

        /* Management channel resources and mailbox protocol version */
        struct xdna_mailbox_chann_res   mgmt_x2i;
        struct xdna_mailbox_chann_res   mgmt_i2x;
        u32                             mgmt_chan_idx;
        u32                             mgmt_prot_major;
        u32                             mgmt_prot_minor;

        u32                             total_col; /* total AIE columns */
        struct aie_version              version;
        struct aie_metadata             metadata;
        unsigned long                   feature_mask; /* aie2_fw_feature bits; see AIE2_FEATURE_ON() */
        struct aie2_exec_msg_ops        *exec_msg_ops;

        /* power management and clock */
        enum amdxdna_power_mode_type    pw_mode;
        u32                             dpm_level;     /* current DPM level */
        u32                             dft_dpm_level; /* default DPM level */
        u32                             max_dpm_level;
        u32                             clk_gating;
        u32                             npuclk_freq;
        u32                             hclk_freq;
        u32                             max_tops;
        u32                             curr_tops;
        u32                             force_preempt_enabled;
        u32                             frame_boundary_preempt;

        /* Mailbox and the management channel */
        struct mailbox                  *mbox;
        struct mailbox_channel          *mgmt_chann;
        struct async_events             *async_events;

        enum aie2_dev_status            dev_status;
        u32                             hwctx_num; /* number of active hardware contexts */

        struct amdxdna_async_error      last_async_err;
};

/*
 * Initialize one aie2_bar_off_pair table entry: token-pastes @bar into
 * <bar>_BAR_INDEX / <bar>_BAR_BASE and stores @reg_addr's offset within
 * that BAR.
 */
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
        [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}

/* Location of a register: PCI BAR index plus offset within that BAR */
struct aie2_bar_off_pair {
        int     bar_idx;
        u32     offset;
};

/* Per-generation hardware callbacks (e.g. npu1_set_dpm, npu4_set_dpm) */
struct aie2_hw_ops {
        int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};

/* Firmware feature bits stored in amdxdna_dev_hdl.feature_mask */
enum aie2_fw_feature {
        AIE2_NPU_COMMAND,
        AIE2_PREEMPT,
        AIE2_TEMPORAL_ONLY,
        AIE2_FEATURE_MAX
};

/*
 * Maps a firmware version range (major, [min_minor, max_minor]) to the
 * feature bits it supports — TODO confirm matching rules against
 * npu4_fw_feature_table users.
 */
struct aie2_fw_feature_tbl {
        u64 features; /* bitmask of enum aie2_fw_feature */
        u32 major;
        u32 max_minor;
        u32 min_minor;
};

/* Test whether an aie2_fw_feature bit is set; (feature) parenthesized for macro hygiene */
#define AIE2_FEATURE_ON(ndev, feature)  test_bit((feature), &(ndev)->feature_mask)

/* Static per-chip description; one instance exists per supported NPU variant */
struct amdxdna_dev_priv {
        const char                      *fw_path;     /* firmware image path */
        const struct rt_config          *rt_config;   /* default runtime configs */
        const struct dpm_clk_freq       *dpm_clk_tbl; /* DPM level -> clocks */
        const struct aie2_fw_feature_tbl *fw_feature_tbl;

/* col_align values */
#define COL_ALIGN_NONE   0
#define COL_ALIGN_NATURE 1
        u32                             col_align;
        u32                             mbox_dev_addr;
        /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
        u32                             mbox_size;
        u32                             hwctx_limit; /* max hardware contexts */
        u32                             sram_dev_addr;
        struct aie2_bar_off_pair        sram_offs[SRAM_MAX_INDEX];
        struct aie2_bar_off_pair        psp_regs_off[PSP_MAX_REGS];
        struct aie2_bar_off_pair        smu_regs_off[SMU_MAX_REGS];
        struct aie2_hw_ops              hw_ops;
};

/* Device ops implemented by the aie2 backend */
extern const struct amdxdna_dev_ops aie2_ops;

/* Apply the runtime configuration entries matching @category */
int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
                     enum rt_config_category category, u32 *val);

/* aie2 npu hw config */
extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];

/* aie2_smu.c: SMU init/teardown and DPM control */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
/* Per-generation DPM setters, installed via aie2_hw_ops.set_dpm */
int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);

/* aie2_pm.c: power mode and DPM level management */
int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);

/* aie2_psp.c: PSP firmware load, start/stop and wait-mode polling */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
int aie2_psp_start(struct psp_device *psp);
void aie2_psp_stop(struct psp_device *psp);
int aie2_psp_waitmode_poll(struct psp_device *psp);

/* aie2_error.c: asynchronous firmware error event handling */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);
int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
                               struct amdxdna_drm_get_array *args);

/* aie2_message.c: firmware request/response helpers */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
                                struct amdxdna_fw_ver *fw_ver);
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
                         char __user *buf, u32 size,
                         struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
                                 void *handle, int (*cb)(void*, void __iomem *, size_t));
/*
 * Command submission helpers.  @notify_cb is a completion callback;
 * NOTE(review): presumably invoked with the firmware response payload —
 * confirm against the mailbox implementation.
 */
int aie2_config_cu(struct amdxdna_hwctx *hwctx,
                   int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
                 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
                                struct amdxdna_sched_job *job,
                                int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
                               struct amdxdna_sched_job *job,
                               int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
                 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
                         int (*notify_cb)(void *, void __iomem *, size_t));
/* Allocate a DMA-able message buffer; release with aie2_free_msg_buffer() */
void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
                            dma_addr_t *dma_addr);
/*
 * Free a buffer obtained from aie2_alloc_msg_buffer().  All arguments are
 * parenthesized so callers may pass arbitrary expressions.
 */
#define aie2_free_msg_buffer(ndev, size, buff_addr, dma_addr)           \
        dma_free_noncoherent((ndev)->xdna->ddev.dev, (size), (buff_addr), \
                             (dma_addr), DMA_FROM_DEVICE)

/* aie2_hwctx.c: hardware context lifecycle and job submission */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
/* MMU-notifier invalidation hook for userptr-backed BOs */
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);

#endif /* _AIE2_PCI_H_ */