drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/cleanup.h>
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/interconnect.h>
#include <linux/iopoll.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/ratelimit.h>
#include <linux/spinlock.h>

#include "arm-smmu.h"
#include "arm-smmu-qcom.h"

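/*
 * Register offsets and field masks for the per-TBU debug interface that
 * is used to halt the TBU and run ATOS/ECATS translation lookups.
 */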
#define TBU_DBG_TIMEOUT_US              100
#define DEBUG_AXUSER_REG                0x30
#define DEBUG_AXUSER_CDMID              GENMASK_ULL(43, 36)
#define DEBUG_AXUSER_CDMID_VAL          0xff
#define DEBUG_PAR_REG                   0x28
#define DEBUG_PAR_FAULT_VAL             BIT(0)
#define DEBUG_PAR_PA                    GENMASK_ULL(47, 12)
#define DEBUG_SID_HALT_REG              0x0
#define DEBUG_SID_HALT_VAL              BIT(16)
#define DEBUG_SID_HALT_SID              GENMASK(9, 0)
#define DEBUG_SR_HALT_ACK_REG           0x20
#define DEBUG_SR_HALT_ACK_VAL           BIT(1)
#define DEBUG_SR_ECATS_RUNNING_VAL      BIT(0)
#define DEBUG_TXN_AXCACHE               GENMASK(5, 2)
#define DEBUG_TXN_AXPROT                GENMASK(8, 6)
#define DEBUG_TXN_AXPROT_PRIV           0x1
#define DEBUG_TXN_AXPROT_NSEC           0x2
#define DEBUG_TXN_TRIGG_REG             0x18
#define DEBUG_TXN_TRIGGER               BIT(0)
#define DEBUG_VA_ADDR_REG               0x8

static LIST_HEAD(tbu_list);
static DEFINE_MUTEX(tbu_list_lock);
static DEFINE_SPINLOCK(atos_lock);

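/*
 * Per-TBU instance state. Each TBU is registered on tbu_list at probe
 * time and later matched against a faulting stream ID through its parent
 * SMMU node and stream ID range.
 */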
struct qcom_tbu {
        struct device *dev;
        struct device_node *smmu_np;
        u32 sid_range[2];
        struct list_head list;
        struct clk *clk;
        struct icc_path *path;
        void __iomem *base;
        spinlock_t halt_lock; /* multiple halt or resume can't execute concurrently */
        int halt_count;
};

static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
        return container_of(smmu, struct qcom_smmu, smmu);
}

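/*
 * Dump the TBU power and sync/invalidate status registers (read through
 * SCM) when a TLB sync times out, to help diagnose a wedged SMMU.
 */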
void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
{
        int ret;
        /* Zero-init so the summary below is sane even if an SCM read fails */
        u32 tbu_pwr_status = 0, sync_inv_ack = 0, sync_inv_progress = 0;
        struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
        const struct qcom_smmu_config *cfg;
        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);

        if (__ratelimit(&rs)) {
                dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");

                cfg = qsmmu->data->cfg;
                if (!cfg)
                        return;

                ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
                                        &tbu_pwr_status);
                if (ret)
                        dev_err(smmu->dev,
                                "Failed to read TBU power status: %d\n", ret);

                ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
                                        &sync_inv_ack);
                if (ret)
                        dev_err(smmu->dev,
                                "Failed to read TBU sync/inv ack status: %d\n", ret);

                ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
                                        &sync_inv_progress);
                if (ret)
                        dev_err(smmu->dev,
                                "Failed to read TCU sync/inv progress: %d\n", ret);

                dev_err(smmu->dev,
                        "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
                        tbu_pwr_status, sync_inv_ack, sync_inv_progress);
        }
}

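/* Find the TBU serving @sid on the SMMU instance that @qsmmu represents. */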
static struct qcom_tbu *qcom_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
{
        struct qcom_tbu *tbu;
        u32 start, end;

        guard(mutex)(&tbu_list_lock);

        if (list_empty(&tbu_list))
                return NULL;

        list_for_each_entry(tbu, &tbu_list, list) {
                start = tbu->sid_range[0];
                end = start + tbu->sid_range[1];

                if (qsmmu->smmu.dev->of_node == tbu->smmu_np &&
                    start <= sid && sid < end)
                        return tbu;
        }
        dev_err(qsmmu->smmu.dev, "Unable to find TBU for sid 0x%x\n", sid);

        return NULL;
}

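/*
 * Halt client traffic through the TBU so that ATOS/ECATS lookups can run
 * safely. Halt requests nest via halt_count: the hardware is only touched
 * for the first halt and the matching final resume.
 */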
static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_domain)
{
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        int ret = 0, idx = smmu_domain->cfg.cbndx;
        u32 val, fsr, status;

        guard(spinlock_irqsave)(&tbu->halt_lock);
        if (tbu->halt_count) {
                tbu->halt_count++;
                return ret;
        }

        val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
        val |= DEBUG_SID_HALT_VAL;
        writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

        fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
        if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) {
                u32 sctlr_orig, sctlr;

                /*
                 * We are in a fault. Our request to halt the bus will not
                 * complete until transactions in front of us (such as the fault
                 * itself) have completed. Disable iommu faults and terminate
                 * any existing transactions.
                 */
                sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
                sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
        }

        if (readl_poll_timeout_atomic(tbu->base + DEBUG_SR_HALT_ACK_REG, status,
                                      (status & DEBUG_SR_HALT_ACK_VAL),
                                      0, TBU_DBG_TIMEOUT_US)) {
                dev_err(tbu->dev, "Timeout while trying to halt TBU!\n");
                ret = -ETIMEDOUT;

                val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
                val &= ~DEBUG_SID_HALT_VAL;
                writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

                return ret;
        }

        tbu->halt_count = 1;

        return ret;
}

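/* Drop one halt reference and un-halt the TBU once the count reaches zero. */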
static void qcom_tbu_resume(struct qcom_tbu *tbu)
{
        u32 val;

        guard(spinlock_irqsave)(&tbu->halt_lock);
        if (!tbu->halt_count) {
                WARN(1, "%s: halt_count is 0\n", dev_name(tbu->dev));
                return;
        }

        if (tbu->halt_count > 1) {
                tbu->halt_count--;
                return;
        }

        val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
        val &= ~DEBUG_SID_HALT_VAL;
        writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

        tbu->halt_count = 0;
}

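/*
 * Program the target stream ID and IOVA into the TBU debug registers,
 * trigger an ECATS translation and poll for the result. Returns the
 * translated physical address, or 0 on fault or timeout. The caller must
 * have halted the TBU and must hold atos_lock.
 */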
static phys_addr_t qcom_tbu_trigger_atos(struct arm_smmu_domain *smmu_domain,
                                         struct qcom_tbu *tbu, dma_addr_t iova, u32 sid)
{
        bool atos_timedout = false;
        phys_addr_t phys = 0;
        ktime_t timeout;
        u64 val;

        /* Set address and stream-id */
        val = readq_relaxed(tbu->base + DEBUG_SID_HALT_REG);
        val &= ~DEBUG_SID_HALT_SID;
        val |= FIELD_PREP(DEBUG_SID_HALT_SID, sid);
        writeq_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
        writeq_relaxed(iova, tbu->base + DEBUG_VA_ADDR_REG);
        val = FIELD_PREP(DEBUG_AXUSER_CDMID, DEBUG_AXUSER_CDMID_VAL);
        writeq_relaxed(val, tbu->base + DEBUG_AXUSER_REG);

        /* Write-back read and write-allocate */
        val = FIELD_PREP(DEBUG_TXN_AXCACHE, 0xf);

        /* Non-secure access */
        val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_NSEC);

        /* Privileged access */
        val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_PRIV);

        val |= DEBUG_TXN_TRIGGER;
        writeq_relaxed(val, tbu->base + DEBUG_TXN_TRIGG_REG);

        timeout = ktime_add_us(ktime_get(), TBU_DBG_TIMEOUT_US);
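        /* Wait for the translation to complete, fault, or time out */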
        for (;;) {
                val = readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
                if (!(val & DEBUG_SR_ECATS_RUNNING_VAL))
                        break;
                val = readl_relaxed(tbu->base + DEBUG_PAR_REG);
                if (val & DEBUG_PAR_FAULT_VAL)
                        break;
                if (ktime_compare(ktime_get(), timeout) > 0) {
                        atos_timedout = true;
                        break;
                }
        }

        val = readq_relaxed(tbu->base + DEBUG_PAR_REG);
        if (val & DEBUG_PAR_FAULT_VAL)
                dev_err(tbu->dev, "ATOS generated a fault interrupt! PAR = %llx, SID=0x%x\n",
                        val, sid);
        else if (atos_timedout)
                dev_err_ratelimited(tbu->dev, "ATOS translation timed out!\n");
        else
                phys = FIELD_GET(DEBUG_PAR_PA, val);

        /* Reset hardware */
        writeq_relaxed(0, tbu->base + DEBUG_TXN_TRIGG_REG);
        writeq_relaxed(0, tbu->base + DEBUG_VA_ADDR_REG);
        val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
        val &= ~DEBUG_SID_HALT_SID;
        writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

        return phys;
}

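/*
 * Run a hardware iova-to-phys translation for @sid: vote for interconnect
 * bandwidth and clocks, halt the TBU, trigger ATOS with context faults
 * masked, then restore the previous state. Returns 0 on failure.
 */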
static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain,
                                     dma_addr_t iova, u32 sid)
{
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
        int idx = smmu_domain->cfg.cbndx;
        struct qcom_tbu *tbu;
        u32 sctlr_orig, sctlr;
        phys_addr_t phys = 0;
        int attempt = 0;
        int ret;
        u64 fsr;

        tbu = qcom_find_tbu(qsmmu, sid);
        if (!tbu)
                return 0;

        /* This function returns a physical address, so signal failure as 0 */
        ret = icc_set_bw(tbu->path, 0, UINT_MAX);
        if (ret)
                return 0;

        ret = clk_prepare_enable(tbu->clk);
        if (ret)
                goto disable_icc;

        ret = qcom_tbu_halt(tbu, smmu_domain);
        if (ret)
                goto disable_clk;

        /*
         * ATOS/ECATS can trigger the fault interrupt, so disable it temporarily
         * and check for an interrupt manually.
         */
        sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
        sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);

        fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
        if (fsr & ARM_SMMU_CB_FSR_FAULT) {
                /* Clear pending interrupts */
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

                /*
                 * TBU halt takes care of resuming any stalled transaction.
                 * It is kept here for completeness' sake.
                 */
                if (fsr & ARM_SMMU_CB_FSR_SS)
                        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
                                          ARM_SMMU_RESUME_TERMINATE);
        }

        /* Allow only one ATOS operation at a time */
        scoped_guard(spinlock_irqsave, &atos_lock) {
                /*
                 * If the translation fails, retry the lookup up to two more
                 * times.
                 */
                do {
                        phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid);

                        fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
                        if (fsr & ARM_SMMU_CB_FSR_FAULT) {
                                /* Clear pending interrupts */
                                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

                                if (fsr & ARM_SMMU_CB_FSR_SS)
                                        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
                                                          ARM_SMMU_RESUME_TERMINATE);
                        }
                } while (!phys && attempt++ < 2);

                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
        }
        qcom_tbu_resume(tbu);

        /* Read back to complete prior write transactions */
        readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);

disable_clk:
        clk_disable_unprepare(tbu->clk);
disable_icc:
        icc_set_bw(tbu->path, 0, 0);

        return phys;
}

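/*
 * Resolve the faulting stream ID from CBFRSYNRA and run a hardware
 * translation of @iova for that stream.
 */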
static phys_addr_t qcom_smmu_iova_to_phys_hard(struct arm_smmu_domain *smmu_domain, dma_addr_t iova)
{
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        int idx = smmu_domain->cfg.cbndx;
        u32 frsynra;
        u16 sid;

        frsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
        sid = FIELD_GET(ARM_SMMU_CBFRSYNRA_SID, frsynra);

        return qcom_iova_to_phys(smmu_domain, iova, sid);
}

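/*
 * Translate @iova before and after a TLBIALL and compare the results, to
 * spot stale TLB entries while diagnosing an unexpected context fault.
 */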
static phys_addr_t qcom_smmu_verify_fault(struct arm_smmu_domain *smmu_domain, dma_addr_t iova, u32 fsr)
{
        struct io_pgtable *iop = io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        phys_addr_t phys_post_tlbiall;
        phys_addr_t phys;

        phys = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);
        io_pgtable_tlb_flush_all(iop);
        phys_post_tlbiall = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);

        if (phys != phys_post_tlbiall) {
                dev_err(smmu->dev,
                        "ATOS results differed across TLBIALL... (before: %pa after: %pa)\n",
                        &phys, &phys_post_tlbiall);
        }

        return (phys == 0 ? phys_post_tlbiall : phys);
}

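/*
 * Qualcomm-specific context fault handler: report the fault to the client
 * and, when TBU debug support is available, cross-check the software page
 * table walk against a hardware (ATOS) translation before terminating or
 * resuming any stalled transaction.
 */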
irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
{
        struct arm_smmu_domain *smmu_domain = dev;
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_context_fault_info cfi;
        u32 resume = 0;
        int idx = smmu_domain->cfg.cbndx;
        phys_addr_t phys_soft;
        int ret, tmp;

        static DEFINE_RATELIMIT_STATE(_rs,
                                      DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);

        arm_smmu_read_context_fault_info(smmu, idx, &cfi);

        if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
                return IRQ_NONE;

        if (list_empty(&tbu_list)) {
                ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
                                         cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);

                if (ret == -ENOSYS)
                        arm_smmu_print_context_fault_info(smmu, idx, &cfi);

                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);

                if (cfi.fsr & ARM_SMMU_CB_FSR_SS) {
                        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
                                          ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
                }

                return IRQ_HANDLED;
        }

        phys_soft = ops->iova_to_phys(ops, cfi.iova);

        tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
                                 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
        if (!tmp || tmp == -EBUSY) {
                ret = IRQ_HANDLED;
                resume = ARM_SMMU_RESUME_TERMINATE;
        } else if (tmp == -EAGAIN) {
                ret = IRQ_HANDLED;
                resume = 0;
        } else {
                phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr);

                if (__ratelimit(&_rs)) {
                        arm_smmu_print_context_fault_info(smmu, idx, &cfi);

                        dev_err(smmu->dev,
                                "soft iova-to-phys=%pa\n", &phys_soft);
                        if (!phys_soft)
                                dev_err(smmu->dev,
                                        "SOFTWARE TABLE WALK FAILED! Looks like %s accessed an unmapped address!\n",
                                        dev_name(smmu->dev));
                        if (phys_atos)
                                dev_err(smmu->dev, "hard iova-to-phys (ATOS)=%pa\n",
                                        &phys_atos);
                        else
                                dev_err(smmu->dev, "hard iova-to-phys (ATOS) failed\n");
                }
                ret = IRQ_NONE;
                resume = ARM_SMMU_RESUME_TERMINATE;
        }

        /*
         * If the client returns -EBUSY, do not clear FSR and do not RESUME
         * if stalled. This is required to keep the IOMMU client stalled on
         * the outstanding fault. This gives the client a chance to take any
         * debug action and then terminate the stalled transaction.
         * So, the sequence in case of stall on fault should be:
         * 1) Do not clear FSR or write to RESUME here
         * 2) Client takes any debug action
         * 3) Client terminates the stalled transaction and resumes the IOMMU
         * 4) Client clears FSR. The FSR should only be cleared after 3) and
         *    not before so that the fault remains outstanding. This ensures
         *    SCTLR.HUPCF has the desired effect if subsequent transactions also
         *    need to be terminated.
         */
        if (tmp != -EBUSY) {
                /* Clear the faulting FSR */
                arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);

                /* Retry or terminate any stalled transactions */
                if (cfi.fsr & ARM_SMMU_CB_FSR_SS)
                        arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume);
        }

        return ret;
}

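/*
 * Bind a TBU instance described in the devicetree: map its debug
 * registers, get its optional clock and interconnect path, and register
 * it on tbu_list.
 */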
int qcom_tbu_probe(struct platform_device *pdev)
{
        struct of_phandle_args args = { .args_count = 2 };
        struct device_node *np = pdev->dev.of_node;
        struct device *dev = &pdev->dev;
        struct qcom_tbu *tbu;

        tbu = devm_kzalloc(dev, sizeof(*tbu), GFP_KERNEL);
        if (!tbu)
                return -ENOMEM;

        tbu->dev = dev;
        INIT_LIST_HEAD(&tbu->list);
        spin_lock_init(&tbu->halt_lock);

        if (of_parse_phandle_with_args(np, "qcom,stream-id-range", "#iommu-cells", 0, &args)) {
                dev_err(dev, "Cannot parse the 'qcom,stream-id-range' DT property\n");
                return -EINVAL;
        }

        tbu->smmu_np = args.np;
        tbu->sid_range[0] = args.args[0];
        tbu->sid_range[1] = args.args[1];
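        /* Only the pointer value is compared later, so the reference can go */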
        of_node_put(args.np);

        tbu->base = devm_of_iomap(dev, np, 0, NULL);
        if (IS_ERR(tbu->base))
                return PTR_ERR(tbu->base);

        tbu->clk = devm_clk_get_optional(dev, NULL);
        if (IS_ERR(tbu->clk))
                return PTR_ERR(tbu->clk);

        tbu->path = devm_of_icc_get(dev, NULL);
        if (IS_ERR(tbu->path))
                return PTR_ERR(tbu->path);

        guard(mutex)(&tbu_list_lock);
        list_add_tail(&tbu->list, &tbu_list);

        return 0;
}