root/arch/xtensa/kernel/perf_event.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Xtensa Performance Monitor Module driver
 * See Tensilica Debug User's Guide for PMU registers documentation.
 *
 * Copyright (C) 2015 Cadence Design Systems Inc.
 */

#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

#include <asm/core.h>
#include <asm/processor.h>
#include <asm/stacktrace.h>

/*
 * Hardware version threshold: configurations whose XCHAL_HW_MIN_VERSION is
 * at or above this value use the higher PMU base address selected below.
 */
#define XTENSA_HWVERSION_RG_2015_0      260000

/*
 * Base address of the PMU register block. Every PMU register in this file is
 * accessed with get_er()/set_er() at a fixed offset from this base.
 */
#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
#define XTENSA_PMU_ERI_BASE             0x00101000
#else
#define XTENSA_PMU_ERI_BASE             0x00001000
#endif

/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG                  XTENSA_PMU_ERI_BASE
/* Perf counter values (one 4-byte slot per counter index i) */
#define XTENSA_PMU_PM(i)                (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
/* Perf counter control registers (one 4-byte slot per counter index i) */
#define XTENSA_PMU_PMCTRL(i)            (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
/* Perf counter status registers (one 4-byte slot per counter index i) */
#define XTENSA_PMU_PMSTAT(i)            (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)

/* PMG bit set by xtensa_pmu_enable() and cleared by xtensa_pmu_disable() */
#define XTENSA_PMU_PMG_PMEN             0x1

/*
 * Counters are read as 32-bit values: COUNTER_MASK is applied to the delta
 * between two reads, and COUNTER_MAX is the largest number of events a
 * counter is programmed to run for before it is reloaded.
 */
#define XTENSA_PMU_COUNTER_MASK         0xffffffffULL
#define XTENSA_PMU_COUNTER_MAX          0x7fffffff

/*
 * PMCTRL register fields. INTEN is set in every event configuration built by
 * this driver; raw events may additionally supply KRNLCNT, TRACELEVEL, SELECT
 * and MASK (see xtensa_pmu_event_init()).
 */
#define XTENSA_PMU_PMCTRL_INTEN         0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT       0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL    0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT  8
#define XTENSA_PMU_PMCTRL_SELECT        0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT    16
#define XTENSA_PMU_PMCTRL_MASK          0xffff0000

/*
 * Build the PMCTRL value for a predefined event: place 'select' and 'mask'
 * in their fields, set all TRACELEVEL bits and set INTEN.
 */
#define XTENSA_PMU_MASK(select, mask) \
        (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
         ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
         XTENSA_PMU_PMCTRL_TRACELEVEL | \
         XTENSA_PMU_PMCTRL_INTEN)

/*
 * PMSTAT register bits. OVFL is the bit the interrupt handler tests to find
 * the counters that need servicing.
 */
#define XTENSA_PMU_PMSTAT_OVFL          0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT       0x00000010

/*
 * Bookkeeping of the hardware counters of one CPU: which counters are
 * allocated and which perf event each allocated counter belongs to.
 */
struct xtensa_pmu_events {
        /*
         * Array of events currently on this core, indexed by counter number.
         * Slots are not cleared when an event is removed; only entries whose
         * used_mask bit is set are consulted.
         */
        struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
        /* Bitmap of used hardware counters */
        unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
/* One instance per CPU; the callbacks below reach it with this_cpu_ptr() */
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);

/*
 * PMCTRL values for PERF_TYPE_HARDWARE events, indexed by PERF_COUNT_HW_*.
 * Indices without an initializer stay zero and are rejected with -EINVAL by
 * xtensa_pmu_event_init(), as are indices beyond the end of the array.
 */
static const u32 xtensa_hw_ctl[] = {
        [PERF_COUNT_HW_CPU_CYCLES]              = XTENSA_PMU_MASK(0, 0x1),
        [PERF_COUNT_HW_INSTRUCTIONS]            = XTENSA_PMU_MASK(2, 0xffff),
        [PERF_COUNT_HW_CACHE_REFERENCES]        = XTENSA_PMU_MASK(10, 0x1),
        [PERF_COUNT_HW_CACHE_MISSES]            = XTENSA_PMU_MASK(12, 0x1),
        /* Taken and non-taken branches + taken loop ends */
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = XTENSA_PMU_MASK(2, 0x490),
        /* Instruction-related + other global stall cycles */
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
        /* Data-related global stall cycles */
        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = XTENSA_PMU_MASK(3, 0x1ff),
};

/* Shorthand for the PERF_COUNT_HW_CACHE_* constants used in the table below */
#define C(_x) PERF_COUNT_HW_CACHE_##_x

/*
 * PMCTRL values for PERF_TYPE_HW_CACHE events, indexed as
 * [cache type][operation][result]. Only the combinations listed have an
 * entry; every other slot is zero and is rejected by
 * xtensa_pmu_cache_event(). The first dimension is sized by the highest
 * cache type listed, so larger cache types fail that function's ARRAY_SIZE
 * check.
 */
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
        [C(L1D)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(10, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(10, 0x2),
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(11, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(11, 0x2),
                },
        },
        [C(L1I)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(8, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(8, 0x2),
                },
        },
        [C(DTLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(9, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(9, 0x8),
                },
        },
        [C(ITLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(7, 0x1),
                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(7, 0x8),
                },
        },
};

/*
 * Translate a PERF_TYPE_HW_CACHE config into a PMCTRL value.
 *
 * @config packs the cache type in bits 0-7, the operation in bits 8-15 and
 * the result in bits 16-23. Returns the matching xtensa_cache_ctl entry, or
 * -EINVAL when a field is out of range or the combination has no entry.
 */
static int xtensa_pmu_cache_event(u64 config)
{
        const unsigned int type = config & 0xff;
        const unsigned int op = (config >> 8) & 0xff;
        const unsigned int result = (config >> 16) & 0xff;
        int ctl;

        if (type >= ARRAY_SIZE(xtensa_cache_ctl))
                return -EINVAL;
        if (op >= C(OP_MAX) || result >= C(RESULT_MAX))
                return -EINVAL;

        ctl = xtensa_cache_ctl[type][op][result];

        /* A zero entry marks a combination without a table entry. */
        return ctl ? ctl : -EINVAL;
}

/* Return the current value of hardware counter @idx. */
static inline uint32_t xtensa_pmu_read_counter(int idx)
{
        uint32_t value = get_er(XTENSA_PMU_PM(idx));

        return value;
}

/* Load hardware counter @idx with the value @v. */
static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
{
        const uint32_t value = v;

        set_er(value, XTENSA_PMU_PM(idx));
}

/*
 * Fold the events counted since the previous read into the perf event.
 *
 * The counter is sampled and hwc->prev_count is replaced with the new sample;
 * the exchange is retried until prev_count is found unchanged between the
 * read and the swap. The difference, masked with XTENSA_PMU_COUNTER_MASK, is
 * added to event->count and subtracted from hwc->period_left.
 *
 * @idx is not used here: the counter that is read is the one recorded in
 * event->hw.idx.
 */
static void xtensa_perf_event_update(struct perf_event *event,
                                     struct hw_perf_event *hwc, int idx)
{
        uint64_t before, now;
        int64_t elapsed;

        for (;;) {
                before = local64_read(&hwc->prev_count);
                now = xtensa_pmu_read_counter(event->hw.idx);
                if (local64_cmpxchg(&hwc->prev_count, before, now) == before)
                        break;
        }

        elapsed = (now - before) & XTENSA_PMU_COUNTER_MASK;

        local64_add(elapsed, &event->count);
        local64_sub(elapsed, &hwc->period_left);
}

/*
 * Program counter @idx with the number of events left until it must be
 * serviced again.
 *
 * Non-sampling events are always programmed for XTENSA_PMU_COUNTER_MAX
 * events. For sampling events the remainder of the current period is used,
 * clamped to XTENSA_PMU_COUNTER_MAX; when the period has run out a new one
 * is started and hwc->period_left / hwc->last_period are updated.
 *
 * The counter and hwc->prev_count are both loaded with the negated event
 * count. Returns true when a new sampling period was started.
 */
static bool xtensa_perf_event_set_period(struct perf_event *event,
                                         struct hw_perf_event *hwc, int idx)
{
        bool new_period = false;
        s64 remaining = XTENSA_PMU_COUNTER_MAX;

        if (is_sampling_event(event)) {
                const s64 period = hwc->sample_period;
                bool far_behind;

                remaining = local64_read(&hwc->period_left);
                far_behind = remaining <= -period;

                if (far_behind || remaining <= 0) {
                        /*
                         * More than a whole period overdue: restart with a
                         * full period. Otherwise carry the overshoot over
                         * into the new period.
                         */
                        remaining = far_behind ? period : remaining + period;
                        local64_set(&hwc->period_left, remaining);
                        hwc->last_period = period;
                        new_period = true;
                }

                if (remaining > XTENSA_PMU_COUNTER_MAX)
                        remaining = XTENSA_PMU_COUNTER_MAX;
        }

        local64_set(&hwc->prev_count, -remaining);
        xtensa_pmu_write_counter(idx, -remaining);
        perf_event_update_userpage(event);

        return new_period;
}

/* Set the PMEN bit in PMG, leaving the other PMG bits as they are. */
static void xtensa_pmu_enable(struct pmu *pmu)
{
        unsigned long pmg = get_er(XTENSA_PMU_PMG);

        pmg |= XTENSA_PMU_PMG_PMEN;
        set_er(pmg, XTENSA_PMU_PMG);
}

/* Clear the PMEN bit in PMG, leaving the other PMG bits as they are. */
static void xtensa_pmu_disable(struct pmu *pmu)
{
        unsigned long pmg = get_er(XTENSA_PMU_PMG);

        pmg &= ~XTENSA_PMU_PMG_PMEN;
        set_er(pmg, XTENSA_PMU_PMG);
}

static int xtensa_pmu_event_init(struct perf_event *event)
{
        int ret;

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
                    xtensa_hw_ctl[event->attr.config] == 0)
                        return -EINVAL;
                event->hw.config = xtensa_hw_ctl[event->attr.config];
                return 0;

        case PERF_TYPE_HW_CACHE:
                ret = xtensa_pmu_cache_event(event->attr.config);
                if (ret < 0)
                        return ret;
                event->hw.config = ret;
                return 0;

        case PERF_TYPE_RAW:
                /* Not 'previous counter' select */
                if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
                    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
                        return -EINVAL;
                event->hw.config = (event->attr.config &
                                    (XTENSA_PMU_PMCTRL_KRNLCNT |
                                     XTENSA_PMU_PMCTRL_TRACELEVEL |
                                     XTENSA_PMU_PMCTRL_SELECT |
                                     XTENSA_PMU_PMCTRL_MASK)) |
                        XTENSA_PMU_PMCTRL_INTEN;
                return 0;

        default:
                return -ENOENT;
        }
}

/*
 * Starts/Stops a counter present on the PMU. The PMI handler
 * should stop the counter when perf_event_overflow() returns
 * !0. ->start() will be used to continue.
 *
 * xtensa_pmu_start() optionally reprograms the counter for the next period
 * (PERF_EF_RELOAD), clears the software state flags and writes the event's
 * control value to the counter's PMCTRL register.
 */
static void xtensa_pmu_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        const int counter = hwc->idx;

        /* An index of -1 is treated as "no counter assigned". */
        if (WARN_ON_ONCE(counter == -1))
                return;

        if (flags & PERF_EF_RELOAD) {
                /* Reloading is only expected once the count was folded in. */
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
                xtensa_perf_event_set_period(event, hwc, counter);
        }

        hwc->state = 0;

        set_er(hwc->config, XTENSA_PMU_PMCTRL(counter));
}

/*
 * Stop the counter of @event.
 *
 * Unless the event is already marked PERF_HES_STOPPED, its PMCTRL register
 * is zeroed, the PMSTAT value is written back to itself and the event is
 * marked stopped. With PERF_EF_UPDATE in @flags the event count is brought
 * up to date once, after which PERF_HES_UPTODATE is set.
 */
static void xtensa_pmu_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        const int counter = hwc->idx;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                set_er(0, XTENSA_PMU_PMCTRL(counter));
                /*
                 * NOTE(review): writing the status back presumably clears
                 * the pending status bits; confirm against the PMU docs.
                 */
                set_er(get_er(XTENSA_PMU_PMSTAT(counter)),
                       XTENSA_PMU_PMSTAT(counter));
                hwc->state |= PERF_HES_STOPPED;
        }

        if (!(flags & PERF_EF_UPDATE))
                return;
        if (hwc->state & PERF_HES_UPTODATE)
                return;

        xtensa_perf_event_update(event, hwc, counter);
        hwc->state |= PERF_HES_UPTODATE;
}

/*
 * Adds/Removes a counter to/from the PMU, can be done inside
 * a transaction, see the ->*_txn() methods.
 *
 * xtensa_pmu_add() claims a hardware counter on the current CPU for @event:
 * the index already recorded in event->hw.idx is tried first, then the first
 * free counter. Returns 0 on success or -EAGAIN when all counters are in
 * use. The event is left stopped unless PERF_EF_START is set in @flags.
 */
static int xtensa_pmu_add(struct perf_event *event, int flags)
{
        struct xtensa_pmu_events *pmu_ev = this_cpu_ptr(&xtensa_pmu_events);
        struct hw_perf_event *hwc = &event->hw;
        int counter = hwc->idx;

        if (__test_and_set_bit(counter, pmu_ev->used_mask)) {
                /* The recorded counter is taken; look for another one. */
                counter = find_first_zero_bit(pmu_ev->used_mask,
                                              XCHAL_NUM_PERF_COUNTERS);
                if (counter == XCHAL_NUM_PERF_COUNTERS)
                        return -EAGAIN;

                __set_bit(counter, pmu_ev->used_mask);
                hwc->idx = counter;
        }
        pmu_ev->event[counter] = event;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                xtensa_pmu_start(event, PERF_EF_RELOAD);

        perf_event_update_userpage(event);
        return 0;
}

static void xtensa_pmu_del(struct perf_event *event, int flags)
{
        struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);

        xtensa_pmu_stop(event, PERF_EF_UPDATE);
        __clear_bit(event->hw.idx, ev->used_mask);
        perf_event_update_userpage(event);
}

static void xtensa_pmu_read(struct perf_event *event)
{
        xtensa_perf_event_update(event, &event->hw, event->hw.idx);
}

/*
 * Backtrace callback: record the PC of @frame in the callchain entry passed
 * through @data. Always returns 0.
 */
static int callchain_trace(struct stackframe *frame, void *data)
{
        struct perf_callchain_entry_ctx *ctx = data;

        perf_callchain_store(ctx, frame->pc);

        return 0;
}

/*
 * Collect the kernel call chain for @regs into @entry, walking at most
 * entry->max_stack frames.
 */
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
                           struct pt_regs *regs)
{
        const unsigned int depth = entry->max_stack;

        xtensa_backtrace_kernel(regs, depth, callchain_trace, NULL, entry);
}

/*
 * Collect the user-space call chain for @regs into @entry, walking at most
 * entry->max_stack frames.
 */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
                         struct pt_regs *regs)
{
        const unsigned int depth = entry->max_stack;

        xtensa_backtrace_user(regs, depth, callchain_trace, entry);
}

/*
 * Log the PMG register and the value, control and status registers of every
 * counter of the current CPU. Local interrupts are disabled while the
 * registers are read and printed.
 */
void perf_event_print_debug(void)
{
        unsigned long irq_flags;
        unsigned int n;

        local_irq_save(irq_flags);

        pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
                get_er(XTENSA_PMU_PMG));

        for (n = 0; n < XCHAL_NUM_PERF_COUNTERS; ++n) {
                unsigned long pm = get_er(XTENSA_PMU_PM(n));
                unsigned long pmctrl = get_er(XTENSA_PMU_PMCTRL(n));
                unsigned long pmstat = get_er(XTENSA_PMU_PMSTAT(n));

                pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
                        n, pm, n, pmctrl, n, pmstat);
        }

        local_irq_restore(irq_flags);
}

/*
 * PMU interrupt handler.
 *
 * Visits every counter in use on the current CPU and services those whose
 * PMSTAT has the OVFL bit set: the status value is written back, the event
 * count is updated and the counter is reprogrammed. When reprogramming
 * starts a new sampling period, the overflow is reported to the perf core
 * with the period that just ended.
 *
 * Returns IRQ_HANDLED if at least one counter was serviced, IRQ_NONE
 * otherwise.
 */
irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
{
        struct xtensa_pmu_events *pmu_ev = this_cpu_ptr(&xtensa_pmu_events);
        irqreturn_t handled = IRQ_NONE;
        unsigned i;

        for_each_set_bit(i, pmu_ev->used_mask, XCHAL_NUM_PERF_COUNTERS) {
                uint32_t stat = get_er(XTENSA_PMU_PMSTAT(i));
                struct perf_event *event;
                struct hw_perf_event *hwc;
                u64 ended_period;

                if (!(stat & XTENSA_PMU_PMSTAT_OVFL))
                        continue;

                event = pmu_ev->event[i];
                hwc = &event->hw;

                /*
                 * NOTE(review): writing the status back presumably
                 * acknowledges the overflow; confirm against the PMU docs.
                 */
                set_er(stat, XTENSA_PMU_PMSTAT(i));
                xtensa_perf_event_update(event, hwc, i);

                /* Read before set_period() can overwrite last_period. */
                ended_period = hwc->last_period;
                if (xtensa_perf_event_set_period(event, hwc, i)) {
                        struct perf_sample_data data;
                        struct pt_regs *regs = get_irq_regs();

                        perf_sample_data_init(&data, 0, ended_period);
                        perf_event_overflow(event, &data, regs);
                }

                handled = IRQ_HANDLED;
        }

        return handled;
}

/* Callbacks through which the perf core drives the Xtensa PMU. */
static struct pmu xtensa_pmu = {
        /* Event validation */
        .event_init     = xtensa_pmu_event_init,

        /* Whole-PMU enable/disable */
        .pmu_enable     = xtensa_pmu_enable,
        .pmu_disable    = xtensa_pmu_disable,

        /* Per-event counter management */
        .add            = xtensa_pmu_add,
        .del            = xtensa_pmu_del,
        .start          = xtensa_pmu_start,
        .stop           = xtensa_pmu_stop,
        .read           = xtensa_pmu_read,
};

/*
 * CPU-hotplug "starting" callback: put the PMU of the CPU it runs on into a
 * quiescent state by zeroing PMG and every PMCTRL register and writing each
 * PMSTAT value back to itself. Always returns 0.
 */
static int xtensa_pmu_setup(unsigned int cpu)
{
        unsigned int n = 0;

        set_er(0, XTENSA_PMU_PMG);

        while (n < XCHAL_NUM_PERF_COUNTERS) {
                set_er(0, XTENSA_PMU_PMCTRL(n));
                set_er(get_er(XTENSA_PMU_PMSTAT(n)), XTENSA_PMU_PMSTAT(n));
                ++n;
        }

        return 0;
}

/*
 * Register the Xtensa PMU with the perf core.
 *
 * Maps the profiling interrupt, installs the CPU-hotplug callback that
 * resets the PMU on each CPU, hooks up the interrupt (enabled directly when
 * XTENSA_FAKE_NMI is set, requested with xtensa_pmu_irq_handler() otherwise)
 * and finally registers the PMU under the name "cpu".
 *
 * Returns 0 on success or a negative errno. On failure every step that had
 * already succeeded is undone, so no hotplug state or interrupt is left
 * behind.
 */
static int __init xtensa_pmu_init(void)
{
        int ret;
        int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);

        /* irq_create_mapping() reports failure by returning 0. */
        if (!irq) {
                pr_err("xtensa_pmu: failed to map profiling interrupt.\n");
                return -EINVAL;
        }

        ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
                                "perf/xtensa:starting", xtensa_pmu_setup,
                                NULL);
        if (ret) {
                pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
                return ret;
        }
#if XTENSA_FAKE_NMI
        enable_irq(irq);
#else
        ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
                          "pmu", NULL);
        if (ret < 0)
                goto err_cpuhp;
#endif

        ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
        if (ret)
                goto err_irq;

        return 0;

err_irq:
#if XTENSA_FAKE_NMI
        /* The interrupt was only enabled, never requested: undo just that. */
        disable_irq(irq);
#else
        free_irq(irq, NULL);
err_cpuhp:
#endif
        /* No teardown callback was installed, so nothing needs to be called. */
        cpuhp_remove_state_nocalls(CPUHP_AP_PERF_XTENSA_STARTING);
        return ret;
}
early_initcall(xtensa_pmu_init);