arch/powerpc/perf/core-fsl-emb.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Performance event support - Freescale Embedded Performance Monitor
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 * Copyright 2010 Freescale Semiconductor, Inc.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/reg_fsl_emb.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>

struct cpu_hw_events {
        int n_events;
        int disabled;
        u8  pmcs_enabled;
        struct perf_event *event[MAX_HWEVENTS];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

static struct fsl_emb_pmu *ppmu;

/* Number of perf_events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

static void perf_event_interrupt(struct pt_regs *regs);

/*
 * Read one performance monitor counter (PMC).
 */
static unsigned long read_pmc(int idx)
{
        unsigned long val;

        switch (idx) {
        case 0:
                val = mfpmr(PMRN_PMC0);
                break;
        case 1:
                val = mfpmr(PMRN_PMC1);
                break;
        case 2:
                val = mfpmr(PMRN_PMC2);
                break;
        case 3:
                val = mfpmr(PMRN_PMC3);
                break;
        case 4:
                val = mfpmr(PMRN_PMC4);
                break;
        case 5:
                val = mfpmr(PMRN_PMC5);
                break;
        default:
                printk(KERN_ERR "oops trying to read PMC%d\n", idx);
                val = 0;
        }
        return val;
}

/*
 * Write one PMC.
 */
static void write_pmc(int idx, unsigned long val)
{
        switch (idx) {
        case 0:
                mtpmr(PMRN_PMC0, val);
                break;
        case 1:
                mtpmr(PMRN_PMC1, val);
                break;
        case 2:
                mtpmr(PMRN_PMC2, val);
                break;
        case 3:
                mtpmr(PMRN_PMC3, val);
                break;
        case 4:
                mtpmr(PMRN_PMC4, val);
                break;
        case 5:
                mtpmr(PMRN_PMC5, val);
                break;
        default:
                printk(KERN_ERR "oops trying to write PMC%d\n", idx);
        }

        isync();
}

/*
 * Write one local control A register
 */
static void write_pmlca(int idx, unsigned long val)
{
        switch (idx) {
        case 0:
                mtpmr(PMRN_PMLCA0, val);
                break;
        case 1:
                mtpmr(PMRN_PMLCA1, val);
                break;
        case 2:
                mtpmr(PMRN_PMLCA2, val);
                break;
        case 3:
                mtpmr(PMRN_PMLCA3, val);
                break;
        case 4:
                mtpmr(PMRN_PMLCA4, val);
                break;
        case 5:
                mtpmr(PMRN_PMLCA5, val);
                break;
        default:
                printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
        }

        isync();
}

/*
 * Write one local control B register
 */
static void write_pmlcb(int idx, unsigned long val)
{
        switch (idx) {
        case 0:
                mtpmr(PMRN_PMLCB0, val);
                break;
        case 1:
                mtpmr(PMRN_PMLCB1, val);
                break;
        case 2:
                mtpmr(PMRN_PMLCB2, val);
                break;
        case 3:
                mtpmr(PMRN_PMLCB3, val);
                break;
        case 4:
                mtpmr(PMRN_PMLCB4, val);
                break;
        case 5:
                mtpmr(PMRN_PMLCB5, val);
                break;
        default:
                printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
        }

        isync();
}

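/*
 * Fold the hardware counter's progress since the last read into the
 * 64-bit event count and the remaining sampling period.
 */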
static void fsl_emb_pmu_read(struct perf_event *event)
{
        s64 val, delta, prev;

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        /*
         * Performance monitor interrupts come even when interrupts
         * are soft-disabled, as long as interrupts are hard-enabled.
         * Therefore we treat them like NMIs.
         */
        do {
                prev = local64_read(&event->hw.prev_count);
                barrier();
                val = read_pmc(event->hw.idx);
        } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);

        /* The counters are only 32 bits wide */
        delta = (val - prev) & 0xfffffffful;
        local64_add(delta, &event->count);
        local64_sub(delta, &event->hw.period_left);
}

/*
 * Disable all events to prevent PMU interrupts and to allow
 * events to be added or removed.
 */
static void fsl_emb_pmu_disable(struct pmu *pmu)
{
        struct cpu_hw_events *cpuhw;
        unsigned long flags;

        local_irq_save(flags);
        cpuhw = this_cpu_ptr(&cpu_hw_events);

        if (!cpuhw->disabled) {
                cpuhw->disabled = 1;

                /*
                 * Check if we ever enabled the PMU on this cpu.
                 */
                if (!cpuhw->pmcs_enabled) {
                        ppc_enable_pmcs();
                        cpuhw->pmcs_enabled = 1;
                }

                if (atomic_read(&num_events)) {
                        /*
                         * Set the 'freeze all counters' bit, and disable
                         * interrupts.  The barrier is to make sure the
                         * mtpmr has been executed and the PMU has frozen
                         * the events before we return.
                         */

                        mtpmr(PMRN_PMGC0, PMGC0_FAC);
                        isync();
                }
        }
        local_irq_restore(flags);
}

/*
 * Re-enable all events if disable == 0.
 * If we were previously disabled and events were added, then
 * put the new config on the PMU.
 */
static void fsl_emb_pmu_enable(struct pmu *pmu)
{
        struct cpu_hw_events *cpuhw;
        unsigned long flags;

        local_irq_save(flags);
        cpuhw = this_cpu_ptr(&cpu_hw_events);
        if (!cpuhw->disabled)
                goto out;

        cpuhw->disabled = 0;
        ppc_set_pmu_inuse(cpuhw->n_events != 0);

        if (cpuhw->n_events > 0) {
                mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
                isync();
        }

 out:
        local_irq_restore(flags);
}

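/*
 * Collect the group leader and its sibling events that need hardware
 * counters into ctrs[].  Returns the number collected, or -1 if the
 * group needs more than max_count counters.
 */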
static int collect_events(struct perf_event *group, int max_count,
                          struct perf_event *ctrs[])
{
        int n = 0;
        struct perf_event *event;

        if (!is_software_event(group)) {
                if (n >= max_count)
                        return -1;
                ctrs[n] = group;
                n++;
        }
        for_each_sibling_event(event, group) {
                if (!is_software_event(event) &&
                    event->state != PERF_EVENT_STATE_OFF) {
                        if (n >= max_count)
                                return -1;
                        ctrs[n] = event;
                        n++;
                }
        }
        return n;
}

/* context locked on entry */
static int fsl_emb_pmu_add(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuhw;
        int ret = -EAGAIN;
        int num_counters = ppmu->n_counter;
        u64 val;
        int i;

        perf_pmu_disable(event->pmu);
        cpuhw = &get_cpu_var(cpu_hw_events);

        if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
                num_counters = ppmu->n_restricted;

        /*
         * Allocate counters from top-down, so that restricted-capable
         * counters are kept free as long as possible.
         */
        for (i = num_counters - 1; i >= 0; i--) {
                if (cpuhw->event[i])
                        continue;

                break;
        }

        if (i < 0)
                goto out;

        event->hw.idx = i;
        cpuhw->event[i] = event;
        ++cpuhw->n_events;

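        /*
         * For sampling events, start the 32-bit counter at
         * 0x80000000 - period_left so that its most-significant bit
         * (the overflow condition) is set after period_left events.
         */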
        val = 0;
        if (event->hw.sample_period) {
                s64 left = local64_read(&event->hw.period_left);
                if (left < 0x80000000L)
                        val = 0x80000000L - left;
        }
        local64_set(&event->hw.prev_count, val);

        if (unlikely(!(flags & PERF_EF_START))) {
                event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
                val = 0;
        } else {
                event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE);
        }

        write_pmc(i, val);
        perf_event_update_userpage(event);

        write_pmlcb(i, event->hw.config >> 32);
        write_pmlca(i, event->hw.config_base);

        ret = 0;
 out:
        put_cpu_var(cpu_hw_events);
        perf_pmu_enable(event->pmu);
        return ret;
}

/* context locked on entry */
static void fsl_emb_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuhw;
        int i = event->hw.idx;

        perf_pmu_disable(event->pmu);
        if (i < 0)
                goto out;

        fsl_emb_pmu_read(event);

        cpuhw = &get_cpu_var(cpu_hw_events);

        WARN_ON(event != cpuhw->event[event->hw.idx]);

        write_pmlca(i, 0);
        write_pmlcb(i, 0);
        write_pmc(i, 0);

        cpuhw->event[i] = NULL;
        event->hw.idx = -1;

        /*
         * TODO: if at least one restricted event exists, and we
         * just freed up a non-restricted-capable counter, and
         * there is a restricted-capable counter occupied by
         * a non-restricted event, migrate that event to the
         * vacated counter.
         */

        cpuhw->n_events--;

 out:
        put_cpu_var(cpu_hw_events);
        perf_pmu_enable(event->pmu);
}

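/*
 * Restart a stopped event: re-arm its counter from the remaining
 * period and clear the PERF_HES_* state bits.
 */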
static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
{
        unsigned long flags;
        unsigned long val;
        s64 left;

        if (event->hw.idx < 0 || !event->hw.sample_period)
                return;

        if (!(event->hw.state & PERF_HES_STOPPED))
                return;

        if (ef_flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

        local_irq_save(flags);
        perf_pmu_disable(event->pmu);

        event->hw.state = 0;
        left = local64_read(&event->hw.period_left);
        val = 0;
        if (left < 0x80000000L)
                val = 0x80000000L - left;
        write_pmc(event->hw.idx, val);

        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
}

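/*
 * Stop a sampling event: fold in the current count, mark it stopped
 * and up to date, and clear its counter.
 */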
static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
{
        unsigned long flags;

        if (event->hw.idx < 0 || !event->hw.sample_period)
                return;

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        local_irq_save(flags);
        perf_pmu_disable(event->pmu);

        fsl_emb_pmu_read(event);
        event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        write_pmc(event->hw.idx, 0);

        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
}

/*
 * Release the PMU if this is the last perf_event.
 */
static void hw_perf_event_destroy(struct perf_event *event)
{
        if (!atomic_add_unless(&num_events, -1, 1)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_dec_return(&num_events) == 0)
                        release_pmc_hardware();
                mutex_unlock(&pmc_reserve_mutex);
        }
}

/*
 * Translate a generic cache event_id config to a raw event_id code.
 */
static int hw_perf_cache_event(u64 config, u64 *eventp)
{
        unsigned long type, op, result;
        int ev;

        if (!ppmu->cache_events)
                return -EINVAL;

        /* unpack config */
        type = config & 0xff;
        op = (config >> 8) & 0xff;
        result = (config >> 16) & 0xff;

        if (type >= PERF_COUNT_HW_CACHE_MAX ||
            op >= PERF_COUNT_HW_CACHE_OP_MAX ||
            result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        ev = (*ppmu->cache_events)[type][op][result];
        if (ev == 0)
                return -EOPNOTSUPP;
        if (ev == -1)
                return -EINVAL;
        *eventp = ev;
        return 0;
}

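/*
 * Set up a new event: translate the generic or raw config into a
 * hardware event code, build the PMLCA configuration, check group
 * and restricted-counter constraints, and reserve the PMC hardware
 * if this is the first active event.
 */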
static int fsl_emb_pmu_event_init(struct perf_event *event)
{
        u64 ev;
        struct perf_event *events[MAX_HWEVENTS];
        int n;
        int err;
        int num_restricted;
        int i;

        if (ppmu->n_counter > MAX_HWEVENTS) {
                WARN(1, "No. of perf counters (%d) is higher than max array size(%d)\n",
                        ppmu->n_counter, MAX_HWEVENTS);
                ppmu->n_counter = MAX_HWEVENTS;
        }

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                ev = event->attr.config;
                if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
                        return -EOPNOTSUPP;
                ev = ppmu->generic_events[ev];
                break;

        case PERF_TYPE_HW_CACHE:
                err = hw_perf_cache_event(event->attr.config, &ev);
                if (err)
                        return err;
                break;

        case PERF_TYPE_RAW:
                ev = event->attr.config;
                break;

        default:
                return -ENOENT;
        }

        event->hw.config = ppmu->xlate_event(ev);
        if (!(event->hw.config & FSL_EMB_EVENT_VALID))
                return -EINVAL;

        /*
         * If this is in a group, check if it can go on with all the
         * other hardware events in the group.  We assume the event
         * hasn't been linked into its leader's sibling list at this point.
         */
        n = 0;
        if (event->group_leader != event) {
                n = collect_events(event->group_leader,
                                   ppmu->n_counter - 1, events);
                if (n < 0)
                        return -EINVAL;
        }

        if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
                num_restricted = 0;
                for (i = 0; i < n; i++) {
                        if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
                                num_restricted++;
                }

                if (num_restricted >= ppmu->n_restricted)
                        return -EINVAL;
        }

        event->hw.idx = -1;

        event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
                                (u32)((ev << 16) & PMLCA_EVENT_MASK);

        if (event->attr.exclude_user)
                event->hw.config_base |= PMLCA_FCU;
        if (event->attr.exclude_kernel)
                event->hw.config_base |= PMLCA_FCS;
        if (event->attr.exclude_idle)
                return -ENOTSUPP;

        event->hw.last_period = event->hw.sample_period;
        local64_set(&event->hw.period_left, event->hw.last_period);

        /*
         * See if we need to reserve the PMU.
         * If no events are currently in use, then we have to take a
         * mutex to ensure that we don't race with another task doing
         * reserve_pmc_hardware or release_pmc_hardware.
         */
        err = 0;
        if (!atomic_inc_not_zero(&num_events)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_read(&num_events) == 0 &&
                    reserve_pmc_hardware(perf_event_interrupt))
                        err = -EBUSY;
                else
                        atomic_inc(&num_events);
                mutex_unlock(&pmc_reserve_mutex);

                mtpmr(PMRN_PMGC0, PMGC0_FAC);
                isync();
        }
        event->destroy = hw_perf_event_destroy;

        return err;
}

static struct pmu fsl_emb_pmu = {
        .pmu_enable     = fsl_emb_pmu_enable,
        .pmu_disable    = fsl_emb_pmu_disable,
        .event_init     = fsl_emb_pmu_event_init,
        .add            = fsl_emb_pmu_add,
        .del            = fsl_emb_pmu_del,
        .start          = fsl_emb_pmu_start,
        .stop           = fsl_emb_pmu_stop,
        .read           = fsl_emb_pmu_read,
};

/*
 * A counter has overflowed; update its count and record
 * things if requested.  Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
static void record_and_restart(struct perf_event *event, unsigned long val,
                               struct pt_regs *regs)
{
        u64 period = event->hw.sample_period;
        const u64 last_period = event->hw.last_period;
        s64 prev, delta, left;
        int record = 0;

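        /* A stopped event should not overflow again; just clear its counter. */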
        if (event->hw.state & PERF_HES_STOPPED) {
                write_pmc(event->hw.idx, 0);
                return;
        }

        /* we don't have to worry about interrupts here */
        prev = local64_read(&event->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
        local64_add(delta, &event->count);

        /*
         * See if the total period for this event has expired,
         * and update for the next period.
         */
        val = 0;
        left = local64_read(&event->hw.period_left) - delta;
        if (period) {
                if (left <= 0) {
                        left += period;
                        if (left <= 0)
                                left = period;
                        record = 1;
                        event->hw.last_period = event->hw.sample_period;
                }
                if (left < 0x80000000LL)
                        val = 0x80000000LL - left;
        }

        write_pmc(event->hw.idx, val);
        local64_set(&event->hw.prev_count, val);
        local64_set(&event->hw.period_left, left);
        perf_event_update_userpage(event);

        /*
         * Finally record data if requested.
         */
        if (record) {
                struct perf_sample_data data;

                perf_sample_data_init(&data, 0, last_period);

                perf_event_overflow(event, &data, regs);
        }
}

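/*
 * Performance monitor interrupt handler: any counter with its
 * most-significant bit set has overflowed.  Update and restart the
 * event using it, or clear the counter if it is unused, then
 * unfreeze the counters.
 */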
static void perf_event_interrupt(struct pt_regs *regs)
{
        int i;
        struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *event;
        unsigned long val;

        for (i = 0; i < ppmu->n_counter; ++i) {
                event = cpuhw->event[i];

                val = read_pmc(i);
                if ((int)val < 0) {
                        if (event) {
                                /* event has overflowed */
                                record_and_restart(event, val, regs);
                        } else {
                                /*
                                 * Disabled counter is negative,
                                 * reset it just in case.
                                 */
                                write_pmc(i, 0);
                        }
                }
        }

        /* PMM will keep counters frozen until we return from the interrupt. */
        mtmsr(mfmsr() | MSR_PMM);
        mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
        isync();
}

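/*
 * CPU hotplug prepare callback: start the incoming CPU with a clean
 * cpu_hw_events structure.
 */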
static int fsl_emb_pmu_prepare_cpu(unsigned int cpu)
{
        struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);

        memset(cpuhw, 0, sizeof(*cpuhw));

        return 0;
}

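/*
 * Register a CPU-specific fsl_emb_pmu description.  Only one backend
 * can be registered at a time.
 */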
int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
{
        if (ppmu)
                return -EBUSY;          /* something's already registered */

        ppmu = pmu;
        pr_info("%s performance monitor hardware support registered\n",
                pmu->name);

        perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
        cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
                          fsl_emb_pmu_prepare_cpu, NULL);

        return 0;
}