root/arch/x86/events/amd/brs.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian <eranian@google.com>
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON      0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
        __u64 val;
        struct {
                __u64   rsvd0:2,  /* reserved */
                        brsmen:1, /* branch sample enable */
                        rsvd4_3:2,/* reserved - must be 0x3 */
                        vb:1,     /* valid branches recorded */
                        rsvd2:10, /* reserved */
                        msroff:4, /* index of next entry to write */
                        rsvd3:4,  /* reserved */
                        pmc:3,    /* #PMC holding the sampling event */
                        rsvd4:37; /* reserved */
        };
};
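
/*
 * Illustrative decode, derived from the layout above: a raw value of
 * 0x3003c has brsmen=1 (sampling enabled), rsvd4_3=0x3, vb=1 (valid
 * branches recorded) and msroff=3, i.e. entry 2 is the most recently
 * written one. The assert pins the union to the 64-bit MSR width.
 */
static_assert(sizeof(union amd_debug_extn_cfg) == sizeof(__u64));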

static inline unsigned int brs_from(int idx)
{
        return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
        return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}
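
/*
 * Entries are stored as interleaved FROM/TO MSR pairs starting at
 * MSR_AMD_SAMP_BR_FROM (base), per the helpers above:
 *
 *   idx 0:  FROM = base + 0,  TO = base + 1
 *   idx 1:  FROM = base + 2,  TO = base + 3
 *   ...
 *   idx 15: FROM = base + 30, TO = base + 31
 */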

static __always_inline void set_debug_extn_cfg(u64 val)
{
        /* bits[4:3] must always be set to 11b */
        native_wrmsrq(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
}
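
/*
 * Worked example: set_debug_extn_cfg(0) actually writes 0x18, keeping
 * the must-be-one bits[4:3] set even when the caller asks for an
 * all-zero config (see amd_brs_reset()).
 */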

static __always_inline u64 get_debug_extn_cfg(void)
{
        return native_rdmsrq(MSR_AMD_DBG_EXTN_CFG);
}

static bool __init amd_brs_detect(void)
{
        if (!cpu_feature_enabled(X86_FEATURE_BRS))
                return false;

        switch (boot_cpu_data.x86) {
        case 0x19: /* AMD Fam19h (Zen3) */
                x86_pmu.lbr_nr = 16;

                /* No hardware filtering supported */
                x86_pmu.lbr_sel_map = NULL;
                x86_pmu.lbr_sel_mask = 0;
                break;
        default:
                return false;
        }

        return true;
}

/*
 * Current BRS implementation does not support branch type or privilege
 * level filtering. Therefore, this function simply enforces these
 * limitations. No need for a br_sel_map. Software filtering is not
 * supported because it would not correlate well with a sampling period.
 */
static int amd_brs_setup_filter(struct perf_event *event)
{
        u64 type = event->attr.branch_sample_type;

        /* No BRS support */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        /* Can only capture all branches, i.e., no filtering */
        if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
                return -EINVAL;

        return 0;
}
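
/*
 * Example of the check above: with branch_sample_type =
 * PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER, masking off the
 * privilege-level bits leaves PERF_SAMPLE_BRANCH_ANY, so the event is
 * accepted. PERF_SAMPLE_BRANCH_ANY_CALL survives the mask and is
 * rejected with -EINVAL.
 */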

static inline int amd_is_brs_event(struct perf_event *e)
{
        return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}

int amd_brs_hw_config(struct perf_event *event)
{
        int ret = 0;

        /*
         * BRS holds the PMU interrupt until lbr_nr entries have been
         * captured, so it is not supported in counting mode.
         */
        if (!is_sampling_event(event))
                return -EINVAL;

        /*
         * Due to the way BRS operates by holding the interrupt until
         * lbr_nr entries have been captured, it does not make sense
         * to allow sampling on BRS with an event that does not match
         * what BRS is capturing, i.e., retired taken branches.
         * Otherwise the correlation with the event's period is even
         * looser:
         *
         * With retired taken branch:
         *   Effective P = P + 16 + X
         * With any other event:
         *   Effective P = P + Y + X
         *
         * Where X is the number of taken branches due to interrupt
         * skid. Skid is large.
         *
         * Where Y is the occurrences of the event while BRS is
         * capturing the lbr_nr entries.
         *
         * By using retired taken branches, we limit the impact on the
         * Y variable. We know it cannot be more than the depth of
         * BRS.
         */
        if (!amd_is_brs_event(event))
                return -EINVAL;

        /*
         * BRS implementation does not work with frequency mode
         * reprogramming of the period.
         */
        if (event->attr.freq)
                return -EINVAL;
        /*
         * The kernel subtracts the BRS depth from the sampling period,
         * so the period must be larger than the depth.
         */
        if (event->attr.sample_period <= x86_pmu.lbr_nr)
                return -EINVAL;

        /*
         * Check if we can allow PERF_SAMPLE_BRANCH_STACK
         */
        ret = amd_brs_setup_filter(event);

        /* only set in case of success */
        if (!ret)
                event->hw.flags |= PERF_X86_EVENT_AMD_BRS;

        return ret;
}
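
/*
 * Userspace sketch of an attr accepted by the checks above (the raw
 * event encoding is an assumption for illustration; see
 * AMD_FAM19H_BRS_EVENT for the authoritative value):
 *
 *   struct perf_event_attr attr = {
 *           .type               = PERF_TYPE_RAW,
 *           .config             = 0xc4,    // assumed: retired taken branches
 *           .sample_period      = 100003,  // > BRS depth, no freq mode
 *           .sample_type        = PERF_SAMPLE_BRANCH_STACK,
 *           .branch_sample_type = PERF_SAMPLE_BRANCH_ANY,
 *   };
 */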

/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
        /*
         * msroff is the index of the next entry to write, so the
         * top-of-stack is one behind it. When BRS is full, msroff
         * wraps back to 0.
         */
        return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}
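
/*
 * Worked example with x86_pmu.lbr_nr = 16:
 *   msroff = 5 -> tos = 4  (entry 4 was written last)
 *   msroff = 0 -> tos = 15 (buffer full/wrapped: entry 15 was last)
 */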

/*
 * Make sure we start with a sane BRS offset, especially across kexec,
 * where stale MSR state can survive.
 */
void amd_brs_reset(void)
{
        if (!cpu_feature_enabled(X86_FEATURE_BRS))
                return;

        /*
         * Reset config
         */
        set_debug_extn_cfg(0);

        /*
         * Mark first entry as poisoned
         */
        wrmsrq(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
        if (!amd_brs_detect())
                return -EOPNOTSUPP;

        pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

        return 0;
}

void amd_brs_enable(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        union amd_debug_extn_cfg cfg;

        /* Activate only on first user */
        if (++cpuc->brs_active > 1)
                return;

        cfg.val    = 0; /* reset all fields */
        cfg.brsmen = 1; /* enable branch sampling */

        /* Set enable bit */
        set_debug_extn_cfg(cfg.val);
}

void amd_brs_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->lbr_users)
                amd_brs_enable();
}

void amd_brs_disable(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        union amd_debug_extn_cfg cfg;

        /* Check if active (could be disabled via x86_pmu_disable_all()) */
        if (!cpuc->brs_active)
                return;

        /* Only disable for last user */
        if (--cpuc->brs_active)
                return;

        /*
         * Clear the brsmen bit but preserve the others as they contain
         * useful state such as vb and msroff
         */
        cfg.val = get_debug_extn_cfg();

        /*
         * If we came in via the PMU interrupt with BRS full, the
         * hardware has already stopped BRS; no need to issue the
         * wrmsr again.
         */
        if (cfg.brsmen) {
                cfg.brsmen = 0;
                set_debug_extn_cfg(cfg.val);
        }
}

void amd_brs_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->lbr_users)
                amd_brs_disable();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
        u64 type = event->attr.branch_sample_type;
        u64 plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
        u64 plm_u = PERF_SAMPLE_BRANCH_USER;

        if (!(type & plm_k) && kernel_ip(to))
                return false;

        if (!(type & plm_u) && !kernel_ip(to))
                return false;

        return true;
}
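
/*
 * Example: with branch_sample_type requesting only
 * PERF_SAMPLE_BRANCH_USER, a branch whose target is a kernel address
 * fails the first test above and is dropped from the sample.
 */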

/*
 * Caller must ensure amd_brs_inuse() is true before calling.
 */
void amd_brs_drain(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *event = cpuc->events[0];
        struct perf_branch_entry *br = cpuc->lbr_entries;
        union amd_debug_extn_cfg cfg;
        u32 i, nr = 0, num, tos, start;
        u32 shift = 64 - boot_cpu_data.x86_virt_bits;

        /*
         * The BRS event is forced on PMC0, so check if there is an
         * event scheduled there. It is possible to have lbr_users > 0
         * but the event not yet scheduled due to a long-latency PMU
         * irq.
         */
        if (!event)
                goto empty;

        cfg.val = get_debug_extn_cfg();

        /* Sanity check: msroff must be in [0, x86_pmu.lbr_nr - 1] */
        if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
                goto empty;

        /* No valid branch */
        if (cfg.vb == 0)
                goto empty;

        /*
         * msroff points to the next entry to be written.
         * tos = most recent entry index = msroff - 1
         * The BRS register buffer saturates, so we know we have
         * start <= tos and that we have to read from start to tos.
         */
        start = 0;
        tos = amd_brs_get_tos(&cfg);

        num = tos - start + 1;

        /*
         * BRS is single pass (saturating): MSROFF advances from 0 to
         * depth-1 and wraps back to zero when the buffer is full, so
         * read entries from the most recent (tos) backwards.
         */
        for (i = 0; i < num; i++) {
                u32 brs_idx = tos - i;
                u64 from, to;

                rdmsrq(brs_to(brs_idx), to);

                /* Entry does not belong to us (as marked by kernel) */
                if (to == BRS_POISON)
                        break;

                /*
                 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
                 * Necessary to generate proper virtual addresses suitable for
                 * symbolization
                 */
                to = (u64)(((s64)to << shift) >> shift);

                if (!amd_brs_match_plm(event, to))
                        continue;

                rdmsrq(brs_from(brs_idx), from);

                perf_clear_branch_entry_bitfields(br+nr);

                br[nr].from = from;
                br[nr].to   = to;

                nr++;
        }
empty:
        /* Record number of sampled branches */
        cpuc->lbr_stack.nr = nr;
}
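
/*
 * Sign-extension example for the drain loop above, assuming
 * x86_virt_bits = 48 (shift = 16):
 *   raw TO    = 0x0000ffffffff1234  (bit 47 set)
 *   << 16     = 0xffffffff12340000
 *   s64 >> 16 = 0xffffffffffff1234, a canonical kernel address
 */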

/*
 * Poison the most recent entry to prevent its reuse by the next task;
 * required because BRS entries are not tagged by PID.
 */
static void amd_brs_poison_buffer(void)
{
        union amd_debug_extn_cfg cfg;
        unsigned int idx;

        /* Get current state */
        cfg.val = get_debug_extn_cfg();

        /* idx is most recently written entry */
        idx = amd_brs_get_tos(&cfg);

        /* Poison target of entry */
        wrmsrq(brs_to(idx), BRS_POISON);
}
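
/*
 * Example: if the previous task left entries 0..4 (tos = 4), poisoning
 * brs_to(4) makes the next drain, which reads backwards from the new
 * tos, stop as soon as it hits BRS_POISON, so none of the stale
 * entries are reported for the new task.
 */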

/*
 * On context switch in, we need to make sure no samples from previous user
 * are left in the BRS.
 *
 * On ctxswin, sched_in = true, called after the PMU has started
 * On ctxswout, sched_in = false, called before the PMU is stopped
 */
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx,
                            struct task_struct *task, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /* no active users */
        if (!cpuc->lbr_users)
                return;

        /*
         * On context switch in, we need to ensure we do not use entries
         * from previous BRS user on that CPU, so we poison the buffer as
         * a faster way compared to resetting all entries.
         */
        if (sched_in)
                amd_brs_poison_buffer();
}

/*
 * Called from ACPI processor_idle.c or acpi_pad.c with interrupts
 * disabled.
 */
void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        union amd_debug_extn_cfg cfg;

        /*
         * On mwait in, we may end up in a non-C0 state.
         * We must disable branch sampling to avoid holding the NMI
         * for too long. We disable it in hardware but keep the state
         * in cpuc, so we can re-enable it on the way out.
         *
         * The hardware will deliver the pending NMI, if needed, once
         * brsmen is cleared.
         */
        if (cpuc->brs_active) {
                cfg.val = get_debug_extn_cfg();
                cfg.brsmen = !lopwr_in;
                set_debug_extn_cfg(cfg.val);
        }
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
        static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
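
/*
 * Call-site sketch: idle code invokes the callback through the static
 * call, which is a NOP until amd_brs_lopwr_init() patches in
 * perf_amd_brs_lopwr_cb(). The wrapper below is an assumed shape of
 * the inline helper in asm/perf_event.h, shown for illustration:
 *
 *   static __always_inline void perf_lopwr_cb(bool lopwr_in)
 *   {
 *           static_call_mod(perf_lopwr_cb)(lopwr_in);
 *   }
 */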