arch/x86/events/intel/ds.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/sched/clock.h>

#include <asm/cpu_entry_area.h>
#include <asm/debugreg.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/timer.h>

#include "../perf_event.h"

/* Waste a full page so it can be mapped into the cpu_entry_area */
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE         24

#define PEBS_FIXUP_SIZE         PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
        u32 flags, ip;
        u32 ax, bx, cx, dx;
        u32 si, di, bp, sp;
};

 */

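/*
 * Decoded layout of the L2-miss (OMR) data source byte: bits 0-3 index
 * omr_data_source[] below, the remaining bits flag remote access, snoop
 * HitM and snoop/promotion status (see parse_omr_data_source()).
 */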
union omr_encoding {
        struct {
                u8 omr_source : 4;
                u8 omr_remote : 1;
                u8 omr_hitm : 1;
                u8 omr_snoop : 1;
                u8 omr_promoted : 1;
        };
        u8 omr_full;
};

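/*
 * PEBS data source encoding, overlaid with the per-generation bitfield
 * layouts (load, store, store latency, MTL, LNC, PNC and ARW views).
 */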
union intel_x86_pebs_dse {
        u64 val;
        struct {
                unsigned int ld_dse:4;
                unsigned int ld_stlb_miss:1;
                unsigned int ld_locked:1;
                unsigned int ld_data_blk:1;
                unsigned int ld_addr_blk:1;
                unsigned int ld_reserved:24;
        };
        struct {
                unsigned int st_l1d_hit:1;
                unsigned int st_reserved1:3;
                unsigned int st_stlb_miss:1;
                unsigned int st_locked:1;
                unsigned int st_reserved2:26;
        };
        struct {
                unsigned int st_lat_dse:4;
                unsigned int st_lat_stlb_miss:1;
                unsigned int st_lat_locked:1;
                unsigned int ld_reserved3:26;
        };
        struct {
                unsigned int mtl_dse:5;
                unsigned int mtl_locked:1;
                unsigned int mtl_stlb_miss:1;
                unsigned int mtl_fwd_blk:1;
                unsigned int ld_reserved4:24;
        };
        struct {
                unsigned int lnc_dse:8;
                unsigned int ld_reserved5:2;
                unsigned int lnc_stlb_miss:1;
                unsigned int lnc_locked:1;
                unsigned int lnc_data_blk:1;
                unsigned int lnc_addr_blk:1;
                unsigned int ld_reserved6:18;
        };
        struct {
                unsigned int pnc_dse: 8;
                unsigned int pnc_l2_miss:1;
                unsigned int pnc_stlb_clean_hit:1;
                unsigned int pnc_stlb_any_hit:1;
                unsigned int pnc_stlb_miss:1;
                unsigned int pnc_locked:1;
                unsigned int pnc_data_blk:1;
                unsigned int pnc_addr_blk:1;
                unsigned int pnc_fb_full:1;
                unsigned int ld_reserved8:16;
        };
        struct {
                unsigned int arw_dse:8;
                unsigned int arw_l2_miss:1;
                unsigned int arw_xq_promotion:1;
                unsigned int arw_reissue:1;
                unsigned int arw_stlb_miss:1;
                unsigned int arw_locked:1;
                unsigned int arw_data_blk:1;
                unsigned int arw_addr_blk:1;
                unsigned int arw_fb_full:1;
                unsigned int ld_reserved9:16;
        };
};


/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define LEVEL(x) P(LVLNUM, x)
#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

/* Version for Sandy Bridge and later */
static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
        P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
        OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
        OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
        OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
        OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};

/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
        pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
        pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
        pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}

static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
{
        u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

        data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
        data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
        data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
        data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
        data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
        __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
}

static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
{
        data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
        data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
        data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
}

void __init intel_pmu_pebs_data_source_grt(void)
{
        __intel_pmu_pebs_data_source_grt(pebs_data_source);
}

void __init intel_pmu_pebs_data_source_adl(void)
{
        u64 *data_source;

        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
        memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
        __intel_pmu_pebs_data_source_skl(false, data_source);

        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
        memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
        __intel_pmu_pebs_data_source_grt(data_source);
}

static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
{
        data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
        data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
        data_source[0x0a] = OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, NONE);
        data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
        data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
        data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_mtl(void)
{
        u64 *data_source;

        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
        memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
        __intel_pmu_pebs_data_source_skl(false, data_source);

        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
        memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
        __intel_pmu_pebs_data_source_cmt(data_source);
}

void __init intel_pmu_pebs_data_source_arl_h(void)
{
        u64 *data_source;

        intel_pmu_pebs_data_source_lnl();

        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
        memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
        __intel_pmu_pebs_data_source_cmt(data_source);
}

void __init intel_pmu_pebs_data_source_cmt(void)
{
        __intel_pmu_pebs_data_source_cmt(pebs_data_source);
}

/* Version for Lion Cove and later */
static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
        P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),  /* 0x00: ukn L3 */
        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),       /* 0x01: L1 hit */
        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),       /* 0x02: L1 hit */
        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),      /* 0x03: LFB/L1 Miss Handling Buffer hit */
        0,                                                      /* 0x04: Reserved */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),       /* 0x05: L2 Hit */
        OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE),                 /* 0x06: L2 Miss Handling Buffer Hit */
        0,                                                      /* 0x07: Reserved */
        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),       /* 0x08: L3 Hit */
        0,                                                      /* 0x09: Reserved */
        0,                                                      /* 0x0a: Reserved */
        0,                                                      /* 0x0b: Reserved */
        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOPX, FWD),       /* 0x0c: L3 Hit Snoop Fwd */
        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),       /* 0x0d: L3 Hit Snoop HitM */
        0,                                                      /* 0x0e: Reserved */
        P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x0f: L3 Miss Snoop HitM */
        OP_LH | LEVEL(MSC) | P(SNOOP, NONE),                    /* 0x10: Memory-side Cache Hit */
        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
};

void __init intel_pmu_pebs_data_source_lnl(void)
{
        u64 *data_source;

        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
        memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));

        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
        memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
        __intel_pmu_pebs_data_source_cmt(data_source);
}

/* Version for Panther Cove and later */

/* L2 hit */
#define PNC_PEBS_DATA_SOURCE_MAX        16
static u64 pnc_pebs_l2_hit_data_source[PNC_PEBS_DATA_SOURCE_MAX] = {
        P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),    /* 0x00: non-cache access */
        OP_LH               | LEVEL(L0) | P(SNOOP, NONE),       /* 0x01: L0 hit */
        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),       /* 0x02: L1 hit */
        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),      /* 0x03: L1 Miss Handling Buffer hit */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),       /* 0x04: L2 Hit Clean */
        0,                                                      /* 0x05: Reserved */
        0,                                                      /* 0x06: Reserved */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HIT),        /* 0x07: L2 Hit Snoop HIT */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HITM),       /* 0x08: L2 Hit Snoop Hit Modified */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, MISS),       /* 0x09: Prefetch Promotion */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, MISS),       /* 0x0a: Cross Core Prefetch Promotion */
        0,                                                      /* 0x0b: Reserved */
        0,                                                      /* 0x0c: Reserved */
        0,                                                      /* 0x0d: Reserved */
        0,                                                      /* 0x0e: Reserved */
        OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE),       /* 0x0f: uncached */
};

/* Version for Arctic Wolf and later */

/* L2 hit */
#define ARW_PEBS_DATA_SOURCE_MAX        16
static u64 arw_pebs_l2_hit_data_source[ARW_PEBS_DATA_SOURCE_MAX] = {
        P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),    /* 0x00: non-cache access */
        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),       /* 0x01: L1 hit */
        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),      /* 0x02: WCB Hit */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),       /* 0x03: L2 Hit Clean */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HIT),        /* 0x04: L2 Hit Snoop HIT */
        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, HITM),       /* 0x05: L2 Hit Snoop Hit Modified */
        OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE),       /* 0x06: uncached */
        0,                                                      /* 0x07: Reserved */
        0,                                                      /* 0x08: Reserved */
        0,                                                      /* 0x09: Reserved */
        0,                                                      /* 0x0a: Reserved */
        0,                                                      /* 0x0b: Reserved */
        0,                                                      /* 0x0c: Reserved */
        0,                                                      /* 0x0d: Reserved */
        0,                                                      /* 0x0e: Reserved */
        0,                                                      /* 0x0f: Reserved */
};

/* L2 miss */
#define OMR_DATA_SOURCE_MAX             16
static u64 omr_data_source[OMR_DATA_SOURCE_MAX] = {
        P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA),    /* 0x00: invalid */
        0,                                                      /* 0x01: Reserved */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_SHARE),    /* 0x02: local CA shared cache */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_NON_SHARE),/* 0x03: local CA non-shared cache */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_IO),       /* 0x04: other CA IO agent */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_SHARE),    /* 0x05: other CA shared cache */
        OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_NON_SHARE),/* 0x06: other CA non-shared cache */
        OP_LH | LEVEL(RAM) | P(REGION, MMIO),                   /* 0x07: MMIO */
        OP_LH | LEVEL(RAM) | P(REGION, MEM0),                   /* 0x08: Memory region 0 */
        OP_LH | LEVEL(RAM) | P(REGION, MEM1),                   /* 0x09: Memory region 1 */
        OP_LH | LEVEL(RAM) | P(REGION, MEM2),                   /* 0x0a: Memory region 2 */
        OP_LH | LEVEL(RAM) | P(REGION, MEM3),                   /* 0x0b: Memory region 3 */
        OP_LH | LEVEL(RAM) | P(REGION, MEM4),                   /* 0x0c: Memory region 4 */
        OP_LH | LEVEL(RAM) | P(REGION, MEM5),                   /* 0x0d: Memory region 5 */
        OP_LH | LEVEL(RAM) | P(REGION, MEM6),                   /* 0x0e: Memory region 6 */
        OP_LH | LEVEL(RAM) | P(REGION, MEM7),                   /* 0x0f: Memory region 7 */
};

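/*
 * Translate an L2-miss OMR encoding into generic perf_mem_data_src bits,
 * layering remote and snoop information on top of the omr_data_source[]
 * entry selected by the low 4 bits.
 */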
static u64 parse_omr_data_source(u8 dse)
{
        union omr_encoding omr;
        u64 val = 0;

        omr.omr_full = dse;
        val = omr_data_source[omr.omr_source];
        if (omr.omr_source > 0x1 && omr.omr_source < 0x7)
                val |= omr.omr_remote ? P(LVL, REM_CCE1) : 0;
        else if (omr.omr_source > 0x7)
                val |= omr.omr_remote ? P(LVL, REM_RAM1) : P(LVL, LOC_RAM);

        if (omr.omr_remote)
                val |= REM;

        if (omr.omr_source == 0x2) {
                u8 snoop = omr.omr_snoop | (omr.omr_promoted << 1);

                if (omr.omr_hitm)
                        val |= P(SNOOP, HITM);
                else if (snoop == 0x0)
                        val |= P(SNOOP, NA);
                else if (snoop == 0x1)
                        val |= P(SNOOP, MISS);
                else if (snoop == 0x2)
                        val |= P(SNOOP, HIT);
                else if (snoop == 0x3)
                        val |= P(SNOOP, NONE);
        } else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) {
                val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
                val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0;
        } else {
                val |= P(SNOOP, NONE);
        }

        return val;
}

static u64 precise_store_data(u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

        dse.val = status;

        /*
         * bit 4: TLB access
         * 1 = store missed the 2nd level TLB,
         *     so it either hit the walker or the OS;
         * 0 = store hit the 2nd level TLB
         */
        if (dse.st_stlb_miss)
                val |= P(TLB, MISS);
        else
                val |= P(TLB, HIT);

        /*
         * bit 0: hit L1 data cache
         * if not set, then all we know is that
         * it missed L1D
         */
        if (dse.st_l1d_hit)
                val |= P(LVL, HIT);
        else
                val |= P(LVL, MISS);

        /*
         * bit 5: Locked prefix
         */
        if (dse.st_locked)
                val |= P(LOCK, LOCKED);

        return val;
}

static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
        union perf_mem_data_src dse;

        dse.val = PERF_MEM_NA;

        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                dse.mem_op = PERF_MEM_OP_STORE;
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
                dse.mem_op = PERF_MEM_OP_LOAD;

        /*
         * L1 info only valid for following events:
         *
         * MEM_UOPS_RETIRED.STLB_MISS_STORES
         * MEM_UOPS_RETIRED.LOCK_STORES
         * MEM_UOPS_RETIRED.SPLIT_STORES
         * MEM_UOPS_RETIRED.ALL_STORES
         */
        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
                if (status & 1)
                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
                else
                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
        }
        return dse.val;
}

static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
{
        /*
         * TLB access
         * 0 = did not miss 2nd level TLB
         * 1 = missed 2nd level TLB
         */
        if (tlb)
                *val |= P(TLB, MISS) | P(TLB, L2);
        else
                *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

        /* locked prefix */
        if (lock)
                *val |= P(LOCK, LOCKED);
}

/* Retrieve the latency data for e-core of ADL */
static u64 __grt_latency_data(struct perf_event *event, u64 status,
                               u8 dse, bool tlb, bool lock, bool blk)
{
        u64 val;

        WARN_ON_ONCE(is_hybrid() &&
                     hybrid_pmu(event->pmu)->pmu_type == hybrid_big);

        dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
        val = hybrid_var(event->pmu, pebs_data_source)[dse];

        pebs_set_tlb_lock(&val, tlb, lock);

        if (blk)
                val |= P(BLK, DATA);
        else
                val |= P(BLK, NA);

        return val;
}

u64 grt_latency_data(struct perf_event *event, u64 status)
{
        union intel_x86_pebs_dse dse;

        dse.val = status;

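        /*
         * Note: on the ADL e-core, DSE bit 4 is the lock bit and bit 5 is
         * the TLB access bit (the reverse of the p-core layout), so
         * ld_stlb_miss/ld_locked are intentionally passed in swapped order.
         */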
        return __grt_latency_data(event, status, dse.ld_dse,
                                  dse.ld_locked, dse.ld_stlb_miss,
                                  dse.ld_data_blk);
}

/* Retrieve the latency data for e-core of MTL */
u64 cmt_latency_data(struct perf_event *event, u64 status)
{
        union intel_x86_pebs_dse dse;

        dse.val = status;

        return __grt_latency_data(event, status, dse.mtl_dse,
                                  dse.mtl_stlb_miss, dse.mtl_locked,
                                  dse.mtl_fwd_blk);
}

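/* Retrieve the latency data for the e-core (Arctic Wolf) */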
static u64 arw_latency_data(struct perf_event *event, u64 status)
{
        union intel_x86_pebs_dse dse;
        union perf_mem_data_src src;
        u64 val;

        dse.val = status;

        if (!dse.arw_l2_miss)
                val = arw_pebs_l2_hit_data_source[dse.arw_dse & 0xf];
        else
                val = parse_omr_data_source(dse.arw_dse);

        if (!val)
                val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

        if (dse.arw_stlb_miss)
                val |= P(TLB, MISS) | P(TLB, L2);
        else
                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

        if (dse.arw_locked)
                val |= P(LOCK, LOCKED);

        if (dse.arw_data_blk)
                val |= P(BLK, DATA);
        if (dse.arw_addr_blk)
                val |= P(BLK, ADDR);
        if (!dse.arw_data_blk && !dse.arw_addr_blk)
                val |= P(BLK, NA);

        src.val = val;
        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                src.mem_op = P(OP, STORE);

        return src.val;
}

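/* Retrieve the latency data for the p-core (Lion Cove) */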
static u64 lnc_latency_data(struct perf_event *event, u64 status)
{
        union intel_x86_pebs_dse dse;
        union perf_mem_data_src src;
        u64 val;

        dse.val = status;

        /* LNC core latency data */
        val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
        if (!val)
                val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

        if (dse.lnc_stlb_miss)
                val |= P(TLB, MISS) | P(TLB, L2);
        else
                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

        if (dse.lnc_locked)
                val |= P(LOCK, LOCKED);

        if (dse.lnc_data_blk)
                val |= P(BLK, DATA);
        if (dse.lnc_addr_blk)
                val |= P(BLK, ADDR);
        if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
                val |= P(BLK, NA);

        src.val = val;
        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                src.mem_op = P(OP, STORE);

        return src.val;
}

u64 lnl_latency_data(struct perf_event *event, u64 status)
{
        struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);

        if (pmu->pmu_type == hybrid_small)
                return cmt_latency_data(event, status);

        return lnc_latency_data(event, status);
}

u64 arl_h_latency_data(struct perf_event *event, u64 status)
{
        struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);

        if (pmu->pmu_type == hybrid_tiny)
                return cmt_latency_data(event, status);

        return lnl_latency_data(event, status);
}

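/* Retrieve the latency data for the p-core (Panther Cove) */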
u64 pnc_latency_data(struct perf_event *event, u64 status)
{
        union intel_x86_pebs_dse dse;
        union perf_mem_data_src src;
        u64 val;

        dse.val = status;

        if (!dse.pnc_l2_miss)
                val = pnc_pebs_l2_hit_data_source[dse.pnc_dse & 0xf];
        else
                val = parse_omr_data_source(dse.pnc_dse);

        if (!val)
                val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

        if (dse.pnc_stlb_miss)
                val |= P(TLB, MISS) | P(TLB, L2);
        else
                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

        if (dse.pnc_locked)
                val |= P(LOCK, LOCKED);

        if (dse.pnc_data_blk)
                val |= P(BLK, DATA);
        if (dse.pnc_addr_blk)
                val |= P(BLK, ADDR);
        if (!dse.pnc_data_blk && !dse.pnc_addr_blk)
                val |= P(BLK, NA);

        src.val = val;
        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                src.mem_op = P(OP, STORE);

        return src.val;
}

u64 nvl_latency_data(struct perf_event *event, u64 status)
{
        struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);

        if (pmu->pmu_type == hybrid_small)
                return arw_latency_data(event, status);

        return pnc_latency_data(event, status);
}

static u64 load_latency_data(struct perf_event *event, u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val;

        dse.val = status;

        /*
         * use the mapping table for bit 0-3
         */
        val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];

        /*
         * Nehalem models do not support TLB or lock info.
         */
        if (x86_pmu.pebs_no_tlb) {
                val |= P(TLB, NA) | P(LOCK, NA);
                return val;
        }

        pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);

        /*
         * Ice Lake and earlier models do not support block infos.
         */
        if (!x86_pmu.pebs_block) {
                val |= P(BLK, NA);
                return val;
        }
        /*
         * bit 6: load was blocked since its data could not be forwarded
         *        from a preceding store
         */
        if (dse.ld_data_blk)
                val |= P(BLK, DATA);

        /*
         * bit 7: load was blocked due to potential address conflict with
         *        a preceding store
         */
        if (dse.ld_addr_blk)
                val |= P(BLK, ADDR);

        if (!dse.ld_data_blk && !dse.ld_addr_blk)
                val |= P(BLK, NA);

        return val;
}

static u64 store_latency_data(struct perf_event *event, u64 status)
{
        union intel_x86_pebs_dse dse;
        union perf_mem_data_src src;
        u64 val;

        dse.val = status;

        /*
         * use the mapping table for bit 0-3
         */
        val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];

        pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);

        val |= P(BLK, NA);

        /*
         * the pebs_data_source table is only for loads
         * so override the mem_op to say STORE instead
         */
        src.val = val;
        src.mem_op = P(OP, STORE);

        return src.val;
}

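/* Basic PEBS record: general purpose register state only */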
struct pebs_record_core {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8,  r9,  r10, r11;
        u64 r12, r13, r14, r15;
};

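/*
 * Nehalem extends the basic record with status, data linear address,
 * data source and latency fields.
 */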
struct pebs_record_nhm {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8,  r9,  r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8,  r9,  r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
        u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
        struct {
                u32 cycles_last_block     : 32,
                    hle_abort             : 1,
                    rtm_abort             : 1,
                    instruction_abort     : 1,
                    non_instruction_abort : 1,
                    retry                 : 1,
                    data_conflict         : 1,
                    capacity_writes       : 1,
                    capacity_reads        : 1;
        };
        u64         value;
};

#define PEBS_HSW_TSX_FLAGS      0xff00000000ULL

/* Same as HSW, plus TSC */

struct pebs_record_skl {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8,  r9,  r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
        u64 real_ip, tsx_tuning;
        u64 tsc;
};

void init_debug_store_on_cpu(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds)
                return;

        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
                     (u32)((u64)(unsigned long)ds),
                     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
        if (!per_cpu(cpu_hw_events, cpu).ds)
                return;

        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

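/*
 * Per-CPU scratch buffer used by the PEBS IP fixup on CPUs that do not
 * report the eventing IP (PEBS format < 2), see alloc_pebs_buffer().
 */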
static DEFINE_PER_CPU(void *, insn_buffer);

static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
        unsigned long start = (unsigned long)cea;
        phys_addr_t pa;
        size_t msz = 0;

        pa = virt_to_phys(addr);

        preempt_disable();
        for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
                cea_set_pte(cea, pa, prot);

        /*
         * This is a cross-CPU update of the cpu_entry_area, we must shoot down
         * all TLB entries for it.
         */
        flush_tlb_kernel_range(start, start + size);
        preempt_enable();
}

static void ds_clear_cea(void *cea, size_t size)
{
        unsigned long start = (unsigned long)cea;
        size_t msz = 0;

        preempt_disable();
        for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
                cea_set_pte(cea, 0, PAGE_NONE);

        flush_tlb_kernel_range(start, start + size);
        preempt_enable();
}

static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
        unsigned int order = get_order(size);
        int node = cpu_to_node(cpu);
        struct page *page;

        page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
        return page ? page_address(page) : NULL;
}

static void dsfree_pages(const void *buffer, size_t size)
{
        if (buffer)
                free_pages((unsigned long)buffer, get_order(size));
}

static int alloc_pebs_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        struct debug_store *ds = hwev->ds;
        size_t bsiz = x86_pmu.pebs_buffer_size;
        int max, node = cpu_to_node(cpu);
        void *buffer, *insn_buff, *cea;

        if (!intel_pmu_has_pebs())
                return 0;

        buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
        if (unlikely(!buffer))
                return -ENOMEM;

        if (x86_pmu.arch_pebs) {
                hwev->pebs_vaddr = buffer;
                return 0;
        }

        /*
         * HSW+ already provides us the eventing ip; no need to allocate this
         * buffer then.
         */
        if (x86_pmu.intel_cap.pebs_format < 2) {
                insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
                if (!insn_buff) {
                        dsfree_pages(buffer, bsiz);
                        return -ENOMEM;
                }
                per_cpu(insn_buffer, cpu) = insn_buff;
        }
        hwev->pebs_vaddr = buffer;
        /* Update the cpu entry area mapping */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
        ds->pebs_buffer_base = (unsigned long) cea;
        ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
        ds->pebs_index = ds->pebs_buffer_base;
        max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
        ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
        return 0;
}

static void release_pebs_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        void *cea;

        if (!intel_pmu_has_pebs())
                return;

        if (x86_pmu.ds_pebs) {
                kfree(per_cpu(insn_buffer, cpu));
                per_cpu(insn_buffer, cpu) = NULL;

                /* Clear the fixmap */
                cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
                ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
        }

        dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
        hwev->pebs_vaddr = NULL;
}

static int alloc_bts_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        struct debug_store *ds = hwev->ds;
        void *buffer, *cea;
        int max;

        if (!x86_pmu.bts)
                return 0;

        buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
        if (unlikely(!buffer)) {
                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
                return -ENOMEM;
        }
        hwev->ds_bts_vaddr = buffer;
        /* Update the fixmap */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
        ds->bts_buffer_base = (unsigned long) cea;
        ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
        ds->bts_index = ds->bts_buffer_base;
        max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
        ds->bts_absolute_maximum = ds->bts_buffer_base +
                                        max * BTS_RECORD_SIZE;
        ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
                                        (max / 16) * BTS_RECORD_SIZE;
        return 0;
}

static void release_bts_buffer(int cpu)
{
        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
        void *cea;

        if (!x86_pmu.bts)
                return;

        /* Clear the fixmap */
        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
        ds_clear_cea(cea, BTS_BUFFER_SIZE);
        dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
        hwev->ds_bts_vaddr = NULL;
}

static int alloc_ds_buffer(int cpu)
{
        struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;

        memset(ds, 0, sizeof(*ds));
        per_cpu(cpu_hw_events, cpu).ds = ds;
        return 0;
}

static void release_ds_buffer(int cpu)
{
        per_cpu(cpu_hw_events, cpu).ds = NULL;
}

void release_ds_buffers(void)
{
        int cpu;

        if (!x86_pmu.bts && !x86_pmu.ds_pebs)
                return;

        for_each_possible_cpu(cpu)
                release_ds_buffer(cpu);

        for_each_possible_cpu(cpu) {
                /*
                 * Again, ignore errors from offline CPUs; they will no longer
                 * observe cpu_hw_events.ds and will not program the DS_AREA
                 * when they come up.
                 */
                fini_debug_store_on_cpu(cpu);
        }

        for_each_possible_cpu(cpu) {
                if (x86_pmu.ds_pebs)
                        release_pebs_buffer(cpu);
                release_bts_buffer(cpu);
        }
}

void reserve_ds_buffers(void)
{
        int bts_err = 0, pebs_err = 0;
        int cpu;

        x86_pmu.bts_active = 0;

        if (x86_pmu.ds_pebs)
                x86_pmu.pebs_active = 0;

        if (!x86_pmu.bts && !x86_pmu.ds_pebs)
                return;

        if (!x86_pmu.bts)
                bts_err = 1;

        if (!x86_pmu.ds_pebs)
                pebs_err = 1;

        for_each_possible_cpu(cpu) {
                if (alloc_ds_buffer(cpu)) {
                        bts_err = 1;
                        pebs_err = 1;
                }

                if (!bts_err && alloc_bts_buffer(cpu))
                        bts_err = 1;

                if (x86_pmu.ds_pebs && !pebs_err &&
                    alloc_pebs_buffer(cpu))
                        pebs_err = 1;

                if (bts_err && pebs_err)
                        break;
        }

        if (bts_err) {
                for_each_possible_cpu(cpu)
                        release_bts_buffer(cpu);
        }

        if (x86_pmu.ds_pebs && pebs_err) {
                for_each_possible_cpu(cpu)
                        release_pebs_buffer(cpu);
        }

        if (bts_err && pebs_err) {
                for_each_possible_cpu(cpu)
                        release_ds_buffer(cpu);
        } else {
                if (x86_pmu.bts && !bts_err)
                        x86_pmu.bts_active = 1;

                if (x86_pmu.ds_pebs && !pebs_err)
                        x86_pmu.pebs_active = 1;

                for_each_possible_cpu(cpu) {
                        /*
                         * Ignore wrmsr_on_cpu() errors for offline CPUs; they
                         * will get this call through intel_pmu_cpu_starting().
                         */
                        init_debug_store_on_cpu(cpu);
                }
        }
}

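/*
 * Architectural PEBS does not use the DS area; its per-CPU output buffer
 * is allocated separately and programmed via MSR_IA32_PEBS_BASE.
 */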
inline int alloc_arch_pebs_buf_on_cpu(int cpu)
{
        if (!x86_pmu.arch_pebs)
                return 0;

        return alloc_pebs_buffer(cpu);
}

inline void release_arch_pebs_buf_on_cpu(int cpu)
{
        if (!x86_pmu.arch_pebs)
                return;

        release_pebs_buffer(cpu);
}

void init_arch_pebs_on_cpu(int cpu)
{
        struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
        u64 arch_pebs_base;

        if (!x86_pmu.arch_pebs)
                return;

        if (!cpuc->pebs_vaddr) {
                WARN(1, "Failed to allocate PEBS buffer on CPU %d\n", cpu);
                x86_pmu.pebs_active = 0;
                return;
        }

        /*
         * 4KB-aligned pointer to the output buffer
         * (__alloc_pages_node() returns a page-aligned address).
         * Buffer Size = 4KB * 2^SIZE, as a physically contiguous
         * buffer (__alloc_pages_node() with order).
         */
        arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
        wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
                     (u32)(arch_pebs_base >> 32));
        x86_pmu.pebs_active = 1;
}

inline void fini_arch_pebs_on_cpu(int cpu)
{
        if (!x86_pmu.arch_pebs)
                return;

        wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
        EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);

void intel_pmu_enable_bts(u64 config)
{
        unsigned long debugctlmsr;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr |= DEBUGCTLMSR_TR;
        debugctlmsr |= DEBUGCTLMSR_BTS;
        if (config & ARCH_PERFMON_EVENTSEL_INT)
                debugctlmsr |= DEBUGCTLMSR_BTINT;

        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

        update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        unsigned long debugctlmsr;

        if (!cpuc->ds)
                return;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr &=
                ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
                  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

        update_debugctlmsr(debugctlmsr);
}

int intel_pmu_drain_bts_buffer(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct bts_record {
                u64     from;
                u64     to;
                u64     flags;
        };
        struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
        struct bts_record *at, *base, *top;
        struct perf_output_handle handle;
        struct perf_event_header header;
        struct perf_sample_data data;
        unsigned long skip = 0;
        struct pt_regs regs;

        if (!event)
                return 0;

        if (!x86_pmu.bts_active)
                return 0;

        base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
        top  = (struct bts_record *)(unsigned long)ds->bts_index;

        if (top <= base)
                return 0;

        memset(&regs, 0, sizeof(regs));

        ds->bts_index = ds->bts_buffer_base;

        perf_sample_data_init(&data, 0, event->hw.last_period);

        /*
         * BTS leaks kernel addresses in branches across the cpl boundary,
         * such as traps or system calls, so unless the user is asking for
         * kernel tracing (and right now it's not possible), we'd need to
         * filter them out. But first we need to count how many of those we
         * have in the current batch. This is an extra O(n) pass, however,
         * it's much faster than the other one especially considering that
         * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
         * alloc_bts_buffer()).
         */
        for (at = base; at < top; at++) {
                /*
                 * Note that right now *this* BTS code only works if
                 * attr::exclude_kernel is set, but let's keep this extra
                 * check here in case that changes.
                 */
                if (event->attr.exclude_kernel &&
                    (kernel_ip(at->from) || kernel_ip(at->to)))
                        skip++;
        }

        /*
         * Prepare a generic sample, i.e. fill in the invariant fields.
         * We will overwrite the from and to address before we output
         * the sample.
         */
        rcu_read_lock();
        perf_prepare_sample(&data, event, &regs);
        perf_prepare_header(&header, &data, event, &regs);

        if (perf_output_begin(&handle, &data, event,
                              header.size * (top - base - skip)))
                goto unlock;

        for (at = base; at < top; at++) {
                /* Filter out any records that contain kernel addresses. */
                if (event->attr.exclude_kernel &&
                    (kernel_ip(at->from) || kernel_ip(at->to)))
                        continue;

                data.ip         = at->from;
                data.addr       = at->to;

                perf_output_sample(&handle, &header, &data, event);
        }

        perf_output_end(&handle);

        /* There's new data available. */
        event->hw.interrupts++;
        event->pending_kill = POLL_IN;
unlock:
        rcu_read_unlock();
        return 1;
}

void intel_pmu_drain_pebs_buffer(void)
{
        struct perf_sample_data data;

        static_call(x86_pmu_drain_pebs)(NULL, &data);
}

/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_glm_pebs_event_constraints[] = {
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_grt_pebs_event_constraints[] = {
        /* Allow all events as PEBS with no flags */
        INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
        INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_arw_pebs_event_constraints[] = {
        /* Allow all events as PEBS with no flags */
        INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xff),
        INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xff),
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01d4, 0x1),
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x02d4, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x04d4, 0x4),
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x08d4, 0x8),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_bdw_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};


struct event_constraint intel_skl_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_icl_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL),  /* old INST_RETIRED.PREC_DIST */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL),  /* INST_RETIRED.PREC_DIST */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),  /* SLOTS */

        INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),   /* MEM_INST_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),   /* MEM_INST_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),   /* MEM_INST_RETIRED.ALL_STORES */

        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */

        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),                /* MEM_INST_RETIRED.* */

        /*
         * Everything else is handled by PMU_FL_PEBS_ALL, because we
         * need the full constraints from the main table.
         */

        EVENT_CONSTRAINT_END
};

struct event_constraint intel_glc_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),

        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
        INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
        INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),   /* MEM_INST_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),   /* MEM_INST_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),   /* MEM_INST_RETIRED.ALL_STORES */

        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),

        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),

        /*
         * Everything else is handled by PMU_FL_PEBS_ALL, because we
         * need the full constraints from the main table.
         */

        EVENT_CONSTRAINT_END
};

struct event_constraint intel_lnc_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),

        INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc),
        INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),   /* MEM_INST_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),   /* MEM_INST_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),   /* MEM_INST_RETIRED.ALL_STORES */

        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),

        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),

        /*
         * Everything else is handled by PMU_FL_PEBS_ALL, because we
         * need the full constraints from the main table.
         */

        EVENT_CONSTRAINT_END
};

struct event_constraint intel_pnc_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),

        INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0xfc),
        INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),   /* MEM_INST_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),   /* MEM_INST_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),   /* MEM_INST_RETIRED.ALL_STORES */

        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),

        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
        INTEL_FLAGS_EVENT_CONSTRAINT(0xd6, 0xf),

        /*
         * Everything else is handled by PMU_FL_PEBS_ALL, because we
         * need the full constraints from the main table.
         */

        EVENT_CONSTRAINT_END
};

struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
        struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
        struct event_constraint *c;

        if (!event->attr.precise_ip)
                return NULL;

        if (pebs_constraints) {
                for_each_event_constraint(c, pebs_constraints) {
                        if (constraint_match(c, event->hw.config)) {
                                event->hw.flags |= c->flags;
                                return c;
                        }
                }
        }

        /*
         * Extended PEBS support
         * Makes the PEBS code search the normal constraints.
         */
        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
                return NULL;

        return &emptyconstraint;
}

/*
 * We need the sched_task callback even for per-cpu events when we use
 * the large interrupt threshold, such that we can provide PID and TID
 * to PEBS samples.
 */
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
        if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
                return false;

        return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!sched_in && pebs_needs_sched_cb(cpuc))
                intel_pmu_drain_pebs_buffer();
}

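/*
 * Program the DS interrupt threshold: when every PEBS event uses the
 * large threshold, let the buffer fill up (leaving room for one record
 * per possibly active counter); otherwise raise the PMI after a single
 * record.
 */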
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
        struct debug_store *ds = cpuc->ds;
        int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
        u64 threshold;
        int reserved;

        if (cpuc->n_pebs_via_pt)
                return;

        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
                reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
        else
                reserved = max_pebs_events;

        if (cpuc->n_pebs == cpuc->n_large_pebs) {
                threshold = ds->pebs_absolute_maximum -
                        reserved * cpuc->pebs_record_size;
        } else {
                threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
        }

        ds->pebs_interrupt_threshold = threshold;
}

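/*
 * Helpers to extract and encode the general-purpose and fixed counter
 * bitmaps within the PEBS_DATA_CFG value for the counter snapshot group.
 */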
#define PEBS_DATACFG_CNTRS(x)                                           \
        ((x >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)

#define PEBS_DATACFG_CNTR_BIT(x)                                        \
        (((1ULL << x) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT)

#define PEBS_DATACFG_FIX(x)                                             \
        ((x >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)

#define PEBS_DATACFG_FIX_BIT(x)                                         \
        (((1ULL << (x)) & PEBS_DATACFG_FIX_MASK)                        \
         << PEBS_DATACFG_FIX_SHIFT)

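/*
 * Recompute the size of a single adaptive PEBS record from the current
 * pebs_data_cfg by adding up the sizes of all enabled groups.
 */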
static void adaptive_pebs_record_size_update(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 pebs_data_cfg = cpuc->pebs_data_cfg;
        int sz = sizeof(struct pebs_basic);

        if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
                sz += sizeof(struct pebs_meminfo);
        if (pebs_data_cfg & PEBS_DATACFG_GP)
                sz += sizeof(struct pebs_gprs);
        if (pebs_data_cfg & PEBS_DATACFG_XMMS)
                sz += sizeof(struct pebs_xmm);
        if (pebs_data_cfg & PEBS_DATACFG_LBRS)
                sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
        if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
                sz += sizeof(struct pebs_cntr_header);

                /* Metrics base and Metrics Data */
                if (pebs_data_cfg & PEBS_DATACFG_METRICS)
                        sz += 2 * sizeof(u64);

                if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
                        sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) +
                               hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) *
                              sizeof(u64);
                }
        }

        cpuc->pebs_record_size = sz;
}

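/*
 * Request the counter snapshot for one event of a sample-read group:
 * metric events need the METRICS group, all others need the CNTR group
 * plus the bit of their assigned (fixed or general-purpose) counter.
 */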
static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
                                        int idx, u64 *pebs_data_cfg)
{
        if (is_metric_event(event)) {
                *pebs_data_cfg |= PEBS_DATACFG_METRICS;
                return;
        }

        *pebs_data_cfg |= PEBS_DATACFG_CNTR;

        if (idx >= INTEL_PMC_IDX_FIXED)
                *pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED);
        else
                *pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx);
}


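/*
 * Once counters have been assigned, collect the counter snapshot
 * configuration for all PEBS counter-group events and flag a DS
 * software update if any new bits showed up.
 */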
void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
{
        struct perf_event *event;
        u64 pebs_data_cfg = 0;
        int i;

        for (i = 0; i < cpuc->n_events; i++) {
                event = cpuc->event_list[i];
                if (!is_pebs_counter_event_group(event))
                        continue;
                __intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg);
        }

        if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
                cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
}

#define PERF_PEBS_MEMINFO_TYPE  (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
                                PERF_SAMPLE_PHYS_ADDR |                      \
                                PERF_SAMPLE_WEIGHT_TYPE |                    \
                                PERF_SAMPLE_TRANSACTION |                    \
                                PERF_SAMPLE_DATA_PAGE_SIZE)

static u64 pebs_update_adaptive_cfg(struct perf_event *event)
{
        struct perf_event_attr *attr = &event->attr;
        u64 sample_type = attr->sample_type;
        u64 pebs_data_cfg = 0;
        bool gprs, tsx_weight;

        if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
            attr->precise_ip > 1)
                return pebs_data_cfg;

        if (sample_type & PERF_PEBS_MEMINFO_TYPE)
                pebs_data_cfg |= PEBS_DATACFG_MEMINFO;

        /*
         * We need GPRs when:
         * + the user requested them
         * + precise_ip < 2, to report the non-event IP
         * + RTM TSX weight sampling, which needs the abort code from AX.
         */
        gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) &&
                (attr->sample_regs_intr & PEBS_GP_REGS)) ||
               ((sample_type & PERF_SAMPLE_REGS_USER) &&
                (attr->sample_regs_user & PEBS_GP_REGS));

        tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
                     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
                      x86_pmu.rtm_abort_event);

        if (gprs || (attr->precise_ip < 2) || tsx_weight)
                pebs_data_cfg |= PEBS_DATACFG_GP;

        if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
            (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
                pebs_data_cfg |= PEBS_DATACFG_XMMS;

        if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
                /*
                 * For now always log all LBRs. Could configure this
                 * later.
                 */
                pebs_data_cfg |= PEBS_DATACFG_LBRS |
                        ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
        }

        return pebs_data_cfg;
}

static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
                  struct perf_event *event, bool add)
{
        struct pmu *pmu = event->pmu;

        /*
         * Make sure we get updated with the first PEBS event.
         * During removal, ->pebs_data_cfg is still valid for
         * the last PEBS event. Don't clear it.
         */
        if ((cpuc->n_pebs == 1) && add)
                cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;

        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
                if (!needed_cb)
                        perf_sched_cb_inc(pmu);
                else
                        perf_sched_cb_dec(pmu);

                cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
        }

        /*
         * The PEBS record doesn't shrink on pmu::del(). Doing so would require
         * iterating all remaining PEBS events to reconstruct the config.
         */
        if (x86_pmu.intel_cap.pebs_baseline && add) {
                u64 pebs_data_cfg;

                pebs_data_cfg = pebs_update_adaptive_cfg(event);
                /*
                 * Be sure to update the thresholds when we change the record.
                 */
                if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
                        cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
        }
}

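/*
 * Build the arch-PEBS data configuration for a single event: the
 * adaptive groups derived from its attributes plus any counter
 * snapshot/metrics bits already accumulated in cpuc->pebs_data_cfg.
 */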
u64 intel_get_arch_pebs_data_config(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 pebs_data_cfg = 0;
        u64 cntr_mask;

        if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
                return 0;

        pebs_data_cfg |= pebs_update_adaptive_cfg(event);

        cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
                    (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
                    PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
        pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;

        return pebs_data_cfg;
}

void intel_pmu_pebs_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs++;
        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
                cpuc->n_large_pebs++;
        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
                cpuc->n_pebs_via_pt++;

        pebs_update_state(needed_cb, cpuc, event, true);
}

static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!is_pebs_pt(event))
                return;

        if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
                cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
}

static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        struct debug_store *ds = cpuc->ds;
        u64 value = ds->pebs_event_reset[hwc->idx];
        u32 base = MSR_RELOAD_PMC0;
        unsigned int idx = hwc->idx;

        if (!is_pebs_pt(event))
                return;

        if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
                cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;

        cpuc->pebs_enabled |= PEBS_OUTPUT_PT;

        if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
                base = MSR_RELOAD_FIXED_CTR0;
                idx = hwc->idx - INTEL_PMC_IDX_FIXED;
                if (x86_pmu.intel_cap.pebs_format < 5)
                        value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
                else
                        value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
        }
        wrmsrq(base + idx, value);
}

static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
{
        if (cpuc->n_pebs == cpuc->n_large_pebs &&
            cpuc->n_pebs != cpuc->n_pebs_via_pt)
                intel_pmu_drain_pebs_buffer();
}

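/*
 * Mark the counter as PEBS-enabled and mask its regular overflow
 * interrupt; PEBS events are signalled through the PEBS threshold PMI
 * instead.
 */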
static void __intel_pmu_pebs_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
}

void intel_pmu_pebs_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
        struct hw_perf_event *hwc = &event->hw;
        struct debug_store *ds = cpuc->ds;
        unsigned int idx = hwc->idx;

        __intel_pmu_pebs_enable(event);

        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled |= 1ULL << 63;

        if (x86_pmu.intel_cap.pebs_baseline) {
                hwc->config |= ICL_EVENTSEL_ADAPTIVE;
                if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
                        /*
                         * drain_pebs() assumes uniform record size;
                         * hence we need to drain when changing said
                         * size.
                         */
                        intel_pmu_drain_pebs_buffer();
                        adaptive_pebs_record_size_update();
                        wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg);
                        cpuc->active_pebs_data_cfg = pebs_data_cfg;
                }
        }
        if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
                cpuc->pebs_data_cfg = pebs_data_cfg;
                pebs_update_threshold(cpuc);
        }

        if (idx >= INTEL_PMC_IDX_FIXED) {
                if (x86_pmu.intel_cap.pebs_format < 5)
                        idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
                else
                        idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
        }

        /*
         * Use auto-reload if possible to save an MSR write in the PMI.
         * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD
         * may change the sample period at any time.
         */
        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
                ds->pebs_event_reset[idx] =
                        (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
        } else {
                ds->pebs_event_reset[idx] = 0;
        }

        intel_pmu_pebs_via_pt_enable(event);
}

void intel_pmu_pebs_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs--;
        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
                cpuc->n_large_pebs--;
        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
                cpuc->n_pebs_via_pt--;

        pebs_update_state(needed_cb, cpuc, event, false);
}

static void __intel_pmu_pebs_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        intel_pmu_drain_large_pebs(cpuc);
        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        __intel_pmu_pebs_disable(event);

        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
            (x86_pmu.version < 5))
                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled &= ~(1ULL << 63);

        intel_pmu_pebs_via_pt_disable(event);

        if (cpuc->enabled)
                wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->pebs_enabled)
                wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->pebs_enabled)
                __intel_pmu_pebs_disable_all();
}

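/*
 * Fix up the off-by-1 PEBS IP: starting at the branch target of the
 * most recent LBR entry, decode forward until the sampled IP is reached;
 * the start of the previous instruction is the precise IP. Returns 1
 * when regs->ip is (now) exact, 0 when no fixup was possible.
 */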
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        unsigned long from = cpuc->lbr_entries[0].from;
        unsigned long old_to, to = cpuc->lbr_entries[0].to;
        unsigned long ip = regs->ip;
        int is_64bit = 0;
        void *kaddr;
        int size;

        /*
         * We don't need a fixup if the PEBS assist is fault-like
         */
        if (!x86_pmu.intel_cap.pebs_trap)
                return 1;

        /*
         * No LBR entry, no basic block, no rewinding
         */
        if (!cpuc->lbr_stack.nr || !from || !to)
                return 0;

        /*
         * Basic blocks should never cross user/kernel boundaries
         */
        if (kernel_ip(ip) != kernel_ip(to))
                return 0;

        /*
         * unsigned math, either ip is before the start (impossible) or
         * the basic block is larger than 1 page (sanity)
         */
        if ((ip - to) > PEBS_FIXUP_SIZE)
                return 0;

        /*
         * We sampled a branch insn, rewind using the LBR stack
         */
        if (ip == to) {
                set_linear_ip(regs, from);
                return 1;
        }

        size = ip - to;
        if (!kernel_ip(ip)) {
                int bytes;
                u8 *buf = this_cpu_read(insn_buffer);

                /* 'size' must fit our buffer, see above */
                bytes = copy_from_user_nmi(buf, (void __user *)to, size);
                if (bytes != 0)
                        return 0;

                kaddr = buf;
        } else {
                kaddr = (void *)to;
        }

        do {
                struct insn insn;

                old_to = to;

#ifdef CONFIG_X86_64
                is_64bit = kernel_ip(to) || any_64bit_mode(regs);
#endif
                insn_init(&insn, kaddr, size, is_64bit);

                /*
                 * Make sure there was not a problem decoding the instruction.
                 * This is doubly important because we have an infinite loop if
                 * insn.length=0.
                 */
                if (insn_get_length(&insn))
                        break;

                to += insn.length;
                kaddr += insn.length;
                size -= insn.length;
        } while (to < ip);

        if (to == ip) {
                set_linear_ip(regs, old_to);
                return 1;
        }

        /*
         * Even though we decoded the basic block, the instruction stream
         * never matched the given IP; either the TO or the IP got corrupted.
         */
        return 0;
}

static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
{
        if (tsx_tuning) {
                union hsw_tsx_tuning tsx = { .value = tsx_tuning };
                return tsx.cycles_last_block;
        }
        return 0;
}

static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
{
        u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

        /* For RTM XABORTs also log the abort code from AX */
        if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
                txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
        return txn;
}

static inline u64 get_pebs_status(void *n)
{
        if (x86_pmu.intel_cap.pebs_format < 4)
                return ((struct pebs_record_nhm *)n)->status;
        return ((struct pebs_basic *)n)->applicable_counters;
}

#define PERF_X86_EVENT_PEBS_HSW_PREC \
                (PERF_X86_EVENT_PEBS_ST_HSW | \
                 PERF_X86_EVENT_PEBS_LD_HSW | \
                 PERF_X86_EVENT_PEBS_NA_HSW)

static u64 get_data_src(struct perf_event *event, u64 aux)
{
        u64 val = PERF_MEM_NA;
        int fl = event->hw.flags;
        bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

        if (fl & PERF_X86_EVENT_PEBS_LDLAT)
                val = load_latency_data(event, aux);
        else if (fl & PERF_X86_EVENT_PEBS_STLAT)
                val = store_latency_data(event, aux);
        else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
                val = x86_pmu.pebs_latency_data(event, aux);
        else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
                val = precise_datala_hsw(event, aux);
        else if (fst)
                val = precise_store_data(aux);
        return val;
}

static void setup_pebs_time(struct perf_event *event,
                            struct perf_sample_data *data,
                            u64 tsc)
{
        /* Converting to a user-defined clock is not supported yet. */
        if (event->attr.use_clockid != 0)
                return;

        /*
         * The conversion is not supported when the TSC is unstable.
         * That is a corner case and very unlikely to happen. If it does
         * happen, the TSC in the PEBS record is dropped and the time
         * falls back to perf_event_clock().
         */
        if (!using_native_sched_clock() || !sched_clock_stable())
                return;

        data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
        data->sample_flags |= PERF_SAMPLE_TIME;
}

#define PERF_SAMPLE_ADDR_TYPE   (PERF_SAMPLE_ADDR |             \
                                 PERF_SAMPLE_PHYS_ADDR |        \
                                 PERF_SAMPLE_DATA_PAGE_SIZE)

static void setup_pebs_fixed_sample_data(struct perf_event *event,
                                   struct pt_regs *iregs, void *__pebs,
                                   struct perf_sample_data *data,
                                   struct pt_regs *regs)
{
        /*
         * We cast to the biggest pebs_record but are careful not to
         * unconditionally access the 'extra' entries.
         */
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct pebs_record_skl *pebs = __pebs;
        u64 sample_type;
        int fll;

        if (pebs == NULL)
                return;

        sample_type = event->attr.sample_type;
        fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;

        perf_sample_data_init(data, 0, event->hw.last_period);

        /*
         * Use latency for weight (only available with PEBS-LL)
         */
        if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
                data->weight.full = pebs->lat;
                data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
        }

        /*
         * data.data_src encodes the data source
         */
        if (sample_type & PERF_SAMPLE_DATA_SRC) {
                data->data_src.val = get_data_src(event, pebs->dse);
                data->sample_flags |= PERF_SAMPLE_DATA_SRC;
        }

        /*
         * We must however always use iregs for the unwinder to stay sane; the
         * record BP,SP,IP can point into thin air when the record is from a
         * previous PMI context or an (I)RET happened between the record and
         * PMI.
         */
        perf_sample_save_callchain(data, event, iregs);

        /*
         * We use the interrupt regs as a base because the PEBS record does not
         * contain a full regs set, specifically it seems to lack segment
         * descriptors, which get used by things like user_mode().
         *
         * In the simple case fix up only the IP for PERF_SAMPLE_IP.
         */
        *regs = *iregs;

        /*
         * Initialize regs->flags from the PEBS record and clear the
         * exact bit (which uses x86 EFLAGS reserved bit 3), i.e. do not
         * rely on it being zero:
         */
        regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;

        if (sample_type & PERF_SAMPLE_REGS_INTR) {
                regs->ax = pebs->ax;
                regs->bx = pebs->bx;
                regs->cx = pebs->cx;
                regs->dx = pebs->dx;
                regs->si = pebs->si;
                regs->di = pebs->di;

                regs->bp = pebs->bp;
                regs->sp = pebs->sp;

#ifndef CONFIG_X86_32
                regs->r8 = pebs->r8;
                regs->r9 = pebs->r9;
                regs->r10 = pebs->r10;
                regs->r11 = pebs->r11;
                regs->r12 = pebs->r12;
                regs->r13 = pebs->r13;
                regs->r14 = pebs->r14;
                regs->r15 = pebs->r15;
#endif
        }

        if (event->attr.precise_ip > 1) {
                /*
                 * Haswell and later processors have an 'eventing IP'
                 * (real IP) which fixes the off-by-1 skid in hardware.
                 * Use it when precise_ip >= 2 :
                 */
                if (x86_pmu.intel_cap.pebs_format >= 2) {
                        set_linear_ip(regs, pebs->real_ip);
                        regs->flags |= PERF_EFLAGS_EXACT;
                } else {
                        /* Otherwise, use PEBS off-by-1 IP: */
                        set_linear_ip(regs, pebs->ip);

                        /*
                         * With precise_ip >= 2, try to fix up the off-by-1 IP
                         * using the LBR. If successful, the fixup function
                         * corrects regs->ip and calls set_linear_ip() on regs:
                         */
                        if (intel_pmu_pebs_fixup_ip(regs))
                                regs->flags |= PERF_EFLAGS_EXACT;
                }
        } else {
                /*
                 * When precise_ip == 1, return the PEBS off-by-1 IP,
                 * no fixup attempted:
                 */
                set_linear_ip(regs, pebs->ip);
        }


        if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
            x86_pmu.intel_cap.pebs_format >= 1) {
                data->addr = pebs->dla;
                data->sample_flags |= PERF_SAMPLE_ADDR;
        }

        if (x86_pmu.intel_cap.pebs_format >= 2) {
                /* Only set the TSX weight when no memory weight. */
                if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
                        data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
                        data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
                }
                if (sample_type & PERF_SAMPLE_TRANSACTION) {
                        data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
                                                              pebs->ax);
                        data->sample_flags |= PERF_SAMPLE_TRANSACTION;
                }
        }

        /*
         * v3 supplies an accurate time stamp, so use it for the sample
         * time.
         *
         * We can only do this for the default trace clock.
         */
        if (x86_pmu.intel_cap.pebs_format >= 3)
                setup_pebs_time(event, data, pebs->tsc);

        perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}

static void adaptive_pebs_save_regs(struct pt_regs *regs,
                                    struct pebs_gprs *gprs)
{
        regs->ax = gprs->ax;
        regs->bx = gprs->bx;
        regs->cx = gprs->cx;
        regs->dx = gprs->dx;
        regs->si = gprs->si;
        regs->di = gprs->di;
        regs->bp = gprs->bp;
        regs->sp = gprs->sp;
#ifndef CONFIG_X86_32
        regs->r8 = gprs->r8;
        regs->r9 = gprs->r9;
        regs->r10 = gprs->r10;
        regs->r11 = gprs->r11;
        regs->r12 = gprs->r12;
        regs->r13 = gprs->r13;
        regs->r14 = gprs->r14;
        regs->r15 = gprs->r15;
#endif
}

static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc)
{
        int shift = 64 - x86_pmu.cntval_bits;
        struct hw_perf_event *hwc;
        u64 delta, prev_pmc;

        /*
         * A recorded counter may not have an assigned event in the
         * following cases. The value should be dropped.
         * - An event is deleted while another PEBS event is still active.
         *   The PEBS record doesn't shrink on pmu::del().
         *   If the counter of the deleted event once occurred in a PEBS
         *   record, PEBS still records the counter until the counter is
         *   reassigned.
         * - An event is stopped for some reason, e.g., throttled.
         *   During this period, another event is added and takes over
         *   the counter of the stopped event. The stopped event is then
         *   assigned to a new, uninitialized counter, since
         *   x86_pmu_start(RELOAD) is not invoked for a stopped event.
         *   The PEBS_DATA_CFG is updated regardless of the event state.
         *   The uninitialized counter can be recorded in a PEBS record.
         *   But cpuc->events[uninitialized_counter] is always NULL,
         *   because the event is stopped. The uninitialized value is
         *   safely dropped.
         */
        if (!event)
                return;

        hwc = &event->hw;
        prev_pmc = local64_read(&hwc->prev_count);

        /* Only update the count when the PMU is disabled */
        WARN_ON(this_cpu_read(cpu_hw_events.enabled));
        local64_set(&hwc->prev_count, pmc);

        delta = (pmc << shift) - (prev_pmc << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);
}

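/*
 * Walk the counter snapshot payload (general-purpose counters, then
 * fixed counters, then optional metrics) and fold each value into the
 * corresponding event's count.
 */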
static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
                                              struct perf_event *event,
                                              struct pebs_cntr_header *cntr,
                                              void *next_record)
{
        int bit;

        for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
                intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record);
                next_record += sizeof(u64);
        }

        for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
                /* The slots event will be handled with perf_metric later */
                if ((cntr->metrics == INTEL_CNTR_METRICS) &&
                    (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) {
                        next_record += sizeof(u64);
                        continue;
                }
                intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED],
                                            *(u64 *)next_record);
                next_record += sizeof(u64);
        }

        /* HW will reload the value right after the overflow. */
        if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
                local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);

        if (cntr->metrics == INTEL_CNTR_METRICS) {
                static_call(intel_pmu_update_topdown_event)
                           (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS],
                            (u64 *)next_record);
                next_record += 2 * sizeof(u64);
        }
}

#define PEBS_LATENCY_MASK                       0xffff

static inline void __setup_perf_sample_data(struct perf_event *event,
                                            struct pt_regs *iregs,
                                            struct perf_sample_data *data)
{
        perf_sample_data_init(data, 0, event->hw.last_period);

        /*
         * We must however always use iregs for the unwinder to stay sane; the
         * record BP,SP,IP can point into thin air when the record is from a
         * previous PMI context or an (I)RET happened between the record and
         * PMI.
         */
        perf_sample_save_callchain(data, event, iregs);
}

static inline void __setup_pebs_basic_group(struct perf_event *event,
                                            struct pt_regs *regs,
                                            struct perf_sample_data *data,
                                            u64 sample_type, u64 ip,
                                            u64 tsc, u16 retire)
{
        /* The ip in basic is EventingIP */
        set_linear_ip(regs, ip);
        regs->flags = PERF_EFLAGS_EXACT;
        setup_pebs_time(event, data, tsc);

        if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
                data->weight.var3_w = retire;
}

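/*
 * GPR group: with precise_ip < 2 report the off-by-1 IP from the GPRs
 * and drop the exact flag; copy the full register set when the sample
 * asks for interrupt or user registers.
 */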
static inline void __setup_pebs_gpr_group(struct perf_event *event,
                                          struct pt_regs *regs,
                                          struct pebs_gprs *gprs,
                                          u64 sample_type)
{
        if (event->attr.precise_ip < 2) {
                set_linear_ip(regs, gprs->ip);
                regs->flags &= ~PERF_EFLAGS_EXACT;
        }

        if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
                adaptive_pebs_save_regs(regs, gprs);
}

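/*
 * Memory auxiliary group: fill in the weight/latency, data source,
 * data address and transaction fields requested by the sample type.
 */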
static inline void __setup_pebs_meminfo_group(struct perf_event *event,
                                              struct perf_sample_data *data,
                                              u64 sample_type, u64 latency,
                                              u16 instr_latency, u64 address,
                                              u64 aux, u64 tsx_tuning, u64 ax)
{
        if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
                u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);

                data->weight.var2_w = instr_latency;

                /*
                 * Although meminfo::latency is defined as a u64,
                 * only the lower 32 bits contain valid data in
                 * practice on Ice Lake and earlier platforms.
                 */
                if (sample_type & PERF_SAMPLE_WEIGHT)
                        data->weight.full = latency ?: tsx_latency;
                else
                        data->weight.var1_dw = (u32)latency ?: tsx_latency;

                data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
        }

        if (sample_type & PERF_SAMPLE_DATA_SRC) {
                data->data_src.val = get_data_src(event, aux);
                data->sample_flags |= PERF_SAMPLE_DATA_SRC;
        }

        if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
                data->addr = address;
                data->sample_flags |= PERF_SAMPLE_ADDR;
        }

        if (sample_type & PERF_SAMPLE_TRANSACTION) {
                data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
                data->sample_flags |= PERF_SAMPLE_TRANSACTION;
        }
}

/*
 * With adaptive PEBS the layout depends on what fields are configured.
 */
static void setup_pebs_adaptive_sample_data(struct perf_event *event,
                                            struct pt_regs *iregs, void *__pebs,
                                            struct perf_sample_data *data,
                                            struct pt_regs *regs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 sample_type = event->attr.sample_type;
        struct pebs_basic *basic = __pebs;
        void *next_record = basic + 1;
        struct pebs_meminfo *meminfo = NULL;
        struct pebs_gprs *gprs = NULL;
        struct x86_perf_regs *perf_regs;
        u64 format_group;
        u16 retire;

        if (basic == NULL)
                return;

        perf_regs = container_of(regs, struct x86_perf_regs, regs);
        perf_regs->xmm_regs = NULL;

        format_group = basic->format_group;

        __setup_perf_sample_data(event, iregs, data);

        *regs = *iregs;

        /* basic group */
        retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
                        basic->retire_latency : 0;
        __setup_pebs_basic_group(event, regs, data, sample_type,
                                 basic->ip, basic->tsc, retire);

        /*
         * The MEMINFO record comes before the GP record, but
         * PERF_SAMPLE_TRANSACTION needs gprs->ax.
         * Save the pointer here and process it later.
         */
        if (format_group & PEBS_DATACFG_MEMINFO) {
                meminfo = next_record;
                next_record = meminfo + 1;
        }

        if (format_group & PEBS_DATACFG_GP) {
                gprs = next_record;
                next_record = gprs + 1;

                __setup_pebs_gpr_group(event, regs, gprs, sample_type);
        }

        if (format_group & PEBS_DATACFG_MEMINFO) {
                u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
                                meminfo->cache_latency : meminfo->mem_latency;
                u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
                                meminfo->instr_latency : 0;
                u64 ax = gprs ? gprs->ax : 0;

                __setup_pebs_meminfo_group(event, data, sample_type, latency,
                                           instr_latency, meminfo->address,
                                           meminfo->aux, meminfo->tsx_tuning,
                                           ax);
        }

        if (format_group & PEBS_DATACFG_XMMS) {
                struct pebs_xmm *xmm = next_record;

                next_record = xmm + 1;
                perf_regs->xmm_regs = xmm->xmm;
        }

        if (format_group & PEBS_DATACFG_LBRS) {
                struct lbr_entry *lbr = next_record;
                int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
                                        & 0xff) + 1;
                next_record = next_record + num_lbr * sizeof(struct lbr_entry);

                if (has_branch_stack(event)) {
                        intel_pmu_store_pebs_lbrs(lbr);
                        intel_pmu_lbr_save_brstack(data, cpuc, event);
                }
        }

        if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
                struct pebs_cntr_header *cntr = next_record;
                unsigned int nr;

                next_record += sizeof(struct pebs_cntr_header);
                /*
                 * PEBS_DATA_CFG is a global register holding the
                 * superset configuration for all PEBS events.
                 * For the PEBS record of a non-sample-read group, ignore
                 * the counter snapshot fields.
                 */
                if (is_pebs_counter_event_group(event)) {
                        __setup_pebs_counter_group(cpuc, event, cntr, next_record);
                        data->sample_flags |= PERF_SAMPLE_READ;
                }

                nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
                if (cntr->metrics == INTEL_CNTR_METRICS)
                        nr += 2;
                next_record += nr * sizeof(u64);
        }

        WARN_ONCE(next_record != __pebs + basic->format_size,
                        "PEBS record size %u, expected %llu, config %llx\n",
                        basic->format_size,
                        (u64)(next_record - __pebs),
                        format_group);
}

static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
{
        /* A set continue bit or a null PEBS record indicates a following fragment. */
        return header->cont || !(header->format & GENMASK_ULL(63, 16));
}

static void setup_arch_pebs_sample_data(struct perf_event *event,
                                        struct pt_regs *iregs,
                                        void *__pebs,
                                        struct perf_sample_data *data,
                                        struct pt_regs *regs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 sample_type = event->attr.sample_type;
        struct arch_pebs_header *header = NULL;
        struct arch_pebs_aux *meminfo = NULL;
        struct arch_pebs_gprs *gprs = NULL;
        struct x86_perf_regs *perf_regs;
        void *next_record;
        void *at = __pebs;

        if (at == NULL)
                return;

        perf_regs = container_of(regs, struct x86_perf_regs, regs);
        perf_regs->xmm_regs = NULL;

        __setup_perf_sample_data(event, iregs, data);

        *regs = *iregs;

again:
        header = at;
        next_record = at + sizeof(struct arch_pebs_header);
        if (header->basic) {
                struct arch_pebs_basic *basic = next_record;
                u16 retire = 0;

                next_record = basic + 1;

                if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
                        retire = basic->valid ? basic->retire : 0;
                __setup_pebs_basic_group(event, regs, data, sample_type,
                                 basic->ip, basic->tsc, retire);
        }

        /*
         * The MEMINFO record comes before the GP record, but
         * PERF_SAMPLE_TRANSACTION needs gprs->ax.
         * Save the pointer here and process it later.
         */
        if (header->aux) {
                meminfo = next_record;
                next_record = meminfo + 1;
        }

        if (header->gpr) {
                gprs = next_record;
                next_record = gprs + 1;

                __setup_pebs_gpr_group(event, regs,
                                       (struct pebs_gprs *)gprs,
                                       sample_type);
        }

        if (header->aux) {
                u64 ax = gprs ? gprs->ax : 0;

                __setup_pebs_meminfo_group(event, data, sample_type,
                                           meminfo->cache_latency,
                                           meminfo->instr_latency,
                                           meminfo->address, meminfo->aux,
                                           meminfo->tsx_tuning, ax);
        }

        if (header->xmm) {
                struct pebs_xmm *xmm;

                next_record += sizeof(struct arch_pebs_xer_header);

                xmm = next_record;
                perf_regs->xmm_regs = xmm->xmm;
                next_record = xmm + 1;
        }

        if (header->lbr) {
                struct arch_pebs_lbr_header *lbr_header = next_record;
                struct lbr_entry *lbr;
                int num_lbr;

                next_record = lbr_header + 1;
                lbr = next_record;

                num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
                                lbr_header->depth :
                                header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
                next_record += num_lbr * sizeof(struct lbr_entry);

                if (has_branch_stack(event)) {
                        intel_pmu_store_pebs_lbrs(lbr);
                        intel_pmu_lbr_save_brstack(data, cpuc, event);
                }
        }

        if (header->cntr) {
                struct arch_pebs_cntr_header *cntr = next_record;
                unsigned int nr;

                next_record += sizeof(struct arch_pebs_cntr_header);

                if (is_pebs_counter_event_group(event)) {
                        __setup_pebs_counter_group(cpuc, event,
                                (struct pebs_cntr_header *)cntr, next_record);
                        data->sample_flags |= PERF_SAMPLE_READ;
                }

                nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
                if (cntr->metrics == INTEL_CNTR_METRICS)
                        nr += 2;
                next_record += nr * sizeof(u64);
        }

        /* Parse the following fragments, if any. */
        if (arch_pebs_record_continued(header)) {
                at = at + header->size;
                goto again;
        }
}

static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        void *at;
        u64 pebs_status;

        /*
         * fmt0 does not have a status bitfield (does not use the
         * pebs_record_nhm format)
         */
        if (x86_pmu.intel_cap.pebs_format < 1)
                return base;

        if (base == NULL)
                return NULL;

        for (at = base; at < top; at += cpuc->pebs_record_size) {
                unsigned long status = get_pebs_status(at);

                if (test_bit(bit, (unsigned long *)&status)) {
                        /* PEBS v3 has accurate status bits */
                        if (x86_pmu.intel_cap.pebs_format >= 3)
                                return at;

                        if (status == (1 << bit))
                                return at;

                        /* clear non-PEBS bit and re-check */
                        pebs_status = status & cpuc->pebs_enabled;
                        pebs_status &= PEBS_COUNTER_MASK;
                        if (pebs_status == (1 << bit))
                                return at;
                }
        }
        return NULL;
}

/*
 * Special variant of intel_pmu_save_and_restart() for auto-reload.
 */
static int
intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
{
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - x86_pmu.cntval_bits;
        u64 period = hwc->sample_period;
        u64 prev_raw_count, new_raw_count;
        s64 new, old;

        WARN_ON(!period);

        /*
         * drain_pebs() only happens when the PMU is disabled.
         */
        WARN_ON(this_cpu_read(cpu_hw_events.enabled));

        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = rdpmc(hwc->event_base_rdpmc);
        local64_set(&hwc->prev_count, new_raw_count);

        /*
         * Since the counter increments a negative counter value and
         * overflows on the sign switch, giving the interval:
         *
         *   [-period, 0]
         *
         * the difference between two consecutive reads is:
         *
         *   A) value2 - value1;
         *      when no overflows have happened in between,
         *
         *   B) (0 - value1) + (value2 - (-period));
         *      when one overflow happened in between,
         *
         *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
         *      when @n overflows happened in between.
         *
         * Here A) is the obvious difference, B) is the extension to the
         * discrete interval, where the first term is to the top of the
         * interval and the second term is from the bottom of the next
         * interval and C) the extension to multiple intervals, where the
         * middle term is the whole intervals covered.
         *
         * An equivalent of C, by reduction, is:
         *
         *   value2 - value1 + n * period
         */
        new = ((s64)(new_raw_count << shift) >> shift);
        old = ((s64)(prev_raw_count << shift) >> shift);
        local64_add(new - old + count * period, &event->count);

        local64_set(&hwc->period_left, -new);

        perf_event_update_userpage(event);

        return 0;
}

typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
                         struct perf_sample_data *, struct pt_regs *);

static struct pt_regs dummy_iregs;

static __always_inline void
__intel_pmu_pebs_event(struct perf_event *event,
                       struct pt_regs *iregs,
                       struct pt_regs *regs,
                       struct perf_sample_data *data,
                       void *at,
                       setup_fn setup_sample)
{
        setup_sample(event, iregs, at, data, regs);
        perf_event_output(event, data, regs);
}

static __always_inline void
__intel_pmu_pebs_last_event(struct perf_event *event,
                            struct pt_regs *iregs,
                            struct pt_regs *regs,
                            struct perf_sample_data *data,
                            void *at,
                            int count,
                            setup_fn setup_sample)
{
        struct hw_perf_event *hwc = &event->hw;

        setup_sample(event, iregs, at, data, regs);
        if (iregs == &dummy_iregs) {
                /*
                 * The PEBS records may be drained in the non-overflow context,
                 * e.g., large PEBS + context switch. Perf should treat the
                 * last record the same as the other PEBS records and not
                 * invoke the generic overflow handler.
                 */
                perf_event_output(event, data, regs);
        } else {
                /*
                 * All but the last records are processed.
                 * The last one is left to be able to call the overflow handler.
                 */
                perf_event_overflow(event, data, regs);
        }

        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
                if ((is_pebs_counter_event_group(event))) {
                        /*
                         * The value of each sample has already been updated
                         * while setting up the corresponding sample data.
                         */
                        perf_event_update_userpage(event);
                } else {
                        /*
                         * Currently, auto-reload is only enabled in fixed-period
                         * mode. The reload value is always hwc->sample_period.
                         * This may need to change if auto-reload is ever enabled
                         * in freq mode.
                         */
                        intel_pmu_save_and_restart_reload(event, count);
                }
        } else {
                /*
                 * For a non-precise event, it's possible that counter
                 * snapshotting records a positive value for the overflowed
                 * event. The HW auto-reload mechanism then resets the counter
                 * to 0 immediately, because pebs_event_reset is cleared when
                 * PERF_X86_EVENT_AUTO_RELOAD is not set. The counter may
                 * therefore be observed going backwards in a PMI handler.
                 *
                 * Since the event value has already been updated when
                 * processing the counter snapshotting record, we only need
                 * to set the new period for the counter.
                 */
                if (is_pebs_counter_event_group(event))
                        static_call(x86_pmu_set_period)(event);
                else
                        intel_pmu_save_and_restart(event);
        }
}

static __always_inline void
__intel_pmu_pebs_events(struct perf_event *event,
                        struct pt_regs *iregs,
                        struct perf_sample_data *data,
                        void *base, void *top,
                        int bit, int count,
                        setup_fn setup_sample)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct x86_perf_regs perf_regs;
        struct pt_regs *regs = &perf_regs.regs;
        void *at = get_next_pebs_record_by_bit(base, top, bit);
        int cnt = count;

        if (!iregs)
                iregs = &dummy_iregs;

        while (cnt > 1) {
                __intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
                at += cpuc->pebs_record_size;
                at = get_next_pebs_record_by_bit(at, top, bit);
                cnt--;
        }

        __intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
}

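/*
 * Drain PEBS records in the original pebs_record_core format, where
 * only a single event (on PMC0) can be PEBS-active at a time.
 */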
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = cpuc->events[0]; /* PMC0 only */
        struct pebs_record_core *at, *top;
        int n;

        if (!x86_pmu.pebs_active)
                return;

        at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

        /*
         * Whatever else happens, drain the thing
         */
        ds->pebs_index = ds->pebs_buffer_base;

        if (!test_bit(0, cpuc->active_mask))
                return;

        WARN_ON_ONCE(!event);

        if (!event->attr.precise_ip)
                return;

        n = top - at;
        if (n <= 0) {
                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
                        intel_pmu_save_and_restart_reload(event, 0);
                return;
        }

        __intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
                                setup_pebs_fixed_sample_data);
}

static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
{
        u64 pebs_enabled = cpuc->pebs_enabled & mask;
        struct perf_event *event;
        int bit;

        /*
         * drain_pebs() can be called twice in a short period for an
         * auto-reload event in pmu::read(), with no overflows having
         * happened in between. intel_pmu_save_and_restart_reload() still
         * needs to be called to update the event->count in this case.
         */
        for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
                event = cpuc->events[bit];
                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
                        intel_pmu_save_and_restart_reload(event, 0);
        }
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event;
        void *base, *at, *top;
        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        int max_pebs_events = intel_pmu_max_num_pebs(NULL);
        int bit, i, size;
        u64 mask;

        if (!x86_pmu.pebs_active)
                return;

        base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

        ds->pebs_index = ds->pebs_buffer_base;

        mask = x86_pmu.pebs_events_mask;
        size = max_pebs_events;
        if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
                mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
                size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
        }

        if (unlikely(base >= top)) {
                intel_pmu_pebs_event_update_no_drain(cpuc, mask);
                return;
        }

        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;
                u64 pebs_status;

                pebs_status = p->status & cpuc->pebs_enabled;
                pebs_status &= mask;

                /* PEBS v3 has more accurate status bits */
                if (x86_pmu.intel_cap.pebs_format >= 3) {
                        for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
                                counts[bit]++;

                        continue;
                }

                /*
                 * On some CPUs the PEBS status can be zero when PEBS is
                 * racing with clearing of GLOBAL_STATUS.
                 *
                 * Normally we would drop that record, but in the
                 * case when there is only a single active PEBS event
                 * we can assume it's for that event.
                 */
                if (!pebs_status && cpuc->pebs_enabled &&
                        !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
                        pebs_status = p->status = cpuc->pebs_enabled;

                bit = find_first_bit((unsigned long *)&pebs_status,
                                     max_pebs_events);

                if (!(x86_pmu.pebs_events_mask & (1 << bit)))
                        continue;

                /*
                 * The PEBS hardware does not deal well with events that
                 * happen close to each other and set multiple status bits,
                 * but this should be rare.
                 *
                 * If such a group includes one PEBS and several non-PEBS
                 * events, the PEBS record is not affected and is handled
                 * normally. (slow path)
                 *
                 * If the group includes two or more PEBS events, their
                 * records can be collapsed into a single one, and it is
                 * then impossible to reconstruct all events that caused
                 * the record. This is called a collision; when it happens,
                 * the record is dropped.
                 */
                if (pebs_status != (1ULL << bit)) {
                        for_each_set_bit(i, (unsigned long *)&pebs_status, size)
                                error[i]++;
                        continue;
                }

                counts[bit]++;
        }

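        /* Now emit the samples and lost-sample notifications gathered above. */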
        for_each_set_bit(bit, (unsigned long *)&mask, size) {
                if ((counts[bit] == 0) && (error[bit] == 0))
                        continue;

                event = cpuc->events[bit];
                if (WARN_ON_ONCE(!event))
                        continue;

                if (WARN_ON_ONCE(!event->attr.precise_ip))
                        continue;

                /* Log the number of dropped samples. */
                if (error[bit]) {
                        perf_log_lost_samples(event, error[bit]);

                        if (iregs)
                                perf_event_account_interrupt(event);
                }

                if (counts[bit]) {
                        __intel_pmu_pebs_events(event, iregs, data, base,
                                                top, bit, counts[bit],
                                                setup_pebs_fixed_sample_data);
                }
        }
}

static __always_inline void
__intel_pmu_handle_pebs_record(struct pt_regs *iregs,
                               struct pt_regs *regs,
                               struct perf_sample_data *data,
                               void *at, u64 pebs_status,
                               short *counts, void **last,
                               setup_fn setup_sample)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *event;
        int bit;

        for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
                event = cpuc->events[bit];

                if (WARN_ON_ONCE(!event) ||
                    WARN_ON_ONCE(!event->attr.precise_ip))
                        continue;

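                /*
                 * If a record for this counter is already pending, process it
                 * now and remember the current one instead; the final record
                 * per counter is emitted later from
                 * __intel_pmu_handle_last_pebs_record().
                 */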
                if (counts[bit]++) {
                        __intel_pmu_pebs_event(event, iregs, regs, data,
                                               last[bit], setup_sample);
                }

                last[bit] = at;
        }
}

static __always_inline void
__intel_pmu_handle_last_pebs_record(struct pt_regs *iregs,
                                    struct pt_regs *regs,
                                    struct perf_sample_data *data,
                                    u64 mask, short *counts, void **last,
                                    setup_fn setup_sample)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *event;
        int bit;

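        /* Emit the deferred final record for each counter that produced any records. */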
        for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
                if (!counts[bit])
                        continue;

                event = cpuc->events[bit];

                __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
                                            counts[bit], setup_sample);
        }
}

static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
{
        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct x86_perf_regs perf_regs;
        struct pt_regs *regs = &perf_regs.regs;
        struct pebs_basic *basic;
        void *base, *at, *top;
        u64 mask;

        if (!x86_pmu.pebs_active)
                return;

        base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_basic *)(unsigned long)ds->pebs_index;

        ds->pebs_index = ds->pebs_buffer_base;

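        /* GP and fixed counters may both appear in records; limit to those enabled for PEBS. */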
        mask = hybrid(cpuc->pmu, pebs_events_mask) |
               (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
        mask &= cpuc->pebs_enabled;

        if (unlikely(base >= top)) {
                intel_pmu_pebs_event_update_no_drain(cpuc, mask);
                return;
        }

        if (!iregs)
                iregs = &dummy_iregs;

        /* Process all but the last event for each counter. */
        for (at = base; at < top; at += basic->format_size) {
                u64 pebs_status;

                basic = at;
                if (basic->format_size != cpuc->pebs_record_size)
                        continue;

                pebs_status = mask & basic->applicable_counters;
                __intel_pmu_handle_pebs_record(iregs, regs, data, at,
                                               pebs_status, counts, last,
                                               setup_pebs_adaptive_sample_data);
        }

        __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last,
                                            setup_pebs_adaptive_sample_data);
}

static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
                                      struct perf_sample_data *data)
{
        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
        void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        union arch_pebs_index index;
        struct x86_perf_regs perf_regs;
        struct pt_regs *regs = &perf_regs.regs;
        void *base, *at, *top;
        u64 mask;

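        /* index.wr is the current write offset into the arch-PEBS buffer. */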
        rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);

        if (unlikely(!index.wr)) {
                intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
                return;
        }

        base = cpuc->pebs_vaddr;
        top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT);

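        /*
         * Reset the write pointer and re-arm the buffer; the interrupt
         * threshold depends on whether all events use large PEBS.
         */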
        index.wr = 0;
        index.full = 0;
        index.en = 1;
        if (cpuc->n_pebs == cpuc->n_large_pebs)
                index.thresh = ARCH_PEBS_THRESH_MULTI;
        else
                index.thresh = ARCH_PEBS_THRESH_SINGLE;
        wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);

        mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;

        if (!iregs)
                iregs = &dummy_iregs;

        /* Process all but the last event for each counter. */
        for (at = base; at < top;) {
                struct arch_pebs_header *header;
                struct arch_pebs_basic *basic;
                u64 pebs_status;

                header = at;

                if (WARN_ON_ONCE(!header->size))
                        break;

                /* 1st fragment or single record must have basic group */
                if (!header->basic) {
                        at += header->size;
                        continue;
                }

                basic = at + sizeof(struct arch_pebs_header);
                pebs_status = mask & basic->applicable_counters;
                __intel_pmu_handle_pebs_record(iregs, regs, data, at,
                                               pebs_status, counts, last,
                                               setup_arch_pebs_sample_data);

                /* Skip non-last fragments */
                while (arch_pebs_record_continued(header)) {
                        if (!header->size)
                                break;
                        at += header->size;
                        header = at;
                }

                /* Skip last fragment or the single record */
                at += header->size;
        }

        __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
                                            counts, last,
                                            setup_arch_pebs_sample_data);
}

static void __init intel_arch_pebs_init(void)
{
        /*
         * Current hybrid platforms either support arch-PEBS on all core
         * types or on none of them. So the x86_pmu.arch_pebs flag can be
         * set directly if the boot CPU supports arch-PEBS.
         */
        x86_pmu.arch_pebs = 1;
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
        x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
        x86_pmu.pebs_capable = ~0ULL;
        x86_pmu.flags |= PMU_FL_PEBS_ALL;

        x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
        x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
}

/*
 * PEBS probe and setup
 */

static void __init intel_ds_pebs_init(void)
{
        /*
         * No support for 32-bit formats
         */
        if (!boot_cpu_has(X86_FEATURE_DTES64))
                return;

        x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
        if (x86_pmu.version <= 4)
                x86_pmu.pebs_no_isolation = 1;

        if (x86_pmu.ds_pebs) {
                char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
                char *pebs_qual = "";
                int format = x86_pmu.intel_cap.pebs_format;

                if (format < 4)
                        x86_pmu.intel_cap.pebs_baseline = 0;

                x86_pmu.pebs_enable = intel_pmu_pebs_enable;
                x86_pmu.pebs_disable = intel_pmu_pebs_disable;
                x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
                x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;

                switch (format) {
                case 0:
                        pr_cont("PEBS fmt0%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
                        /*
                         * Using >PAGE_SIZE buffers makes the WRMSR to
                         * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
                         * mysteriously hang on Core2.
                         *
                         * As a workaround, we don't do this.
                         */
                        x86_pmu.pebs_buffer_size = PAGE_SIZE;
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
                        break;

                case 1:
                        pr_cont("PEBS fmt1%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;

                case 2:
                        pr_cont("PEBS fmt2%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;

                case 3:
                        pr_cont("PEBS fmt3%c, ", pebs_type);
                        x86_pmu.pebs_record_size =
                                                sizeof(struct pebs_record_skl);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
                        break;

                case 6:
                        if (x86_pmu.intel_cap.pebs_baseline)
                                x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
                        fallthrough;
                case 5:
                        x86_pmu.pebs_ept = 1;
                        fallthrough;
                case 4:
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
                        x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
                        if (x86_pmu.intel_cap.pebs_baseline) {
                                x86_pmu.large_pebs_flags |=
                                        PERF_SAMPLE_BRANCH_STACK |
                                        PERF_SAMPLE_TIME;
                                x86_pmu.flags |= PMU_FL_PEBS_ALL;
                                x86_pmu.pebs_capable = ~0ULL;
                                pebs_qual = "-baseline";
                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
                        } else {
                                /* Only basic record supported */
                                x86_pmu.large_pebs_flags &=
                                        ~(PERF_SAMPLE_ADDR |
                                          PERF_SAMPLE_TIME |
                                          PERF_SAMPLE_DATA_SRC |
                                          PERF_SAMPLE_TRANSACTION |
                                          PERF_SAMPLE_REGS_USER |
                                          PERF_SAMPLE_REGS_INTR);
                        }
                        pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);

                        /*
                         * PEBS-via-PT is not supported on hybrid platforms,
                         * because not all CPUs of a hybrid machine support it.
                         * The global x86_pmu.intel_cap, which only contains
                         * the common capabilities, is used to check the
                         * availability of the feature. The per-PMU
                         * pebs_output_pt_available on a hybrid machine should
                         * be ignored.
                         */
                        if (x86_pmu.intel_cap.pebs_output_pt_available) {
                                pr_cont("PEBS-via-PT, ");
                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
                        }

                        break;

                default:
                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
                        x86_pmu.ds_pebs = 0;
                }
        }
}

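/* A PEBS format of 0xf indicates arch-PEBS rather than legacy DS-based PEBS. */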
void __init intel_pebs_init(void)
{
        if (x86_pmu.intel_cap.pebs_format == 0xf)
                intel_arch_pebs_init();
        else
                intel_ds_pebs_init();
}

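/* Restore MSR_IA32_DS_AREA to point at this CPU's debug store. */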
void perf_restore_debug_store(void)
{
        struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

        if (!x86_pmu.bts && !x86_pmu.ds_pebs)
                return;

        wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds);
}