#include <x86intrin.h>
#include "pmu.h"
#include "processor.h"
#define NUM_LOOPS 10
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
#define NUM_INSNS_PER_LOOP 6
#define NUM_EXTRA_INSNS 5
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
static uint32_t hardware_pmu_arch_events;
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;
#define X86_PMU_FEATURE_NULL \
({ \
struct kvm_x86_pmu_feature feature = {}; \
\
feature; \
})
static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
{
return !(*(u64 *)&event);
}
struct kvm_intel_pmu_event {
struct kvm_x86_pmu_feature gp_event;
struct kvm_x86_pmu_feature fixed_event;
};
static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
{
const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
[INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
[INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
[INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
[INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
};
kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
return __intel_event_to_feature[idx];
}
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
uint8_t pmu_version,
uint64_t perf_capabilities)
{
struct kvm_vm *vm;
vm = vm_create_with_one_vcpu(vcpu, guest_code);
sync_global_to_guest(vm, kvm_pmu_version);
sync_global_to_guest(vm, hardware_pmu_arch_events);
if (kvm_has_perf_caps)
vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
return vm;
}
static void run_vcpu(struct kvm_vcpu *vcpu)
{
struct ucall uc;
do {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
break;
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
break;
case UCALL_PRINTF:
pr_info("%s", uc.buffer);
break;
case UCALL_DONE:
break;
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
} while (uc.cmd != UCALL_DONE);
}
static uint8_t guest_get_pmu_version(void)
{
return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}
static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
uint64_t count;
count = _rdpmc(pmc);
if (!(hardware_pmu_arch_events & BIT(idx)))
goto sanity_checks;
switch (idx) {
case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
else
GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
break;
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
else
GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
break;
case INTEL_ARCH_LLC_REFERENCES_INDEX:
case INTEL_ARCH_LLC_MISSES_INDEX:
if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
!this_cpu_has(X86_FEATURE_CLFLUSH))
break;
fallthrough;
case INTEL_ARCH_CPU_CYCLES_INDEX:
case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
GUEST_ASSERT_NE(count, 0);
break;
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
"Expected top-down slots >= %u, got count = %lu",
NUM_INSNS_RETIRED, count);
break;
default:
break;
}
sanity_checks:
__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
GUEST_ASSERT_EQ(_rdpmc(pmc), count);
wrmsr(pmc_msr, 0xdead);
GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
do { \
__asm__ __volatile__("wrmsr\n\t" \
" mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
"1:\n\t" \
FEP "enter $0, $0\n\t" \
clflush "\n\t" \
"mfence\n\t" \
"mov %[m], %%eax\n\t" \
FEP "leave\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
FEP "xor %%edx, %%edx\n\t" \
"wrmsr\n\t" \
:: "a"((uint32_t)_value), "d"(_value >> 32), \
"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
); \
} while (0)
#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
do { \
wrmsr(_pmc_msr, 0); \
\
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)
static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
if (is_forced_emulation_enabled)
GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}
static void guest_test_arch_event(uint8_t idx)
{
uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint32_t pmu_version = guest_get_pmu_version();
bool guest_has_perf_global_ctrl = pmu_version >= 2;
struct kvm_x86_pmu_feature gp_event, fixed_event;
uint32_t base_pmc_msr;
unsigned int i;
GUEST_ASSERT(pmu_version);
if (this_cpu_has(X86_FEATURE_PDCM) &&
rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
base_pmc_msr = MSR_IA32_PMC0;
else
base_pmc_msr = MSR_IA32_PERFCTR0;
gp_event = intel_event_to_feature(idx).gp_event;
GUEST_ASSERT_EQ(idx, gp_event.f.bit);
GUEST_ASSERT(nr_gp_counters);
for (i = 0; i < nr_gp_counters; i++) {
uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
ARCH_PERFMON_EVENTSEL_ENABLE |
intel_pmu_arch_events[idx];
wrmsr(MSR_P6_EVNTSEL0 + i, 0);
if (guest_has_perf_global_ctrl)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
__guest_test_arch_event(idx, i, base_pmc_msr + i,
MSR_P6_EVNTSEL0 + i, eventsel);
}
if (!guest_has_perf_global_ctrl)
return;
fixed_event = intel_event_to_feature(idx).fixed_event;
if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
return;
i = fixed_event.f.bit;
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
MSR_CORE_PERF_FIXED_CTR0 + i,
MSR_CORE_PERF_GLOBAL_CTRL,
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}
static void guest_test_arch_events(void)
{
uint8_t i;
for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
guest_test_arch_event(i);
GUEST_DONE();
}
static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
uint8_t length, uint32_t unavailable_mask)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
if (!pmu_version)
return;
unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
pmu_version, perf_capabilities);
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
length);
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
unavailable_mask);
run_vcpu(vcpu);
kvm_vm_free(vm);
}
#define MAX_NR_GP_COUNTERS 8
#define MAX_NR_FIXED_COUNTERS 3
#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
"Expected %s on " #insn "(0x%x), got %s", \
expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
__GUEST_ASSERT(val == expected, \
"Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
msr, expected, val);
static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
uint64_t expected_val)
{
uint8_t vector;
uint64_t val;
vector = rdpmc_safe(rdpmc_idx, &val);
GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
if (expect_success)
GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
if (!is_forced_emulation_enabled)
return;
vector = rdpmc_safe_fep(rdpmc_idx, &val);
GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
if (expect_success)
GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}
static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
uint8_t nr_counters, uint32_t or_mask)
{
const bool pmu_has_fast_mode = !guest_get_pmu_version();
uint8_t i;
for (i = 0; i < nr_possible_counters; i++) {
const uint64_t test_val = 0xffff;
const uint32_t msr = base_msr + i;
const bool expect_success = i < nr_counters || (or_mask & BIT(i));
const uint64_t expected_val = expect_success ? test_val : 0;
const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
msr != MSR_P6_PERFCTR1;
uint32_t rdpmc_idx;
uint8_t vector;
uint64_t val;
vector = wrmsr_safe(msr, test_val);
GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
vector = rdmsr_safe(msr, &val);
GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
if (!expect_gp)
GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
rdpmc_idx = i;
if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
rdpmc_idx |= INTEL_RDPMC_FIXED;
guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
rdpmc_idx |= INTEL_RDPMC_FAST;
guest_test_rdpmc(rdpmc_idx, false, -1ull);
vector = wrmsr_safe(msr, 0);
GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
}
}
static void guest_test_gp_counters(void)
{
uint8_t pmu_version = guest_get_pmu_version();
uint8_t nr_gp_counters = 0;
uint32_t base_msr;
if (pmu_version)
nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
if (pmu_version > 1) {
uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
if (nr_gp_counters)
GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
else
GUEST_ASSERT_EQ(global_ctrl, 0);
}
if (this_cpu_has(X86_FEATURE_PDCM) &&
rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
base_msr = MSR_IA32_PMC0;
else
base_msr = MSR_IA32_PERFCTR0;
guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
GUEST_DONE();
}
static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
uint8_t nr_gp_counters)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
pmu_version, perf_capabilities);
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
nr_gp_counters);
run_vcpu(vcpu);
kvm_vm_free(vm);
}
static void guest_test_fixed_counters(void)
{
uint64_t supported_bitmask = 0;
uint8_t nr_fixed_counters = 0;
uint8_t i;
if (guest_get_pmu_version() >= 2)
nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
if (guest_get_pmu_version() >= 5)
supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
nr_fixed_counters, supported_bitmask);
for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
uint8_t vector;
uint64_t val;
if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
__GUEST_ASSERT(vector == GP_VECTOR,
"Expected #GP for counter %u in FIXED_CTR_CTRL", i);
vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
__GUEST_ASSERT(vector == GP_VECTOR,
"Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
continue;
}
wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
GUEST_ASSERT_NE(val, 0);
}
GUEST_DONE();
}
static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
uint8_t nr_fixed_counters,
uint32_t supported_bitmask)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
pmu_version, perf_capabilities);
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
supported_bitmask);
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
nr_fixed_counters);
run_vcpu(vcpu);
kvm_vm_free(vm);
}
static void test_intel_counters(void)
{
uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
unsigned int i;
uint8_t v, j;
uint32_t k;
const uint64_t perf_caps[] = {
0,
PMU_CAP_FW_WRITES,
};
const uint32_t unavailable_masks[] = {
0x0,
0xffffffffu,
0xaaaaaaaau,
0x55555555u,
0xf0f0f0f0u,
0x0f0f0f0fu,
0xa0a0a0a0u,
0x0a0a0a0au,
0x50505050u,
0x05050505u,
};
uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
"New architectural event(s) detected; please update this test (length = %u, mask = %x)",
this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
if (this_pmu_has(intel_event_to_feature(i).gp_event))
hardware_pmu_arch_events |= BIT(i);
}
for (v = 0; v <= max_pmu_version; v++) {
for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
if (!kvm_has_perf_caps && perf_caps[i])
continue;
pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
v, perf_caps[i]);
for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++)
test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
}
pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
v, perf_caps[i]);
for (j = 0; j <= nr_gp_counters; j++)
test_gp_counters(v, perf_caps[i], j);
pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
v, perf_caps[i]);
for (j = 0; j <= nr_fixed_counters; j++) {
for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
test_fixed_counters(v, perf_caps[i], j, k);
}
}
}
}
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_is_pmu_enabled());
TEST_REQUIRE(host_cpu_is_intel);
TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
test_intel_counters();
return 0;
}