#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/cpu.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
#include <sys/trap.h>
#include <machine/cpufunc.h>
#include <machine/psl.h>
#include <machine/md_var.h>
#include <machine/reg.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <sys/vmm_instruction_emul.h>
#include <sys/vmm_vm.h>
#include <sys/vmm_kernel.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_ioport.h"
#include "vatpic.h"
#include "vlapic.h"
#include "vlapic_priv.h"
#include "vmcb.h"
#include "svm.h"
#include "svm_softc.h"
#include "svm_msr.h"
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
NULL);
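/*
 * Bounds applied when validating guest TSC frequency scaling (see
 * svm_freq_ratio() below): a scaled guest TSC must run at 500MHz or more,
 * and at less than 15x the host TSC frequency.
 */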
#define AMD_TSC_MIN_FREQ 500000000
#define AMD_TSC_MAX_FREQ_RATIO 15
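/* SVM features advertised by CPUID Fn8000_000A %edx, cached at init time */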
static uint32_t svm_feature = 0;
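/*
 * Tunable (normally zero): when set, the instruction bytes provided by
 * decode assist are ignored on nested page faults, forcing a full
 * instruction fetch and decode instead.
 */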
static int disable_npf_assist;
static VMM_STAT_AMD(VCPU_EXITINTINFO, "VM exits during event delivery");
static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry");
static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window");
static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val);
static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val);
static void flush_asid(struct svm_softc *sc, int vcpuid);
static __inline bool
has_flush_by_asid(void)
{
return ((svm_feature & CPUID_AMD_EDX_FLUSH_ASID) != 0);
}
static __inline bool
has_lbr_virt(void)
{
return ((svm_feature & CPUID_AMD_EDX_LBR_VIRT) != 0);
}
static __inline bool
has_decode_assist(void)
{
return ((svm_feature & CPUID_AMD_EDX_DECODE_ASSISTS) != 0);
}
static __inline bool
has_tsc_freq_ctl(void)
{
return ((svm_feature & CPUID_AMD_EDX_TSC_RATE_MSR) != 0);
}
static int
svm_init(void)
{
struct cpuid_regs regs = {
.cp_eax = 0x8000000a,
};
	(void) cpuid_insn(NULL, &regs);
svm_feature = regs.cp_edx;
const uint32_t demand_bits =
(CPUID_AMD_EDX_NESTED_PAGING | CPUID_AMD_EDX_NRIPS);
VERIFY((svm_feature & demand_bits) == demand_bits);
return (0);
}
static void
svm_restore(void)
{
}
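/*
 * The MSR permission bitmap covers three ranges of 8192 MSRs each, per the
 * AMD APM: 0x0000_0000-0x0000_1FFF, 0xC000_0000-0xC000_1FFF and
 * 0xC001_0000-0xC001_1FFF.  Each MSR gets two bits (read and write
 * intercept), so four MSRs share a byte.  For example, MSR_EFER
 * (0xC000_0080) maps to byte index (0x80 + 0x2000) / 4 = 0x820, with bit 0
 * covering reads and bit 1 covering writes.
 */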
#define MSR_PENTIUM_START 0
#define MSR_PENTIUM_END 0x1FFF
#define MSR_AMD6TH_START 0xC0000000UL
#define MSR_AMD6TH_END 0xC0001FFFUL
#define MSR_AMD7TH_START 0xC0010000UL
#define MSR_AMD7TH_END 0xC0011FFFUL
static int
svm_msr_index(uint64_t msr, int *index, int *bit)
{
uint32_t base, off;
*index = -1;
*bit = (msr % 4) * 2;
base = 0;
if (msr <= MSR_PENTIUM_END) {
*index = msr / 4;
return (0);
}
base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1);
if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
off = (msr - MSR_AMD6TH_START);
*index = (off + base) / 4;
return (0);
}
base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1);
if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) {
off = (msr - MSR_AMD7TH_START);
*index = (off + base) / 4;
return (0);
}
return (EINVAL);
}
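/*
 * Grant or revoke direct guest access to an MSR.  The bitmap is initialized
 * to all ones (intercept everything) in svm_vminit(); clearing the read
 * and/or write bit for an MSR lets the guest access it without a #VMEXIT.
 */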
static void
svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write)
{
int index, bit, error;
error = svm_msr_index(msr, &index, &bit);
KASSERT(error == 0, ("%s: invalid msr %lx", __func__, msr));
KASSERT(index >= 0 && index < SVM_MSR_BITMAP_SIZE,
("%s: invalid index %d for msr %lx", __func__, index, msr));
KASSERT(bit >= 0 && bit <= 6, ("%s: invalid bit position %d "
"msr %lx", __func__, bit, msr));
if (read)
perm_bitmap[index] &= ~(1UL << bit);
if (write)
perm_bitmap[index] &= ~(2UL << bit);
}
static void
svm_msr_rw_ok(uint8_t *perm_bitmap, uint64_t msr)
{
svm_msr_perm(perm_bitmap, msr, true, true);
}
static void
svm_msr_rd_ok(uint8_t *perm_bitmap, uint64_t msr)
{
svm_msr_perm(perm_bitmap, msr, true, false);
}
int
svm_get_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask)
{
struct vmcb_ctrl *ctrl;
KASSERT(idx >= 0 && idx < 5, ("invalid intercept index %d", idx));
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
return (ctrl->intercept[idx] & bitmask ? 1 : 0);
}
void
svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask,
int enabled)
{
struct vmcb_ctrl *ctrl;
uint32_t oldval;
KASSERT(idx >= 0 && idx < 5, ("invalid intercept index %d", idx));
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
oldval = ctrl->intercept[idx];
if (enabled)
ctrl->intercept[idx] |= bitmask;
else
ctrl->intercept[idx] &= ~bitmask;
if (ctrl->intercept[idx] != oldval) {
svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
}
}
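/*
 * Set up a fresh VMCB: point it at the I/O and MSR permission bitmaps and
 * the nested page table, choose which CR accesses, exceptions and
 * instructions to intercept, enable virtual interrupt masking, and load
 * sane initial values for EFER, PAT, DR6 and DR7.
 */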
static void
vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
uint64_t msrpm_base_pa, uint64_t np_pml4)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
uint32_t mask;
int n;
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
state = svm_get_vmcb_state(sc, vcpu);
ctrl->iopm_base_pa = iopm_base_pa;
ctrl->msrpm_base_pa = msrpm_base_pa;
ctrl->np_ctrl = NP_ENABLE;
ctrl->n_cr3 = np_pml4;
for (n = 0; n < 16; n++) {
mask = (BIT(n) << 16) | BIT(n);
if (n == 0 || n == 2 || n == 3 || n == 4 || n == 8)
svm_disable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask);
else
svm_enable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask);
}
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_CR0_WRITE);
if (vcpu_trace_exceptions(sc->vm, vcpu)) {
for (n = 0; n < 32; n++) {
			/* Skip the NMI vector and reserved vector 9 */
			if (n == 2 || n == 9) {
				continue;
			}
svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(n));
}
} else {
svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC));
}
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_MSR);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_CPUID);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INTR);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INIT);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_NMI);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SMI);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_RDPMC);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SHUTDOWN);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_FERR_FREEZE);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_HLT);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVD);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVLPGA);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMMCALL);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMLOAD);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMSAVE);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT);
if (vcpu_trap_wbinvd(sc->vm, vcpu) != 0) {
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT,
VMCB_INTCPT_WBINVD);
}
	/* A proper non-zero ASID is chosen in check_asid() before VMRUN */
	ctrl->asid = 0;
	/* Guest %rflags.IF masks only guest interrupts, not host ones */
	ctrl->v_intr_ctrl |= V_INTR_MASKING;
if (has_lbr_virt()) {
ctrl->misc_ctrl |= LBR_VIRT_ENABLE;
}
state->efer = EFER_SVM;
state->g_pat = PAT_VALUE(0, PAT_WRITE_BACK) |
PAT_VALUE(1, PAT_WRITE_THROUGH) |
PAT_VALUE(2, PAT_UNCACHED) |
PAT_VALUE(3, PAT_UNCACHEABLE) |
PAT_VALUE(4, PAT_WRITE_BACK) |
PAT_VALUE(5, PAT_WRITE_THROUGH) |
PAT_VALUE(6, PAT_UNCACHED) |
PAT_VALUE(7, PAT_UNCACHEABLE);
state->dr6 = DBREG_DR6_RESERVED1;
state->dr7 = DBREG_DR7_RESERVED1;
}
static void *
svm_vminit(struct vm *vm)
{
struct svm_softc *svm_sc;
struct svm_vcpu *vcpu;
vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;
int i;
uint16_t maxcpus;
svm_sc = kmem_zalloc(sizeof (*svm_sc), KM_SLEEP);
VERIFY3U(((uintptr_t)svm_sc & PAGE_MASK), ==, 0);
svm_sc->msr_bitmap = vmm_contig_alloc(SVM_MSR_BITMAP_SIZE);
if (svm_sc->msr_bitmap == NULL)
panic("contigmalloc of SVM MSR bitmap failed");
svm_sc->iopm_bitmap = vmm_contig_alloc(SVM_IO_BITMAP_SIZE);
if (svm_sc->iopm_bitmap == NULL)
panic("contigmalloc of SVM IO bitmap failed");
svm_sc->vm = vm;
svm_sc->nptp = vmspace_table_root(vm_get_vmspace(vm));
memset(svm_sc->msr_bitmap, 0xFF, SVM_MSR_BITMAP_SIZE);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_GSBASE);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_FSBASE);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_KGSBASE);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_STAR);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_LSTAR);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_CSTAR);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SF_MASK);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_CS_MSR);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_ESP_MSR);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_EIP_MSR);
svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_PAT);
svm_msr_rd_ok(svm_sc->msr_bitmap, MSR_TSC);
svm_msr_rd_ok(svm_sc->msr_bitmap, MSR_EFER);
memset(svm_sc->iopm_bitmap, 0xFF, SVM_IO_BITMAP_SIZE);
iopm_pa = vtophys(svm_sc->iopm_bitmap);
msrpm_pa = vtophys(svm_sc->msr_bitmap);
pml4_pa = svm_sc->nptp;
maxcpus = vm_get_maxcpus(svm_sc->vm);
for (i = 0; i < maxcpus; i++) {
vcpu = svm_get_vcpu(svm_sc, i);
vcpu->nextrip = ~0;
vcpu->lastcpu = NOCPU;
vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa);
svm_msr_guest_init(svm_sc, i);
}
svm_pmu_init(svm_sc);
return (svm_sc);
}
static void
vm_exit_svm(struct vm_exit *vme, uint64_t code, uint64_t info1, uint64_t info2)
{
vme->exitcode = VM_EXITCODE_SVM;
vme->u.svm.exitcode = code;
vme->u.svm.exitinfo1 = info1;
vme->u.svm.exitinfo2 = info2;
}
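/*
 * Derive the vCPU execution mode: long vs. compatibility mode is selected
 * by EFER.LMA together with CS.L, otherwise CR0.PE distinguishes protected
 * mode from real mode.
 */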
static enum vm_cpu_mode
svm_vcpu_mode(struct vmcb *vmcb)
{
struct vmcb_state *state;
state = &vmcb->state;
if (state->efer & EFER_LMA) {
struct vmcb_segment *seg;
seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
if (seg->attrib & VMCB_CS_ATTRIB_L)
return (CPU_MODE_64BIT);
else
return (CPU_MODE_COMPATIBILITY);
} else if (state->cr0 & CR0_PE) {
return (CPU_MODE_PROTECTED);
} else {
return (CPU_MODE_REAL);
}
}
static enum vm_paging_mode
svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer)
{
if ((cr0 & CR0_PG) == 0)
return (PAGING_MODE_FLAT);
if ((cr4 & CR4_PAE) == 0)
return (PAGING_MODE_32);
if (efer & EFER_LME)
return (PAGING_MODE_64);
else
return (PAGING_MODE_PAE);
}
static void
svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging)
{
struct vmcb_state *state;
state = &vmcb->state;
paging->cr3 = state->cr3;
paging->cpl = state->cpl;
paging->cpu_mode = svm_vcpu_mode(vmcb);
paging->paging_mode = svm_paging_mode(state->cr0, state->cr4,
state->efer);
}
#define UNHANDLED 0
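/*
 * Decode EXITINFO1 for an I/O instruction intercept.  Per the AMD APM,
 * bit 0 is the direction (IN), bit 2 the string flag, bit 3 the REP
 * prefix, bits 4-6 the operand size, bits 7-9 the address size, bits 10-12
 * the effective segment, and bits 16-31 the port number.
 */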
static int
svm_handle_inout(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
struct vm_inout *inout;
struct vie *vie;
uint64_t info1;
struct vm_guest_paging paging;
state = svm_get_vmcb_state(svm_sc, vcpu);
ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
inout = &vmexit->u.inout;
info1 = ctrl->exitinfo1;
inout->bytes = (info1 >> 4) & 0x7;
inout->flags = 0;
inout->flags |= (info1 & BIT(0)) ? INOUT_IN : 0;
inout->flags |= (info1 & BIT(3)) ? INOUT_REP : 0;
inout->flags |= (info1 & BIT(2)) ? INOUT_STR : 0;
inout->port = (uint16_t)(info1 >> 16);
inout->eax = (uint32_t)(state->rax);
svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &paging);
vie = vm_vie_ctx(svm_sc->vm, vcpu);
if ((inout->flags & INOUT_STR) != 0) {
if (!has_decode_assist()) {
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
bzero(&vmexit->u.inst_emul,
sizeof (vmexit->u.inst_emul));
vie_init_other(vie, &paging);
return (UNHANDLED);
}
inout->addrsize = 2 * ((info1 >> 7) & 0x7);
VERIFY(inout->addrsize == 2 || inout->addrsize == 4 ||
inout->addrsize == 8);
if (inout->flags & INOUT_IN) {
inout->segment = 0;
} else {
inout->segment = (info1 >> 10) & 0x7;
}
}
vmexit->exitcode = VM_EXITCODE_INOUT;
vie_init_inout(vie, inout, vmexit->inst_length, &paging);
vmexit->inst_length = 0;
return (UNHANDLED);
}
static int
npf_fault_type(uint64_t exitinfo1)
{
if (exitinfo1 & VMCB_NPF_INFO1_W)
return (PROT_WRITE);
else if (exitinfo1 & VMCB_NPF_INFO1_ID)
return (PROT_EXEC);
else
return (PROT_READ);
}
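/*
 * Determine whether a nested page fault is a candidate for MMIO emulation:
 * it must not be an instruction fetch, must not have occurred during a
 * guest page-table walk, and the hardware must have latched a valid GPA.
 */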
static bool
svm_npf_emul_fault(uint64_t exitinfo1)
{
if (exitinfo1 & VMCB_NPF_INFO1_ID) {
return (false);
}
if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
return (false);
}
if ((exitinfo1 & VMCB_NPF_INFO1_GPA) == 0) {
return (false);
}
return (true);
}
static void
svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
uint64_t gpa)
{
struct vmcb_ctrl *ctrl;
struct vmcb *vmcb;
struct vie *vie;
struct vm_guest_paging paging;
struct vmcb_segment *seg;
char *inst_bytes = NULL;
uint8_t inst_len = 0;
vmcb = svm_get_vmcb(svm_sc, vcpu);
ctrl = &vmcb->ctrl;
vmexit->exitcode = VM_EXITCODE_MMIO_EMUL;
vmexit->u.mmio_emul.gpa = gpa;
vmexit->u.mmio_emul.gla = VIE_INVALID_GLA;
svm_paging_info(vmcb, &paging);
switch (paging.cpu_mode) {
case CPU_MODE_REAL:
seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
vmexit->u.mmio_emul.cs_base = seg->base;
vmexit->u.mmio_emul.cs_d = 0;
break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
vmexit->u.mmio_emul.cs_base = seg->base;
vmexit->u.mmio_emul.cs_d = (seg->attrib & VMCB_CS_ATTRIB_D) ?
1 : 0;
break;
default:
vmexit->u.mmio_emul.cs_base = 0;
vmexit->u.mmio_emul.cs_d = 0;
break;
}
if (has_decode_assist() && !disable_npf_assist) {
inst_len = ctrl->inst_len;
inst_bytes = (char *)ctrl->inst_bytes;
}
vie = vm_vie_ctx(svm_sc->vm, vcpu);
vie_init_mmio(vie, inst_bytes, inst_len, &paging, gpa);
}
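/*
 * CR0.CD, CR0.NW and the (reserved) upper 32 bits are virtualized in
 * software: the guest-visible value is kept in sctx_cr0_shadow while the
 * VMCB carries the masked value.  Whenever the two differ, CR0 reads and
 * writes are intercepted so the shadow can be maintained.
 */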
#define SVM_CR0_MASK ~(CR0_CD | CR0_NW | 0xffffffff00000000)
static void
svm_set_cr0(struct svm_softc *svm_sc, int vcpu, uint64_t val, bool guest_write)
{
struct vmcb_state *state;
struct svm_regctx *regctx;
uint64_t masked, old, diff;
state = svm_get_vmcb_state(svm_sc, vcpu);
regctx = svm_get_guest_regctx(svm_sc, vcpu);
old = state->cr0 | (regctx->sctx_cr0_shadow & ~SVM_CR0_MASK);
diff = old ^ val;
if (diff == 0) {
return;
}
if ((diff & CR0_PG) != 0 || (diff & CR0_WP) != 0) {
flush_asid(svm_sc, vcpu);
}
if (guest_write) {
if ((diff & CR0_PG) != 0) {
uint64_t efer = state->efer;
if ((val & CR0_PG) != 0 && (efer & EFER_LME) != 0) {
state->efer |= EFER_LMA;
}
if ((val & CR0_PG) == 0 && (efer & EFER_LME) != 0) {
state->efer &= ~EFER_LMA;
}
}
}
masked = val & SVM_CR0_MASK;
regctx->sctx_cr0_shadow = val;
state->cr0 = masked;
svm_set_dirty(svm_sc, vcpu, VMCB_CACHE_CR);
if ((masked ^ val) != 0) {
svm_enable_intercept(svm_sc, vcpu, VMCB_CR_INTCPT,
BIT(0) | BIT(16));
} else {
svm_disable_intercept(svm_sc, vcpu, VMCB_CR_INTCPT,
BIT(0) | BIT(16));
}
svm_set_dirty(svm_sc, vcpu, VMCB_CACHE_I);
}
static void
svm_get_cr0(struct svm_softc *svm_sc, int vcpu, uint64_t *val)
{
struct vmcb *vmcb;
struct svm_regctx *regctx;
vmcb = svm_get_vmcb(svm_sc, vcpu);
regctx = svm_get_guest_regctx(svm_sc, vcpu);
*val = vmcb->state.cr0 | (regctx->sctx_cr0_shadow & ~SVM_CR0_MASK);
}
static void
svm_handle_cr0_read(struct svm_softc *svm_sc, int vcpu, enum vm_reg_name reg)
{
uint64_t val;
int err __maybe_unused;
svm_get_cr0(svm_sc, vcpu, &val);
err = svm_setreg(svm_sc, vcpu, reg, val);
ASSERT(err == 0);
}
static void
svm_handle_cr0_write(struct svm_softc *svm_sc, int vcpu, enum vm_reg_name reg)
{
struct vmcb_state *state;
uint64_t val;
int err __maybe_unused;
state = svm_get_vmcb_state(svm_sc, vcpu);
err = svm_getreg(svm_sc, vcpu, reg, &val);
ASSERT(err == 0);
if ((val & CR0_NW) != 0 && (val & CR0_CD) == 0) {
vm_inject_gp(svm_sc->vm, vcpu);
return;
}
if ((val & CR0_PG) != 0 && (val & CR0_PE) == 0) {
vm_inject_gp(svm_sc->vm, vcpu);
return;
}
if ((state->cr0 & CR0_PG) == 0 && (val & CR0_PG) != 0) {
if ((state->efer & EFER_LME) != 0 &&
(state->cr4 & CR4_PAE) == 0) {
vm_inject_gp(svm_sc->vm, vcpu);
return;
}
}
svm_set_cr0(svm_sc, vcpu, val, true);
}
static void
svm_inst_emul_other(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
struct vie *vie;
struct vm_guest_paging paging;
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
bzero(&vmexit->u.inst_emul, sizeof (vmexit->u.inst_emul));
vie = vm_vie_ctx(svm_sc->vm, vcpu);
svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &paging);
vie_init_other(vie, &paging);
vmexit->inst_length = 0;
}
static void
svm_update_virqinfo(struct svm_softc *sc, int vcpu)
{
struct vm *vm;
struct vlapic *vlapic;
struct vmcb_ctrl *ctrl;
vm = sc->vm;
vlapic = vm_lapic(vm, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
vlapic_set_cr8(vlapic, ctrl->v_tpr);
KASSERT(ctrl->v_intr_vector == 0, ("%s: invalid "
"v_intr_vector %d", __func__, ctrl->v_intr_vector));
}
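/*
 * The VMCB event-injection encoding matches the generic VM_INTINFO_*
 * encoding bit-for-bit, so exitintinfo/eventinj values can be passed
 * through with little translation.  The assertions below enforce this.
 */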
CTASSERT(VMCB_EVENTINJ_TYPE_INTR == VM_INTINFO_HWINTR);
CTASSERT(VMCB_EVENTINJ_TYPE_NMI == VM_INTINFO_NMI);
CTASSERT(VMCB_EVENTINJ_TYPE_EXCEPTION == VM_INTINFO_HWEXCP);
CTASSERT(VMCB_EVENTINJ_TYPE_INTn == VM_INTINFO_SWINTR);
CTASSERT(VMCB_EVENTINJ_EC_VALID == VM_INTINFO_DEL_ERRCODE);
CTASSERT(VMCB_EVENTINJ_VALID == VM_INTINFO_VALID);
static void
svm_stash_intinfo(struct svm_softc *svm_sc, int vcpu, uint64_t intinfo)
{
ASSERT(VMCB_EXITINTINFO_VALID(intinfo));
if (VM_INTINFO_TYPE(intinfo) == VM_INTINFO_NMI) {
intinfo &= ~VM_INTINFO_MASK_VECTOR;
intinfo |= IDT_NMI;
}
VERIFY0(vm_exit_intinfo(svm_sc->vm, vcpu, intinfo));
}
static void
svm_save_exitintinfo(struct svm_softc *svm_sc, int vcpu)
{
struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
uint64_t intinfo = ctrl->exitintinfo;
if (VMCB_EXITINTINFO_VALID(intinfo)) {
vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1);
svm_stash_intinfo(svm_sc, vcpu, intinfo);
}
}
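/*
 * An interrupt window exit is requested by programming a dummy virtual
 * interrupt (V_IRQ set, vector 0, TPR ignored) and intercepting VINTR.
 * The resulting #VMEXIT fires as soon as the guest can accept interrupts.
 */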
static __inline int
vintr_intercept_enabled(struct svm_softc *sc, int vcpu)
{
return (svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_VINTR));
}
static void
svm_enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
state = svm_get_vmcb_state(sc, vcpu);
if ((ctrl->v_irq & V_IRQ) != 0 && ctrl->v_intr_vector == 0) {
KASSERT(ctrl->v_intr_prio & V_IGN_TPR,
("%s: invalid v_ign_tpr", __func__));
KASSERT(vintr_intercept_enabled(sc, vcpu),
("%s: vintr intercept should be enabled", __func__));
return;
}
VERIFY((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
(state->rflags & PSL_I) == 0 || ctrl->intr_shadow);
ctrl->v_irq |= V_IRQ;
ctrl->v_intr_prio |= V_IGN_TPR;
ctrl->v_intr_vector = 0;
svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR);
}
static void
svm_disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
if ((ctrl->v_irq & V_IRQ) == 0 && ctrl->v_intr_vector == 0) {
KASSERT(!vintr_intercept_enabled(sc, vcpu),
("%s: vintr intercept should be disabled", __func__));
return;
}
ctrl->v_irq &= ~V_IRQ;
ctrl->v_intr_vector = 0;
svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR);
}
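/*
 * Virtual NMI blocking is modeled with the IRET intercept: it is armed
 * when an NMI is injected and stands in for the blocked state.  When the
 * guest executes IRET the intercept fires, blocking is cleared, and a
 * one-instruction shadow is set so the IRET itself can retire before
 * another NMI is injected.
 */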
static int
svm_nmi_blocked(struct svm_softc *sc, int vcpu)
{
return (svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_IRET));
}
static void
svm_clear_nmi_blocking(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
KASSERT(svm_nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
ctrl->intr_shadow = 1;
}
static void
svm_inject_event(struct vmcb_ctrl *ctrl, uint64_t info)
{
ASSERT(VM_INTINFO_PENDING(info));
uint8_t vector = VM_INTINFO_VECTOR(info);
uint32_t type = VM_INTINFO_TYPE(info);
switch (type) {
case VM_INTINFO_NMI:
vector = IDT_NMI;
break;
case VM_INTINFO_HWINTR:
case VM_INTINFO_SWINTR:
break;
case VM_INTINFO_HWEXCP:
if (vector == IDT_NMI) {
type = VM_INTINFO_NMI;
}
VERIFY(vector < 32);
break;
default:
type = VM_INTINFO_SWINTR;
break;
}
ctrl->eventinj = VMCB_EVENTINJ_VALID | type | vector;
if (VM_INTINFO_HAS_ERRCODE(info)) {
ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID;
ctrl->eventinj |= (uint64_t)VM_INTINFO_ERRCODE(info) << 32;
}
}
static void
svm_inject_nmi(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
ASSERT(!svm_nmi_blocked(sc, vcpu));
ctrl->eventinj = VMCB_EVENTINJ_VALID | VMCB_EVENTINJ_TYPE_NMI;
vm_nmi_clear(sc->vm, vcpu);
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
}
static void
svm_inject_irq(struct svm_softc *sc, int vcpu, int vector)
{
struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
ASSERT(vector >= 0 && vector <= 255);
ctrl->eventinj = VMCB_EVENTINJ_VALID | vector;
}
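/*
 * Validate a guest write to EFER.  Must-be-zero bits, LME changes while
 * paging is enabled, inconsistent LMA values, and attempts to set features
 * (NXE, FFXSR, TCE) not exposed in the guest CPUID all raise #GP.  LMSLE
 * is unsupported and rejected outright.
 */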
#define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL
static vm_msr_result_t
svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval)
{
struct vmcb_state *state = svm_get_vmcb_state(sc, vcpu);
uint64_t lma;
int error;
newval &= ~0xFE;
if (newval & EFER_MBZ_BITS) {
return (VMR_GP);
}
const uint64_t changed = state->efer ^ newval;
if (changed & EFER_LME) {
if (state->cr0 & CR0_PG) {
return (VMR_GP);
}
}
if ((newval & EFER_LME) != 0 && (state->cr0 & CR0_PG) != 0) {
lma = EFER_LMA;
} else {
lma = 0;
}
if ((newval & EFER_LMA) != lma) {
return (VMR_GP);
}
if ((newval & EFER_NXE) != 0 &&
!vm_cpuid_capability(sc->vm, vcpu, VCC_NO_EXECUTE)) {
return (VMR_GP);
}
if ((newval & EFER_FFXSR) != 0 &&
!vm_cpuid_capability(sc->vm, vcpu, VCC_FFXSR)) {
return (VMR_GP);
}
if ((newval & EFER_TCE) != 0 &&
!vm_cpuid_capability(sc->vm, vcpu, VCC_TCE)) {
return (VMR_GP);
}
if (newval & EFER_LMSLE) {
return (VMR_GP);
}
error = svm_setreg(sc, vcpu, VM_REG_GUEST_EFER, newval);
VERIFY0(error);
return (VMR_OK);
}
static int
svm_handle_msr(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
bool is_wrmsr)
{
struct vmcb_state *state = svm_get_vmcb_state(svm_sc, vcpu);
struct svm_regctx *ctx = svm_get_guest_regctx(svm_sc, vcpu);
const uint32_t ecx = ctx->sctx_rcx;
vm_msr_result_t res;
uint64_t val = 0;
if (is_wrmsr) {
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
val = ctx->sctx_rdx << 32 | (uint32_t)state->rax;
if (vlapic_owned_msr(ecx)) {
struct vlapic *vlapic = vm_lapic(svm_sc->vm, vcpu);
res = vlapic_wrmsr(vlapic, ecx, val);
} else if (ecx == MSR_EFER) {
res = svm_write_efer(svm_sc, vcpu, val);
} else if (svm_pmu_owned_msr(ecx)) {
res = svm_pmu_wrmsr(svm_sc, vcpu, ecx, val);
} else {
res = svm_wrmsr(svm_sc, vcpu, ecx, val);
}
} else {
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
if (vlapic_owned_msr(ecx)) {
struct vlapic *vlapic = vm_lapic(svm_sc->vm, vcpu);
res = vlapic_rdmsr(vlapic, ecx, &val);
} else if (svm_pmu_owned_msr(ecx)) {
res = svm_pmu_rdmsr(svm_sc, vcpu, ecx, &val);
} else {
res = svm_rdmsr(svm_sc, vcpu, ecx, &val);
}
}
switch (res) {
case VMR_OK:
if (!is_wrmsr) {
state->rax = (uint32_t)val;
ctx->sctx_rdx = val >> 32;
}
return (1);
case VMR_GP:
vm_inject_gp(svm_sc->vm, vcpu);
return (1);
case VMR_UNHANLDED:
vmexit->exitcode = is_wrmsr ?
VM_EXITCODE_WRMSR : VM_EXITCODE_RDMSR;
vmexit->u.msr.code = ecx;
vmexit->u.msr.wval = val;
return (0);
default:
panic("unexpected msr result %u\n", res);
}
}
static void
svm_handle_rdpmc(struct svm_softc *svm_sc, int vcpu)
{
struct vmcb_state *state = svm_get_vmcb_state(svm_sc, vcpu);
struct svm_regctx *ctx = svm_get_guest_regctx(svm_sc, vcpu);
const uint32_t ecx = ctx->sctx_rcx;
uint64_t val = 0;
if (svm_pmu_rdpmc(svm_sc, vcpu, ecx, &val)) {
state->rax = (uint32_t)val;
ctx->sctx_rdx = val >> 32;
} else {
vm_inject_gp(svm_sc->vm, vcpu);
}
}
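/*
 * Exit codes for which the VMCB nRIP field is valid, per the AMD APM:
 * the CR/DR read and write intercepts (0x00-0x3F), the INT3/INTO/BOUND
 * exceptions (0x43-0x45), and the instruction intercepts (0x65-0x7C and
 * 0x80-0x8D).
 */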
static int
nrip_valid(uint64_t exitcode)
{
switch (exitcode) {
case 0x00 ... 0x0F:
case 0x10 ... 0x1F:
case 0x20 ... 0x2F:
case 0x30 ... 0x3F:
case 0x43:
case 0x44:
case 0x45:
case 0x65 ... 0x7C:
case 0x80 ... 0x8D:
return (1);
default:
return (0);
}
}
static int
svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
struct vmcb *vmcb;
struct vmcb_state *state;
struct vmcb_ctrl *ctrl;
struct svm_regctx *ctx;
uint64_t code, info1, info2;
int handled;
ctx = svm_get_guest_regctx(svm_sc, vcpu);
vmcb = svm_get_vmcb(svm_sc, vcpu);
state = &vmcb->state;
ctrl = &vmcb->ctrl;
handled = 0;
code = ctrl->exitcode;
info1 = ctrl->exitinfo1;
info2 = ctrl->exitinfo2;
vmexit->exitcode = VM_EXITCODE_BOGUS;
vmexit->rip = state->rip;
vmexit->inst_length = nrip_valid(code) ? ctrl->nrip - state->rip : 0;
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1);
if (code == VMCB_EXIT_INVALID) {
vm_exit_svm(vmexit, code, info1, info2);
return (0);
}
KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event "
"injection valid bit is set %lx", __func__, ctrl->eventinj));
KASSERT(vmexit->inst_length >= 0 && vmexit->inst_length <= 15,
("invalid inst_length %d: code (%lx), info1 (%lx), info2 (%lx)",
vmexit->inst_length, code, info1, info2));
svm_update_virqinfo(svm_sc, vcpu);
svm_save_exitintinfo(svm_sc, vcpu);
switch (code) {
case VMCB_EXIT_CR0_READ:
if (VMCB_CRx_INFO1_VALID(info1) != 0) {
svm_handle_cr0_read(svm_sc, vcpu,
vie_regnum_map(VMCB_CRx_INFO1_GPR(info1)));
handled = 1;
} else {
svm_inst_emul_other(svm_sc, vcpu, vmexit);
}
break;
case VMCB_EXIT_CR0_WRITE:
case VMCB_EXIT_CR0_SEL_WRITE:
if (VMCB_CRx_INFO1_VALID(info1) != 0) {
svm_handle_cr0_write(svm_sc, vcpu,
vie_regnum_map(VMCB_CRx_INFO1_GPR(info1)));
handled = 1;
} else {
svm_inst_emul_other(svm_sc, vcpu, vmexit);
}
break;
case VMCB_EXIT_IRET:
vmexit->inst_length = 0;
svm_clear_nmi_blocking(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_VINTR:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1);
svm_disable_intr_window_exiting(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_INTR:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
handled = 1;
break;
case VMCB_EXIT_NMI:
case VMCB_EXIT_SMI:
case VMCB_EXIT_INIT:
handled = 1;
break;
case VMCB_EXIT_EXCP0 ... VMCB_EXIT_EXCP31: {
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1);
const uint8_t idtvec = code - VMCB_EXIT_EXCP0;
uint32_t errcode = 0;
bool reflect = true;
bool errcode_valid = false;
switch (idtvec) {
case IDT_MC:
reflect = false;
vmm_call_trap(T_MCE);
break;
		case IDT_PF:
			/* %cr2 must reflect the faulting address */
			VERIFY0(svm_setreg(svm_sc, vcpu, VM_REG_GUEST_CR2,
			    info2));
			/* FALLTHROUGH */
		case IDT_NP:
case IDT_SS:
case IDT_GP:
case IDT_AC:
case IDT_TS:
errcode_valid = true;
errcode = info1;
break;
case IDT_DF:
errcode_valid = true;
break;
		case IDT_BP:
		case IDT_OF:
		case IDT_BR:
			/*
			 * The 'nrip' field is populated for INT3, INTO and
			 * BOUND exceptions, so zero 'inst_length' to ensure
			 * the injected event points at the %rip where the
			 * exception originally occurred.
			 */
			vmexit->inst_length = 0;
			/* FALLTHROUGH */
		default:
errcode_valid = false;
break;
}
VERIFY0(vmexit->inst_length);
if (reflect) {
VERIFY0(vm_inject_exception(svm_sc->vm, vcpu, idtvec,
errcode_valid, errcode, false));
}
handled = 1;
break;
}
case VMCB_EXIT_MSR:
handled = svm_handle_msr(svm_sc, vcpu, vmexit, info1 != 0);
break;
case VMCB_EXIT_RDPMC:
svm_handle_rdpmc(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_IO:
handled = svm_handle_inout(svm_sc, vcpu, vmexit);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
break;
case VMCB_EXIT_SHUTDOWN:
(void) vm_suspend(svm_sc->vm, VM_SUSPEND_TRIPLEFAULT, vcpu);
handled = 1;
break;
case VMCB_EXIT_INVLPGA:
vm_inject_ud(svm_sc->vm, vcpu);
handled = 1;
break;
case VMCB_EXIT_VMRUN:
case VMCB_EXIT_VMLOAD:
case VMCB_EXIT_VMSAVE:
case VMCB_EXIT_STGI:
case VMCB_EXIT_CLGI:
case VMCB_EXIT_SKINIT:
vm_inject_ud(svm_sc->vm, vcpu);
handled = 1;
break;
case VMCB_EXIT_INVD:
case VMCB_EXIT_WBINVD:
handled = 1;
break;
case VMCB_EXIT_VMMCALL:
vm_inject_ud(svm_sc->vm, vcpu);
handled = 1;
break;
case VMCB_EXIT_CPUID:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
vcpu_emulate_cpuid(svm_sc->vm, vcpu, &state->rax,
&ctx->sctx_rbx, &ctx->sctx_rcx, &ctx->sctx_rdx);
handled = 1;
break;
case VMCB_EXIT_HLT:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
vmexit->exitcode = VM_EXITCODE_HLT;
vmexit->u.hlt.rflags = state->rflags;
break;
case VMCB_EXIT_PAUSE:
vmexit->exitcode = VM_EXITCODE_PAUSE;
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1);
break;
case VMCB_EXIT_NPF:
if (info1 & VMCB_NPF_INFO1_RSV) {
} else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.gpa = info2;
vmexit->u.paging.fault_type = npf_fault_type(info1);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
} else if (svm_npf_emul_fault(info1)) {
svm_handle_mmio_emul(svm_sc, vcpu, vmexit, info2);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MMIO_EMUL, 1);
}
break;
case VMCB_EXIT_MONITOR:
vmexit->exitcode = VM_EXITCODE_MONITOR;
break;
case VMCB_EXIT_MWAIT:
vmexit->exitcode = VM_EXITCODE_MWAIT;
break;
default:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
}
DTRACE_PROBE3(vmm__vexit, int, vcpu, uint64_t, vmexit->rip, uint32_t,
code);
if (handled) {
vmexit->rip += vmexit->inst_length;
vmexit->inst_length = 0;
state->rip = vmexit->rip;
	} else {
		if (vmexit->exitcode == VM_EXITCODE_BOGUS) {
			/*
			 * Exits not claimed by any of the handlers above are
			 * reported as raw SVM exits.
			 */
			vm_exit_svm(vmexit, code, info1, info2);
		} else {
			/*
			 * The exitcode and collateral are already populated;
			 * the exit will be completed by the caller.
			 */
		}
	}
return (handled);
}
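/*
 * Work out what event, if any, should be injected for this VM entry:
 * previously stashed intinfo takes precedence, then pending NMIs, then
 * ExtINTs from the legacy PIC.  The returned state tells the run loop
 * whether injection happened, whether guest interruptibility blocked it,
 * and whether an immediate exit is required.
 */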
static enum event_inject_state
svm_inject_events(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
struct svm_vcpu *vcpustate;
uint64_t intinfo;
enum event_inject_state ev_state;
state = svm_get_vmcb_state(sc, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
vcpustate = svm_get_vcpu(sc, vcpu);
ev_state = EIS_CAN_INJECT;
if (vcpustate->nextrip != state->rip) {
ctrl->intr_shadow = 0;
}
if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
return (EIS_EV_EXISTING | EIS_REQ_EXIT);
}
if (vm_entry_intinfo(sc->vm, vcpu, &intinfo)) {
svm_inject_event(ctrl, intinfo);
vmm_stat_incr(sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1);
ev_state = EIS_EV_INJECTED;
}
if (vm_nmi_pending(sc->vm, vcpu) && !svm_nmi_blocked(sc, vcpu)) {
if (ev_state == EIS_CAN_INJECT) {
if (ctrl->intr_shadow) {
return (EIS_GI_BLOCK);
}
svm_inject_nmi(sc, vcpu);
ev_state = EIS_EV_INJECTED;
} else {
return (ev_state | EIS_REQ_EXIT);
}
}
if (vm_extint_pending(sc->vm, vcpu)) {
int vector;
if (ev_state != EIS_CAN_INJECT) {
return (ev_state | EIS_REQ_EXIT);
}
if ((state->rflags & PSL_I) == 0 || ctrl->intr_shadow) {
return (EIS_GI_BLOCK);
}
vatpic_pending_intr(sc->vm, &vector);
KASSERT(vector >= 0 && vector <= 255,
("invalid vector %d from INTR", vector));
svm_inject_irq(sc, vcpu, vector);
vm_extint_clear(sc->vm, vcpu);
vatpic_intr_accepted(sc->vm, vector);
ev_state = EIS_EV_INJECTED;
}
return (ev_state);
}
static enum event_inject_state
svm_inject_vlapic(struct svm_softc *sc, int vcpu, struct vlapic *vlapic,
enum event_inject_state ev_state)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
int vector;
uint8_t v_tpr;
state = svm_get_vmcb_state(sc, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
v_tpr = vlapic_get_cr8(vlapic);
KASSERT(v_tpr <= 15, ("invalid v_tpr %x", v_tpr));
if (ctrl->v_tpr != v_tpr) {
ctrl->v_tpr = v_tpr;
svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
}
if (ev_state != EIS_CAN_INJECT) {
return (ev_state);
}
if (!vlapic_pending_intr(vlapic, &vector)) {
return (EIS_CAN_INJECT);
}
KASSERT(vector >= 16 && vector <= 255,
("invalid vector %d from local APIC", vector));
if ((state->rflags & PSL_I) == 0 || ctrl->intr_shadow) {
return (EIS_GI_BLOCK);
}
svm_inject_irq(sc, vcpu, vector);
vlapic_intr_accepted(vlapic, vector);
return (EIS_EV_INJECTED);
}
static bool
svm_inject_recheck(struct svm_softc *sc, int vcpu,
enum event_inject_state ev_state)
{
struct vmcb_ctrl *ctrl;
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
if (ev_state == EIS_CAN_INJECT) {
if (ctrl->intr_shadow != 0) {
return (false);
}
if (vm_nmi_pending(sc->vm, vcpu) &&
!svm_nmi_blocked(sc, vcpu)) {
return (true);
}
if (vm_extint_pending(sc->vm, vcpu)) {
return (true);
}
} else {
if ((ev_state & EIS_REQ_EXIT) != 0) {
poke_cpu(CPU->cpu_id);
} else {
svm_enable_intr_window_exiting(sc, vcpu);
}
}
return (false);
}
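/*
 * Let the HMA layer pick (and possibly refresh) the ASID for this vCPU,
 * requesting a TLB flush when the nested page tables have changed (nptgen)
 * or the ASID generation has rolled over.  Flush-by-ASID is used where the
 * hardware supports it; otherwise the flush is achieved via a fresh ASID.
 */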
static void
check_asid(struct svm_softc *sc, int vcpuid, uint_t thiscpu, uint64_t nptgen)
{
struct svm_vcpu *vcpustate = svm_get_vcpu(sc, vcpuid);
struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpuid);
uint8_t flush;
flush = hma_svm_asid_update(&vcpustate->hma_asid, has_flush_by_asid(),
vcpustate->nptgen != nptgen);
if (flush != VMCB_TLB_FLUSH_NOTHING) {
ctrl->asid = vcpustate->hma_asid.hsa_asid;
svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID);
}
ctrl->tlb_ctrl = flush;
vcpustate->nptgen = nptgen;
}
static void
flush_asid(struct svm_softc *sc, int vcpuid)
{
struct svm_vcpu *vcpustate = svm_get_vcpu(sc, vcpuid);
struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpuid);
uint8_t flush;
const ulong_t iflag = intr_clear();
flush = hma_svm_asid_update(&vcpustate->hma_asid, has_flush_by_asid(),
true);
intr_restore(iflag);
ASSERT(flush != VMCB_TLB_FLUSH_NOTHING);
ctrl->asid = vcpustate->hma_asid.hsa_asid;
ctrl->tlb_ctrl = flush;
svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID);
}
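/*
 * Swap host debug state for guest debug state around VM entry.  Host %dr7
 * and DEBUGCTL are zeroed first so that stale host breakpoints cannot fire
 * while the guest values are being loaded.
 */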
static __inline void
svm_dr_enter_guest(struct svm_regctx *gctx)
{
gctx->host_dr7 = rdr7();
gctx->host_debugctl = rdmsr(MSR_DEBUGCTLMSR);
load_dr7(0);
wrmsr(MSR_DEBUGCTLMSR, 0);
gctx->host_dr0 = rdr0();
gctx->host_dr1 = rdr1();
gctx->host_dr2 = rdr2();
gctx->host_dr3 = rdr3();
gctx->host_dr6 = rdr6();
load_dr0(gctx->sctx_dr0);
load_dr1(gctx->sctx_dr1);
load_dr2(gctx->sctx_dr2);
load_dr3(gctx->sctx_dr3);
}
static __inline void
svm_dr_leave_guest(struct svm_regctx *gctx)
{
gctx->sctx_dr0 = rdr0();
gctx->sctx_dr1 = rdr1();
gctx->sctx_dr2 = rdr2();
gctx->sctx_dr3 = rdr3();
load_dr0(gctx->host_dr0);
load_dr1(gctx->host_dr1);
load_dr2(gctx->host_dr2);
load_dr3(gctx->host_dr3);
load_dr6(gctx->host_dr6);
wrmsr(MSR_DEBUGCTLMSR, gctx->host_debugctl);
load_dr7(gctx->host_dr7);
}
static void
svm_apply_tsc_adjust(struct svm_softc *svm_sc, int vcpuid)
{
const uint64_t offset = vcpu_tsc_offset(svm_sc->vm, vcpuid, true);
struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(svm_sc, vcpuid);
if (ctrl->tsc_offset != offset) {
ctrl->tsc_offset = offset;
svm_set_dirty(svm_sc, vcpuid, VMCB_CACHE_I);
}
}
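/*
 * Run the vCPU: inject pending events, perform the bail-out and run-state
 * checks with interrupts disabled, and execute the world switch via
 * svm_launch() with the GIF cleared to protect host state.  The loop
 * continues for as long as exits can be handled in-kernel.
 */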
static int
svm_vmrun(void *arg, int vcpu, uint64_t rip)
{
struct svm_regctx *gctx;
struct svm_softc *svm_sc;
struct svm_vcpu *vcpustate;
struct vmcb_state *state;
struct vm_exit *vmexit;
struct vlapic *vlapic;
vm_client_t *vmc;
struct vm *vm;
uint64_t vmcb_pa;
int handled;
uint16_t ldt_sel;
svm_sc = arg;
vm = svm_sc->vm;
vcpustate = svm_get_vcpu(svm_sc, vcpu);
state = svm_get_vmcb_state(svm_sc, vcpu);
vmexit = vm_exitinfo(vm, vcpu);
vlapic = vm_lapic(vm, vcpu);
vmc = vm_get_vmclient(vm, vcpu);
gctx = svm_get_guest_regctx(svm_sc, vcpu);
vmcb_pa = svm_sc->vcpu[vcpu].vmcb_pa;
	if (vcpustate->lastcpu != curcpu) {
		/*
		 * The vCPU migrated to a new host CPU: force a fresh ASID
		 * and invalidate all cached VMCB state.
		 */
		vcpustate->hma_asid.hsa_gen = 0;
		svm_set_dirty(svm_sc, vcpu, 0xffffffff);
		vcpustate->lastcpu = curcpu;
		vmm_stat_incr(vm, vcpu, VCPU_MIGRATIONS, 1);
	}
svm_apply_tsc_adjust(svm_sc, vcpu);
svm_msr_guest_enter(svm_sc, vcpu);
VERIFY(!vcpustate->loaded && curthread->t_preempt != 0);
vcpustate->loaded = B_TRUE;
state->rip = rip;
do {
enum event_inject_state inject_state;
uint64_t nptgen;
inject_state = svm_inject_events(svm_sc, vcpu);
handled = 0;
const ulong_t iflag = intr_clear();
inject_state = svm_inject_vlapic(svm_sc, vcpu, vlapic,
inject_state);
if (vcpu_entry_bailout_checks(vm, vcpu, state->rip)) {
intr_restore(iflag);
break;
}
if (vcpu_run_state_pending(vm, vcpu)) {
intr_restore(iflag);
vm_exit_run_state(vm, vcpu, state->rip);
break;
}
if (svm_inject_recheck(svm_sc, vcpu, inject_state)) {
intr_restore(iflag);
handled = 1;
continue;
}
ldt_sel = sldt();
nptgen = vmc_table_enter(vmc);
check_asid(svm_sc, vcpu, curcpu, nptgen);
svm_pmu_enter(svm_sc, vcpu);
vcpu_ustate_change(vm, vcpu, VU_RUN);
svm_dr_enter_guest(gctx);
svm_apply_dirty(svm_sc, vcpu);
hma_svm_gif_disable();
svm_launch(vmcb_pa, gctx, get_pcpu());
hma_svm_gif_enable();
svm_dr_leave_guest(gctx);
vcpu_ustate_change(vm, vcpu, VU_EMU_KERN);
svm_pmu_exit(svm_sc, vcpu);
lldt(ldt_sel);
intr_restore(iflag);
vmc_table_exit(vmc);
vcpustate->nextrip = state->rip;
handled = svm_vmexit(svm_sc, vcpu, vmexit);
} while (handled);
svm_msr_guest_exit(svm_sc, vcpu);
ASSERT(interrupts_enabled());
VERIFY(vcpustate->loaded && curthread->t_preempt != 0);
vcpustate->loaded = B_FALSE;
return (0);
}
static void
svm_vmcleanup(void *arg)
{
struct svm_softc *sc = arg;
vmm_contig_free(sc->iopm_bitmap, SVM_IO_BITMAP_SIZE);
vmm_contig_free(sc->msr_bitmap, SVM_MSR_BITMAP_SIZE);
kmem_free(sc, sizeof (*sc));
}
static uint64_t *
swctx_regptr(struct svm_regctx *regctx, int reg)
{
switch (reg) {
	case VM_REG_GUEST_RBX:
		return (&regctx->sctx_rbx);
	case VM_REG_GUEST_RCX:
		return (&regctx->sctx_rcx);
	case VM_REG_GUEST_RDX:
		return (&regctx->sctx_rdx);
	case VM_REG_GUEST_RDI:
		return (&regctx->sctx_rdi);
	case VM_REG_GUEST_RSI:
		return (&regctx->sctx_rsi);
	case VM_REG_GUEST_RBP:
		return (&regctx->sctx_rbp);
	case VM_REG_GUEST_R8:
		return (&regctx->sctx_r8);
	case VM_REG_GUEST_R9:
		return (&regctx->sctx_r9);
	case VM_REG_GUEST_R10:
		return (&regctx->sctx_r10);
	case VM_REG_GUEST_R11:
		return (&regctx->sctx_r11);
	case VM_REG_GUEST_R12:
		return (&regctx->sctx_r12);
	case VM_REG_GUEST_R13:
		return (&regctx->sctx_r13);
	case VM_REG_GUEST_R14:
		return (&regctx->sctx_r14);
	case VM_REG_GUEST_R15:
		return (&regctx->sctx_r15);
	case VM_REG_GUEST_DR0:
		return (&regctx->sctx_dr0);
	case VM_REG_GUEST_DR1:
		return (&regctx->sctx_dr1);
	case VM_REG_GUEST_DR2:
		return (&regctx->sctx_dr2);
	case VM_REG_GUEST_DR3:
		return (&regctx->sctx_dr3);
default:
return (NULL);
}
}
static int
svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
{
struct svm_softc *sc;
struct vmcb *vmcb;
uint64_t *regp;
uint64_t *fieldp;
struct vmcb_segment *seg;
sc = arg;
vmcb = svm_get_vmcb(sc, vcpu);
regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident);
if (regp != NULL) {
*val = *regp;
return (0);
}
switch (ident) {
case VM_REG_GUEST_INTR_SHADOW:
*val = (vmcb->ctrl.intr_shadow != 0) ? 1 : 0;
break;
case VM_REG_GUEST_CR0:
svm_get_cr0(sc, vcpu, val);
break;
case VM_REG_GUEST_CR2:
case VM_REG_GUEST_CR3:
case VM_REG_GUEST_CR4:
case VM_REG_GUEST_DR6:
case VM_REG_GUEST_DR7:
case VM_REG_GUEST_EFER:
case VM_REG_GUEST_RAX:
case VM_REG_GUEST_RFLAGS:
case VM_REG_GUEST_RIP:
case VM_REG_GUEST_RSP:
fieldp = vmcb_regptr(vmcb, ident, NULL);
*val = *fieldp;
break;
case VM_REG_GUEST_CS:
case VM_REG_GUEST_DS:
case VM_REG_GUEST_ES:
case VM_REG_GUEST_FS:
case VM_REG_GUEST_GS:
case VM_REG_GUEST_SS:
case VM_REG_GUEST_LDTR:
case VM_REG_GUEST_TR:
seg = vmcb_segptr(vmcb, ident);
*val = seg->selector;
break;
case VM_REG_GUEST_GDTR:
case VM_REG_GUEST_IDTR:
return (EINVAL);
case VM_REG_GUEST_PDPTE0:
case VM_REG_GUEST_PDPTE1:
case VM_REG_GUEST_PDPTE2:
case VM_REG_GUEST_PDPTE3:
*val = 0;
break;
default:
return (EINVAL);
}
return (0);
}
static int
svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
{
struct svm_softc *sc;
struct vmcb *vmcb;
uint64_t *regp;
uint64_t *fieldp;
uint32_t dirty;
struct vmcb_segment *seg;
sc = arg;
vmcb = svm_get_vmcb(sc, vcpu);
regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident);
if (regp != NULL) {
*regp = val;
return (0);
}
dirty = VMCB_CACHE_NONE;
switch (ident) {
case VM_REG_GUEST_INTR_SHADOW:
vmcb->ctrl.intr_shadow = (val != 0) ? 1 : 0;
break;
case VM_REG_GUEST_EFER:
fieldp = vmcb_regptr(vmcb, ident, &dirty);
*fieldp = val | EFER_SVM;
dirty |= VMCB_CACHE_CR;
break;
case VM_REG_GUEST_CR0:
svm_set_cr0(sc, vcpu, val, false);
break;
case VM_REG_GUEST_CR2:
case VM_REG_GUEST_CR3:
case VM_REG_GUEST_CR4:
case VM_REG_GUEST_DR6:
case VM_REG_GUEST_DR7:
case VM_REG_GUEST_RAX:
case VM_REG_GUEST_RFLAGS:
case VM_REG_GUEST_RIP:
case VM_REG_GUEST_RSP:
fieldp = vmcb_regptr(vmcb, ident, &dirty);
*fieldp = val;
break;
case VM_REG_GUEST_CS:
case VM_REG_GUEST_DS:
case VM_REG_GUEST_ES:
case VM_REG_GUEST_SS:
case VM_REG_GUEST_FS:
case VM_REG_GUEST_GS:
case VM_REG_GUEST_LDTR:
case VM_REG_GUEST_TR:
dirty |= VMCB_CACHE_SEG;
seg = vmcb_segptr(vmcb, ident);
seg->selector = (uint16_t)val;
break;
case VM_REG_GUEST_GDTR:
case VM_REG_GUEST_IDTR:
return (EINVAL);
case VM_REG_GUEST_PDPTE0:
case VM_REG_GUEST_PDPTE1:
case VM_REG_GUEST_PDPTE2:
case VM_REG_GUEST_PDPTE3:
break;
default:
return (EINVAL);
}
if (dirty != VMCB_CACHE_NONE) {
svm_set_dirty(sc, vcpu, dirty);
}
return (0);
}
static int
svm_setdesc(void *arg, int vcpu, int reg, const struct seg_desc *desc)
{
struct vmcb *vmcb;
struct svm_softc *sc;
struct vmcb_segment *seg;
sc = arg;
vmcb = svm_get_vmcb(sc, vcpu);
switch (reg) {
case VM_REG_GUEST_CS:
case VM_REG_GUEST_DS:
case VM_REG_GUEST_ES:
case VM_REG_GUEST_SS:
case VM_REG_GUEST_FS:
case VM_REG_GUEST_GS:
case VM_REG_GUEST_LDTR:
case VM_REG_GUEST_TR:
svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG);
seg = vmcb_segptr(vmcb, reg);
seg->attrib = VMCB_ACCESS2ATTR(desc->access);
		if (SEG_DESC_UNUSABLE(desc->access)) {
			/* An unusable segment maps to the P bit being clear */
			seg->attrib &= ~0x80;
		}
		if (reg == VM_REG_GUEST_SS) {
			/* The VMCB carries an explicit CPL; keep it in sync */
			vmcb->state.cpl = SEG_DESC_DPL(desc->access);
		}
break;
case VM_REG_GUEST_GDTR:
case VM_REG_GUEST_IDTR:
svm_set_dirty(sc, vcpu, VMCB_CACHE_DT);
seg = vmcb_segptr(vmcb, reg);
break;
default:
return (EINVAL);
}
ASSERT(seg != NULL);
seg->base = desc->base;
seg->limit = desc->limit;
return (0);
}
static int
svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
{
struct vmcb *vmcb;
struct svm_softc *sc;
struct vmcb_segment *seg;
sc = arg;
vmcb = svm_get_vmcb(sc, vcpu);
switch (reg) {
case VM_REG_GUEST_DS:
case VM_REG_GUEST_ES:
case VM_REG_GUEST_FS:
case VM_REG_GUEST_GS:
case VM_REG_GUEST_SS:
case VM_REG_GUEST_LDTR:
seg = vmcb_segptr(vmcb, reg);
desc->access = VMCB_ATTR2ACCESS(seg->attrib);
if ((desc->access & 0x80) == 0) {
desc->access |= 0x10000;
}
if (reg == VM_REG_GUEST_SS) {
desc->access &=
~(SEG_DESC_DPL_MASK << SEG_DESC_DPL_SHIFT);
desc->access |=
(vmcb->state.cpl & SEG_DESC_DPL_MASK) <<
SEG_DESC_DPL_SHIFT;
}
break;
case VM_REG_GUEST_CS:
case VM_REG_GUEST_TR:
seg = vmcb_segptr(vmcb, reg);
desc->access = VMCB_ATTR2ACCESS(seg->attrib);
break;
case VM_REG_GUEST_GDTR:
case VM_REG_GUEST_IDTR:
seg = vmcb_segptr(vmcb, reg);
desc->access = 0;
break;
default:
return (EINVAL);
}
ASSERT(seg != NULL);
desc->base = seg->base;
desc->limit = seg->limit;
return (0);
}
static int
svm_get_msr(void *arg, int vcpu, uint32_t msr, uint64_t *valp)
{
struct svm_softc *sc = arg;
struct vmcb *vmcb = svm_get_vmcb(sc, vcpu);
const uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, NULL);
if (msrp != NULL) {
*valp = *msrp;
return (0);
}
return (EINVAL);
}
static int
svm_set_msr(void *arg, int vcpu, uint32_t msr, uint64_t val)
{
struct svm_softc *sc = arg;
struct vmcb *vmcb = svm_get_vmcb(sc, vcpu);
uint32_t dirty = 0;
uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, &dirty);
if (msrp == NULL) {
return (EINVAL);
}
switch (msr) {
case MSR_EFER:
*msrp = val | EFER_SVM;
break;
default:
*msrp = val;
break;
}
if (dirty != 0) {
svm_set_dirty(sc, vcpu, dirty);
}
return (0);
}
static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
struct svm_softc *sc;
int error;
sc = arg;
error = 0;
switch (type) {
case VM_CAP_HALT_EXIT:
svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_HLT, val);
break;
case VM_CAP_PAUSE_EXIT:
svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_PAUSE, val);
break;
default:
error = ENOENT;
break;
}
return (error);
}
static int
svm_getcap(void *arg, int vcpu, int type, int *retval)
{
struct svm_softc *sc;
int error;
sc = arg;
error = 0;
switch (type) {
case VM_CAP_HALT_EXIT:
*retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_HLT);
break;
case VM_CAP_PAUSE_EXIT:
*retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
VMCB_INTCPT_PAUSE);
break;
default:
error = ENOENT;
break;
}
return (error);
}
static struct vlapic *
svm_vlapic_init(void *arg, int vcpuid)
{
struct svm_softc *svm_sc;
struct vlapic *vlapic;
svm_sc = arg;
vlapic = kmem_zalloc(sizeof (struct vlapic), KM_SLEEP);
vlapic->vm = svm_sc->vm;
vlapic->vcpuid = vcpuid;
vlapic->apic_page = (struct LAPIC *)&svm_sc->apic_page[vcpuid];
vlapic_init(vlapic);
return (vlapic);
}
static void
svm_vlapic_cleanup(void *arg, struct vlapic *vlapic)
{
vlapic_cleanup(vlapic);
kmem_free(vlapic, sizeof (struct vlapic));
}
static void
svm_pause(void *arg, int vcpu)
{
struct svm_softc *sc = arg;
struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
const uint64_t intinfo = ctrl->eventinj;
if ((intinfo & VMCB_EVENTINJ_VALID) != 0) {
svm_stash_intinfo(sc, vcpu, intinfo);
ctrl->eventinj = 0;
}
svm_disable_intr_window_exiting(sc, vcpu);
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
}
static void
svm_savectx(void *arg, int vcpu)
{
struct svm_softc *sc = arg;
ASSERT(!hma_svm_gif_is_disabled());
if (sc->vcpu[vcpu].loaded) {
svm_msr_guest_exit(sc, vcpu);
}
}
static void
svm_restorectx(void *arg, int vcpu)
{
struct svm_softc *sc = arg;
if (sc->vcpu[vcpu].loaded) {
svm_msr_guest_enter(sc, vcpu);
}
}
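/*
 * Compute the fixed-point multiplier for AMD TSC frequency scaling.  No
 * multiplier is needed when the guest and host frequencies match;
 * otherwise the TSC ratio MSR must be supported and the requested
 * frequency must fall within the bounds defined at the top of this file.
 */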
static freqratio_res_t
svm_freq_ratio(uint64_t guest_hz, uint64_t host_hz, uint64_t *mult)
{
if (guest_hz == host_hz) {
return (FR_SCALING_NOT_NEEDED);
}
if (!has_tsc_freq_ctl()) {
return (FR_SCALING_NOT_SUPPORTED);
}
if ((guest_hz < AMD_TSC_MIN_FREQ) ||
(guest_hz >= (host_hz * AMD_TSC_MAX_FREQ_RATIO))) {
return (FR_OUT_OF_RANGE);
}
uint64_t m = vmm_calc_freq_multiplier(guest_hz, host_hz,
AMD_TSCM_FRAC_SIZE);
*mult = m;
return (FR_VALID);
}
struct vmm_ops vmm_ops_amd = {
.init = svm_init,
.resume = svm_restore,
.vminit = svm_vminit,
.vmrun = svm_vmrun,
.vmcleanup = svm_vmcleanup,
.vmgetreg = svm_getreg,
.vmsetreg = svm_setreg,
.vmgetdesc = svm_getdesc,
.vmsetdesc = svm_setdesc,
.vmgetcap = svm_getcap,
.vmsetcap = svm_setcap,
.vlapic_init = svm_vlapic_init,
.vlapic_cleanup = svm_vlapic_cleanup,
.vmpause = svm_pause,
.vmsavectx = svm_savectx,
.vmrestorectx = svm_restorectx,
.vmgetmsr = svm_get_msr,
.vmsetmsr = svm_set_msr,
.vmfreqratio = svm_freq_ratio,
.fr_intsize = AMD_TSCM_INT_SIZE,
.fr_fracsize = AMD_TSCM_FRAC_SIZE,
};