#include <sys/stat.h>
#include <sys/types.h>
#include <dev/ic/i8253reg.h>
#include <dev/isa/isareg.h>
#include <machine/pte.h>
#include <machine/specialreg.h>
#include <machine/vmmvar.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>
#include "atomicio.h"
#include "fw_cfg.h"
#include "i8253.h"
#include "i8259.h"
#include "loadfile.h"
#include "mc146818.h"
#include "ns8250.h"
#include "pci.h"
#include "virtio.h"
/*
 * A per-port I/O exit handler: services an IN/OUT exit and returns the
 * IRQ number to assert afterwards, or 0xFF for none (see
 * vcpu_exit_inout()).
 */
typedef uint8_t (*io_fn_t)(struct vm_run_params *);
/* Guest RAM below this mark is plain low memory; [LOWMEM_KB KB, 1 MB)
 * is reserved (see create_memory_map()). */
#define LOWMEM_KB 576
/* x86 exposes a 16-bit I/O port space. */
#define MAX_PORTS 65536
/* Dispatch table indexed by I/O port; NULL entries are unhandled. */
io_fn_t ioports_map[MAX_PORTS];
int translate_gva(struct vm_exit*, uint64_t, uint64_t *, int);
static int loadfile_bios(gzFile, off_t, struct vcpu_reg_state *);
static int vcpu_exit_eptviolation(struct vm_run_params *);
static void vcpu_exit_inout(struct vm_run_params *);
/* Shared with the surrounding vmd process. */
extern struct vmd_vm *current_vm;
extern int con_fd;
/*
 * Template register state for entering a vCPU directly in flat 64-bit
 * (long) mode: CR0.PG|PE and EFER.LME|LMA set, CR3 rooted at
 * PML4_PAGE, and flat 4 GB code/data segments.  Copied over the
 * caller-supplied state in load_firmware() before ELF loading.
 */
static const struct vcpu_reg_state vcpu_init_flat64 = {
.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
.vrs_gprs[VCPU_REGS_RIP] = 0x0,
.vrs_gprs[VCPU_REGS_RSP] = 0x0,
.vrs_crs[VCPU_REGS_CR0] = CR0_ET | CR0_PE | CR0_PG,
.vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
.vrs_crs[VCPU_REGS_CR4] = CR4_PAE | CR4_PSE,
.vrs_crs[VCPU_REGS_PDPTE0] = 0ULL,
.vrs_crs[VCPU_REGS_PDPTE1] = 0ULL,
.vrs_crs[VCPU_REGS_PDPTE2] = 0ULL,
.vrs_crs[VCPU_REGS_PDPTE3] = 0ULL,
.vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
.vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
.vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
.vrs_sregs[VCPU_REGS_FS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
.vrs_sregs[VCPU_REGS_GS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
.vrs_sregs[VCPU_REGS_SS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
.vrs_msrs[VCPU_REGS_EFER] = EFER_LME | EFER_LMA,
/* DR6/DR7 carry the usual x86 reset defaults. */
.vrs_drs[VCPU_REGS_DR0] = 0x0,
.vrs_drs[VCPU_REGS_DR1] = 0x0,
.vrs_drs[VCPU_REGS_DR2] = 0x0,
.vrs_drs[VCPU_REGS_DR3] = 0x0,
.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
.vrs_drs[VCPU_REGS_DR7] = 0x400,
.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
.vrs_msrs[VCPU_REGS_MISC_ENABLE] = 0ULL,
.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};
/*
 * Template register state for a real-mode ("flat 16 bit") start:
 * paging disabled, CS base 0xF0000 with RIP 0xFFF0, i.e. execution
 * begins at physical 0xFFFF0 inside the sub-1MB BIOS copy installed
 * by loadfile_bios().
 */
static const struct vcpu_reg_state vcpu_init_flat16 = {
.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
.vrs_gprs[VCPU_REGS_RIP] = 0xFFF0,
.vrs_gprs[VCPU_REGS_RSP] = 0x0,
.vrs_crs[VCPU_REGS_CR0] = 0x60000010,
.vrs_crs[VCPU_REGS_CR3] = 0,
.vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
.vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x8093, 0x0},
.vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x8093, 0x0},
.vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x8093, 0x0},
.vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x8093, 0x0},
.vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x8093, 0x0},
.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
.vrs_msrs[VCPU_REGS_EFER] = 0ULL,
.vrs_drs[VCPU_REGS_DR0] = 0x0,
.vrs_drs[VCPU_REGS_DR1] = 0x0,
.vrs_drs[VCPU_REGS_DR2] = 0x0,
.vrs_drs[VCPU_REGS_DR3] = 0x0,
.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
.vrs_drs[VCPU_REGS_DR7] = 0x400,
.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};
/*
 * create_memory_map
 *
 * Splits the requested guest memory size (supplied by the caller as
 * the size of memrange 0) into vmd's fixed guest-physical layout:
 *   0: [0, LOWMEM_KB KB)              RAM (low memory)
 *   1: [LOWMEM_KB KB, 1 MB)           reserved (BIOS area)
 *   2: [1 MB, up to PCI MMIO base)    RAM
 *   3: PCI MMIO BAR window            MMIO
 *   4: 4 MB directly above the window reserved
 *   5: [4 GB, ...)                    remaining RAM, if any
 * On invalid input (zero size, or larger than VMM_MAX_VM_MEM_SIZE)
 * vmc_nmemranges is left at 0.
 */
void
create_memory_map(struct vmd_vm *vm)
{
struct vmop_create_params *vmc = &vm->vm_params;
size_t len, mem_bytes;
size_t above_1m = 0, above_4g = 0;
/* Total requested memory is stashed in range 0 by the caller. */
mem_bytes = vmc->vmc_memranges[0].vmr_size;
vmc->vmc_nmemranges = 0;
if (mem_bytes == 0 || mem_bytes > VMM_MAX_VM_MEM_SIZE)
return;
/* Range 0: low RAM. */
len = LOWMEM_KB * 1024;
vmc->vmc_memranges[0].vmr_gpa = 0x0;
vmc->vmc_memranges[0].vmr_size = len;
vmc->vmc_memranges[0].vmr_type = VM_MEM_RAM;
mem_bytes -= len;
/* Range 1: reserved hole up to the 1 MB mark. */
len = MB(1) - (LOWMEM_KB * 1024);
vmc->vmc_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
vmc->vmc_memranges[1].vmr_size = len;
vmc->vmc_memranges[1].vmr_type = VM_MEM_RESERVED;
mem_bytes -= len;
/*
 * Tiny VM: only create the reserved range above the MMIO window.
 * NOTE(review): any remaining mem_bytes (<= 4 MB) is dropped here —
 * confirm this is intentional.
 */
if (mem_bytes <= MB(4)) {
vmc->vmc_memranges[2].vmr_gpa = PCI_MMIO_BAR_END;
vmc->vmc_memranges[2].vmr_size = MB(4);
vmc->vmc_memranges[2].vmr_type = VM_MEM_RESERVED;
vmc->vmc_nmemranges = 3;
return;
}
/* Split what remains between below-MMIO and above-4GB RAM. */
if (mem_bytes > PCI_MMIO_BAR_BASE - MB(1)) {
above_1m = PCI_MMIO_BAR_BASE - MB(1);
above_4g = mem_bytes - above_1m;
} else {
above_1m = mem_bytes;
above_4g = 0;
}
/* Range 2: RAM from 1 MB up to (at most) the PCI MMIO window. */
vmc->vmc_memranges[2].vmr_gpa = MB(1);
vmc->vmc_memranges[2].vmr_size = above_1m;
vmc->vmc_memranges[2].vmr_type = VM_MEM_RAM;
/* Range 3: the PCI MMIO BAR window itself. */
vmc->vmc_memranges[3].vmr_gpa = PCI_MMIO_BAR_BASE;
vmc->vmc_memranges[3].vmr_size = PCI_MMIO_BAR_END -
PCI_MMIO_BAR_BASE + 1;
vmc->vmc_memranges[3].vmr_type = VM_MEM_MMIO;
/* Range 4: 4 MB reserved area above the MMIO window. */
vmc->vmc_memranges[4].vmr_gpa = PCI_MMIO_BAR_END + 1;
vmc->vmc_memranges[4].vmr_size = MB(4);
vmc->vmc_memranges[4].vmr_type = VM_MEM_RESERVED;
/* Range 5: any RAM that did not fit below 4 GB. */
if (above_4g > 0) {
vmc->vmc_memranges[5].vmr_gpa = GB(4);
vmc->vmc_memranges[5].vmr_size = above_4g;
vmc->vmc_memranges[5].vmr_type = VM_MEM_RAM;
vmc->vmc_nmemranges = 6;
} else
vmc->vmc_nmemranges = 5;
}
/*
 * load_firmware
 *
 * Loads the boot image from the vm_kernel descriptor into guest
 * memory.  The register state is primed for a flat 64-bit ELF entry;
 * if the image is not ELF (ENOEXEC) and the underlying file is an
 * uncompressed regular file, it is retried as a raw BIOS image (which
 * installs the real-mode register state instead).
 *
 * Returns 0 on success; a nonzero loader/fstat result otherwise.
 * Fatal if the descriptor cannot be opened as a gz stream.
 */
int
load_firmware(struct vmd_vm *vm, struct vcpu_reg_state *vrs)
{
	struct stat sb;
	gzFile fp;
	int ret;

	/* Default to the canonical flat 64-bit entry state. */
	memcpy(vrs, &vcpu_init_flat64, sizeof(*vrs));

	fp = gzdopen(vm->vm_kernel, "r");
	if (fp == NULL)
		fatalx("failed to open kernel - exiting");

	ret = loadfile_elf(fp, vm, vrs, vm->vm_params.vmc_bootdevice);

	/*
	 * Not an ELF image: fall back to BIOS loading, but only for an
	 * uncompressed stream on a valid descriptor we can fstat.
	 */
	if (ret && errno == ENOEXEC && vm->vm_kernel != -1 && gzdirect(fp)) {
		ret = fstat(vm->vm_kernel, &sb);
		if (ret == 0)
			ret = loadfile_bios(fp, sb.st_size, vrs);
	}

	gzclose(fp);
	return (ret);
}
/*
 * loadfile_bios
 *
 * Loads a raw BIOS image of 'size' bytes from the gz stream 'fp' into
 * guest memory and installs the real-mode register state.  The image
 * is placed so it ends exactly at 4 GB, and its tail is mirrored just
 * below 1 MB so the reset vector (0xFFFF0, per vcpu_init_flat16) hits
 * BIOS code.
 *
 * Returns 0 on success, -1 on a bad image size or stream/read failure.
 */
int
loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_state *vrs)
{
off_t off = 0;
size_t lower_sz = size;
/* Reject degenerate or oversized images up front. */
if (size < 15) {
log_warnx("bios image too small");
return (-1);
}
if (size > (off_t)MB(4)) {
log_warnx("bios image too large (> 4 MiB)");
return (-1);
}
/* BIOS boots in real mode. */
memcpy(vrs, &vcpu_init_flat16, sizeof(*vrs));
if (gzrewind(fp) == -1)
return (-1);
/* First copy: the image ends at the top of the 4 GB space. */
off = GB(4) - size;
if (mread(fp, off, size) != (size_t)size) {
errno = EIO;
return (-1);
}
if (gzrewind(fp) == -1)
return (-1);
/*
 * Second copy: mirror (at most) the part that fits in the
 * reserved [LOWMEM_KB KB, 1 MB) BIOS area, taken from the end
 * of the image, so it ends exactly at 1 MB.
 */
lower_sz = MB(1) - (LOWMEM_KB * 1024);
lower_sz = MIN((off_t)lower_sz, size);
if (gzseek(fp, size - lower_sz, SEEK_SET) == -1)
return (-1);
off = MB(1) - lower_sz;
if (mread(fp, off, lower_sz) != lower_sz)
return (-1);
log_debug("%s: loaded BIOS image", __func__);
return (0);
}
/*
 * init_emulated_hw
 *
 * Initializes all emulated devices (i8253 PIT, mc146818 RTC, i8259
 * PICs, ns8250 UART, PCI, virtio, fw_cfg) and wires their I/O ports
 * into ioports_map.
 *
 * Returns 0 on success, 1 if virtio initialization fails.
 */
int
init_emulated_hw(struct vmd_vm *vm, int child_cdrom,
int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vmop_create_params *vmc = &vm->vm_params;
size_t i;
uint64_t memlo, memhi;
/*
 * memlo: bytes of RAM in the range starting at 1 MB beyond the
 * first 16 MB; memhi: bytes of RAM above 4 GB.  Both are handed
 * to mc146818_init() (presumably for guest memory-size reporting
 * via the RTC — confirm against mc146818.c).
 */
memlo = memhi = 0;
for (i = 0; i < vmc->vmc_nmemranges; i++) {
if (vmc->vmc_memranges[i].vmr_gpa == MB(1) &&
vmc->vmc_memranges[i].vmr_size > (15 * MB(1)))
memlo = vmc->vmc_memranges[i].vmr_size - (15 * MB(1));
else if (vmc->vmc_memranges[i].vmr_gpa == GB(4))
memhi = vmc->vmc_memranges[i].vmr_size;
}
/* Start with every port unhandled. */
memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);
/* i8253 PIT: control word plus the three counter ports. */
i8253_init(vm->vm_vmmid);
ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;
ioports_map[PCKBC_AUX] = vcpu_exit_i8253_misc;
/* mc146818 RTC: index and data ports. */
mc146818_init(vm->vm_vmmid, memlo, memhi);
ioports_map[IO_RTC] = vcpu_exit_mc146818;
ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;
/* i8259 master/slave PICs and the edge/level control registers. */
i8259_init();
ioports_map[IO_ICU1] = vcpu_exit_i8259;
ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
ioports_map[IO_ICU2] = vcpu_exit_i8259;
ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;
ioports_map[ELCR0] = vcpu_exit_elcr;
ioports_map[ELCR1] = vcpu_exit_elcr;
/* ns8250 UART on COM1, backed by the console fd. */
ns8250_init(con_fd, vm->vm_vmmid);
for (i = COM1_DATA; i <= COM1_SCR; i++)
ioports_map[i] = vcpu_exit_com;
/* PCI: I/O BAR window plus the mode-1 config address/data ports. */
for (i = VM_PCI_IO_BAR_BASE; i <= VM_PCI_IO_BAR_END; i++)
ioports_map[i] = vcpu_exit_pci;
ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
pci_init();
/* virtio devices (disks, cdrom, network taps). */
if (virtio_init(current_vm, child_cdrom, child_disks, child_taps))
return (1);
/* fw_cfg: selector/data ports and the DMA address registers. */
fw_cfg_init(vmc);
ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg;
ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg;
ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma;
ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma;
return (0);
}
/*
 * pause_vm_md
 *
 * Machine-dependent part of pausing a VM: stops the emulated timer,
 * RTC and UART, then quiesces the virtio devices.
 */
void
pause_vm_md(struct vmd_vm *vm)
{
i8253_stop();
mc146818_stop();
ns8250_stop();
virtio_stop(vm);
}
/*
 * unpause_vm_md
 *
 * Machine-dependent part of resuming a VM: restarts the devices
 * stopped by pause_vm_md(), in the same order.
 */
void
unpause_vm_md(struct vmd_vm *vm)
{
i8253_start();
mc146818_start();
ns8250_start();
virtio_start(vm);
}
/*
 * vcpu_exit_inout
 *
 * Handles an IN/OUT exit: dispatches to the per-port handler in
 * ioports_map, or returns all-ones data for an IN on an unhandled
 * port.  Advances RIP past the instruction and asserts any IRQ the
 * handler requested (0xFF means none).
 *
 * REP/string variants are not emulated and are fatal.
 */
void
vcpu_exit_inout(struct vm_run_params *vrp)
{
struct vm_exit *vei = vrp->vrp_exit;
uint8_t intr = 0xFF;
if (vei->vei.vei_rep || vei->vei.vei_string) {
#ifdef MMIO_DEBUG
log_info("%s: %s%s%s %d-byte, enc=%d, data=0x%08x, port=0x%04x",
__func__,
vei->vei.vei_rep == 0 ? "" : "REP ",
vei->vei.vei_dir == VEI_DIR_IN ? "IN" : "OUT",
vei->vei.vei_string == 0 ? "" : "S",
vei->vei.vei_size, vei->vei.vei_encoding,
vei->vei.vei_data, vei->vei.vei_port);
log_info("%s: ECX = 0x%llx, RDX = 0x%llx, RSI = 0x%llx",
__func__,
vei->vrs.vrs_gprs[VCPU_REGS_RCX],
vei->vrs.vrs_gprs[VCPU_REGS_RDX],
vei->vrs.vrs_gprs[VCPU_REGS_RSI]);
#endif
fatalx("%s: can't emulate REP prefixed IN(S)/OUT(S)",
__func__);
}
if (ioports_map[vei->vei.vei_port] != NULL)
intr = ioports_map[vei->vei.vei_port](vrp);
else if (vei->vei.vei_dir == VEI_DIR_IN)
set_return_data(vei, 0xFFFFFFFF);
/* Step over the faulting IN/OUT instruction. */
vei->vrs.vrs_gprs[VCPU_REGS_RIP] += vei->vei.vei_insn_len;
if (intr != 0xFF)
vcpu_assert_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
}
/*
 * vcpu_exit
 *
 * Top-level VM-exit dispatcher for both VMX and SVM exit reasons.
 *
 * Returns 0 to let the vcpu run again, EAGAIN on triple fault /
 * shutdown (terminate the VM), or the error from EPT/NPT fault
 * handling.  Unknown exit reasons are logged and otherwise ignored.
 */
int
vcpu_exit(struct vm_run_params *vrp)
{
int ret;
switch (vrp->vrp_exit_reason) {
/* Exits fully handled in-kernel or needing no action here. */
case VMX_EXIT_INT_WINDOW:
case SVM_VMEXIT_VINTR:
case VMX_EXIT_CPUID:
case VMX_EXIT_EXTINT:
case SVM_VMEXIT_INTR:
case SVM_VMEXIT_MSR:
case SVM_VMEXIT_CPUID:
break;
case SVM_VMEXIT_NPF:
case VMX_EXIT_EPT_VIOLATION:
ret = vcpu_exit_eptviolation(vrp);
if (ret)
return (ret);
break;
case VMX_EXIT_IO:
case SVM_VMEXIT_IOIO:
vcpu_exit_inout(vrp);
break;
case VMX_EXIT_HLT:
case SVM_VMEXIT_HLT:
/* Guest executed HLT: park this vcpu until an interrupt. */
vcpu_halt(vrp->vrp_vcpu_id);
break;
case VMX_EXIT_TRIPLE_FAULT:
case SVM_VMEXIT_SHUTDOWN:
/* Fatal guest state: signal the caller to tear down. */
return (EAGAIN);
default:
log_debug("unknown exit reason 0x%x", vrp->vrp_exit_reason);
}
return (0);
}
/*
 * vcpu_exit_eptviolation
 *
 * Handles an EPT (VMX) / NPT (SVM) fault exit.  Faults already fixed
 * up by the kernel (VEE_FAULT_HANDLED) are ignored; protection faults
 * return EFAULT.  The MMIO-assist path (instruction fetch, decode and
 * emulation) is compiled out behind MMIO_NOTYET.
 *
 * Returns 0 to continue the vcpu, or an errno to stop it.
 */
static int
vcpu_exit_eptviolation(struct vm_run_params *vrp)
{
struct vm_exit *ve = vrp->vrp_exit;
int ret = 0;
#if MMIO_NOTYET
struct x86_insn insn;
uint64_t va, pa;
size_t len = 15;
#endif
switch (ve->vee.vee_fault_type) {
case VEE_FAULT_HANDLED:
break;
#if MMIO_NOTYET
case VEE_FAULT_MMIO_ASSIST:
/* Use the hardware-reported length when valid; cap at 15. */
if (ve->vee.vee_insn_info & VEE_LEN_VALID)
len = ve->vee.vee_insn_len;
if (len > 15)
fatalx("%s: invalid instruction length %lu", __func__,
len);
/* Fetch the faulting instruction bytes if not provided. */
if (!(ve->vee.vee_insn_info & VEE_BYTES_VALID)) {
memset(ve->vee.vee_insn_bytes, 0,
sizeof(ve->vee.vee_insn_bytes));
va = ve->vrs.vrs_gprs[VCPU_REGS_RIP];
if ((va & PAGE_MASK) + len > PAGE_SIZE) {
log_warnx("%s: instruction might cross page "
"boundary", __func__);
ret = EINVAL;
break;
}
ret = translate_gva(ve, va, &pa, PROT_EXEC);
if (ret != 0) {
log_warnx("%s: failed gva translation",
__func__);
break;
}
ret = read_mem(pa, ve->vee.vee_insn_bytes, len);
if (ret != 0) {
log_warnx("%s: failed to fetch instruction "
"bytes from 0x%llx", __func__, pa);
break;
}
}
ret = insn_decode(ve, &insn);
if (ret == 0)
ret = insn_emulate(ve, &insn);
break;
#endif
case VEE_FAULT_PROTECT:
log_debug("EPT Violation: rip=0x%llx",
ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
ret = EFAULT;
break;
default:
fatalx("invalid fault_type %d", ve->vee.vee_fault_type);
}
return (ret);
}
/*
 * vcpu_exit_pci
 *
 * I/O exit handler for all PCI-related ports: the mode-1 config
 * address register, the four config data register bytes, and the
 * PCI I/O BAR window (which may return an IRQ to assert).
 *
 * Returns the IRQ to assert, or 0xFF for none.
 */
uint8_t
vcpu_exit_pci(struct vm_run_params *vrp)
{
struct vm_exit *vei = vrp->vrp_exit;
uint8_t intr;
intr = 0xFF;
switch (vei->vei.vei_port) {
case PCI_MODE1_ADDRESS_REG:
pci_handle_address_reg(vrp);
break;
case PCI_MODE1_DATA_REG:
case PCI_MODE1_DATA_REG + 1:
case PCI_MODE1_DATA_REG + 2:
case PCI_MODE1_DATA_REG + 3:
pci_handle_data_reg(vrp);
break;
/* GNU case-range extension covering the whole I/O BAR window. */
case VM_PCI_IO_BAR_BASE ... VM_PCI_IO_BAR_END:
intr = pci_handle_io(vrp);
break;
default:
log_warnx("unknown PCI register 0x%04x", vei->vei.vei_port);
break;
}
return (intr);
}
/*
 * find_gpa_range
 *
 * Verifies that the guest-physical span [gpa, gpa + len) is fully
 * backed by consecutive, gap-free entries of vmc's memory ranges.
 *
 * Returns a pointer to the range containing 'gpa' (callers then walk
 * forward through the array themselves), or NULL if the span is not
 * fully covered.
 *
 * NOTE(review): the first loop assumes ranges are sorted by vmr_gpa
 * and that gpa is not below the matched range's base; a gpa inside a
 * hole before a range would pass here and make the callers' offset
 * arithmetic underflow — confirm callers only pass mapped addresses.
 */
struct vm_mem_range *
find_gpa_range(struct vmop_create_params *vmc, paddr_t gpa, size_t len)
{
size_t i, n;
struct vm_mem_range *vmr;
/* Find the first range whose end is beyond gpa. */
for (i = 0; i < vmc->vmc_nmemranges; i++) {
vmr = &vmc->vmc_memranges[i];
if (gpa < vmr->vmr_gpa + vmr->vmr_size)
break;
}
if (i == vmc->vmc_nmemranges)
return (NULL);
/* Bytes of the span satisfied by this first range. */
n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
if (len < n)
len = 0;
else
len -= n;
gpa = vmr->vmr_gpa + vmr->vmr_size;
/* Consume subsequent ranges; they must be exactly contiguous. */
for (i = i + 1; len != 0 && i < vmc->vmc_nmemranges; i++) {
vmr = &vmc->vmc_memranges[i];
if (gpa != vmr->vmr_gpa)
return (NULL);
if (len <= vmr->vmr_size)
len = 0;
else
len -= vmr->vmr_size;
gpa = vmr->vmr_gpa + vmr->vmr_size;
}
if (len != 0)
return (NULL);
return (vmr);
}
/*
 * write_mem
 *
 * Copies 'len' bytes from 'buf' into guest physical memory starting at
 * 'dst', walking across contiguous memory ranges as needed.  A NULL
 * 'buf' zero-fills the destination instead.
 *
 * Returns 0 on success, or EINVAL (also set in errno) when
 * [dst, dst + len) is not fully backed by the guest's memory ranges.
 */
int
write_mem(paddr_t dst, const void *buf, size_t len)
{
	const char *from = buf;
	char *to;
	size_t n, off;
	struct vm_mem_range *vmr;

	/* Fixed: "&current_vm" had been mangled to the "¤t_vm" entity. */
	vmr = find_gpa_range(&current_vm->vm_params, dst, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range dst = 0x%lx, "
		    "len = 0x%zx", __func__, dst, len);
		return (EINVAL);
	}

	off = dst - vmr->vmr_gpa;
	while (len != 0) {
		/* Clamp the copy to what remains in the current range. */
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		to = (char *)vmr->vmr_va + off;
		if (buf == NULL)
			memset(to, 0, n);
		else {
			memcpy(to, from, n);
			from += n;
		}

		len -= n;
		off = 0;	/* later ranges are consumed from their base */
		vmr++;
	}

	return (0);
}
/*
 * read_mem
 *
 * Copies 'len' bytes of guest physical memory starting at 'src' into
 * 'buf', walking across contiguous memory ranges as needed.
 *
 * Returns 0 on success, or EINVAL (also set in errno) when
 * [src, src + len) is not fully backed by the guest's memory ranges.
 */
int
read_mem(paddr_t src, void *buf, size_t len)
{
	char *from, *to = buf;
	size_t n, off;
	struct vm_mem_range *vmr;

	/* Fixed: "&current_vm" had been mangled to the "¤t_vm" entity. */
	vmr = find_gpa_range(&current_vm->vm_params, src, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range src = 0x%lx, "
		    "len = 0x%zx", __func__, src, len);
		return (EINVAL);
	}

	off = src - vmr->vmr_gpa;
	while (len != 0) {
		/* Clamp the copy to what remains in the current range. */
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		from = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);
		to += n;

		len -= n;
		off = 0;	/* later ranges are consumed from their base */
		vmr++;
	}

	return (0);
}
/*
 * hvaddr_mem
 *
 * Translates guest physical address 'gpa' to the host virtual address
 * backing it, verifying that 'len' bytes fit inside a single memory
 * range (no cross-range spans).
 *
 * Returns the host pointer, or NULL with errno set to EFAULT (gpa not
 * mapped) or EINVAL (span exceeds the containing range).
 */
void *
hvaddr_mem(paddr_t gpa, size_t len)
{
	struct vm_mem_range *vmr;
	size_t off;

	/* Fixed: "&current_vm" had been mangled to the "¤t_vm" entity. */
	vmr = find_gpa_range(&current_vm->vm_params, gpa, len);
	if (vmr == NULL) {
		log_warnx("%s: failed - invalid gpa: 0x%lx\n", __func__, gpa);
		errno = EFAULT;
		return (NULL);
	}

	off = gpa - vmr->vmr_gpa;
	if (len > (vmr->vmr_size - off)) {
		log_warnx("%s: failed - invalid memory range: gpa=0x%lx, "
		    "len=%zu", __func__, gpa, len);
		errno = EINVAL;
		return (NULL);
	}

	return ((char *)vmr->vmr_va + off);
}
/*
 * vcpu_assert_irq
 *
 * Raises 'irq' on the emulated i8259 and, if the PIC now has a pending
 * interrupt, asserts INTR on the vcpu and wakes it from HLT.
 */
void
vcpu_assert_irq(uint32_t vmm_id, uint32_t vcpu_id, int irq)
{
i8259_assert_irq(irq);
if (i8259_is_pending()) {
if (vcpu_intr(vmm_id, vcpu_id, 1))
fatalx("%s: can't assert INTR", __func__);
vcpu_unhalt(vcpu_id);
vcpu_signal_run(vcpu_id);
}
}
/*
 * vcpu_deassert_irq
 *
 * Lowers 'irq' on the emulated i8259 and, if nothing remains pending,
 * deasserts INTR on the vcpu.
 */
void
vcpu_deassert_irq(uint32_t vmm_id, uint32_t vcpu_id, int irq)
{
i8259_deassert_irq(irq);
if (!i8259_is_pending()) {
if (vcpu_intr(vmm_id, vcpu_id, 0))
fatalx("%s: can't deassert INTR for vmm_id %d, "
"vcpu_id %d", __func__, vmm_id, vcpu_id);
}
}
/*
 * set_return_data
 *
 * Stores 'data' as the result of an emulated IN, merging only as many
 * low-order bytes of vei_data as the instruction's access size (1, 2
 * or 4); other sizes leave vei_data untouched.
 */
void
set_return_data(struct vm_exit *vei, uint32_t data)
{
	if (vei->vei.vei_size == 1) {
		vei->vei.vei_data &= ~0xFF;
		vei->vei.vei_data |= (uint8_t)data;
	} else if (vei->vei.vei_size == 2) {
		vei->vei.vei_data &= ~0xFFFF;
		vei->vei.vei_data |= (uint16_t)data;
	} else if (vei->vei.vei_size == 4) {
		vei->vei.vei_data = data;
	}
}
/*
 * get_input_data
 *
 * Extracts the operand of an emulated OUT into '*data', replacing only
 * as many low-order bytes as the instruction's access size (1, 2 or
 * 4).  Any other size is logged and leaves '*data' untouched.
 */
void
get_input_data(struct vm_exit *vei, uint32_t *data)
{
	if (vei->vei.vei_size == 1) {
		*data &= 0xFFFFFF00;
		*data |= (uint8_t)vei->vei.vei_data;
	} else if (vei->vei.vei_size == 2) {
		*data &= 0xFFFF0000;
		*data |= (uint16_t)vei->vei.vei_data;
	} else if (vei->vei.vei_size == 4) {
		*data = vei->vei.vei_data;
	} else {
		log_warnx("%s: invalid i/o size %d", __func__,
		    vei->vei.vei_size);
	}
}
/*
 * translate_gva
 *
 * Software page-table walk: translates guest virtual address 'va' to
 * a guest physical address '*pa' using the paging state captured in
 * exit->vrs, checking access 'mode' (PROT_READ/PROT_WRITE/PROT_EXEC)
 * at each level.  Supports 4-level long mode, 3-level PAE and 2-level
 * 32-bit paging; with paging disabled va maps 1:1.
 *
 * As a side effect each PTE visited has its used (PG_U) — and on
 * writes modified (PG_M) — flag set and written back to guest memory.
 *
 * Returns 0 on success; EINVAL (bad args / real mode), EFAULT
 * (non-present PTE or unreadable page table), EPERM (permission
 * denied) or EIO (PTE write-back failed) otherwise.
 */
int
translate_gva(struct vm_exit* exit, uint64_t va, uint64_t* pa, int mode)
{
int level, shift, pdidx;
uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
uint64_t shift_width, pte_size;
struct vcpu_reg_state *vrs;
vrs = &exit->vrs;
if (!pa)
return (EINVAL);
/* Paging off: identity mapping. */
if (!(vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
log_debug("%s: unpaged, va=pa=0x%llx", __func__, va);
*pa = va;
return (0);
}
pt_paddr = vrs->vrs_crs[VCPU_REGS_CR3];
log_debug("%s: guest %%cr0=0x%llx, %%cr3=0x%llx", __func__,
vrs->vrs_crs[VCPU_REGS_CR0], vrs->vrs_crs[VCPU_REGS_CR3]);
/* Select walk depth and index geometry from the paging mode. */
if (vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
if (vrs->vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
pte_size = sizeof(uint64_t);
shift_width = 9;
if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
/* 4-level long mode. */
level = 4;
mask = L4_MASK;
shift = L4_SHIFT;
} else {
/* 3-level PAE. */
level = 3;
mask = L3_MASK;
shift = L3_SHIFT;
}
} else {
/* Legacy 2-level 32-bit paging: 10-bit indices. */
level = 2;
shift_width = 10;
mask = 0xFFC00000;
shift = 22;
pte_size = sizeof(uint32_t);
}
} else
return (EINVAL);
for (;level > 0; level--) {
/* Index of this level's entry, then its guest-physical address. */
pdidx = (va & mask) >> shift;
pte_paddr = (pt_paddr) + (pdidx * pte_size);
log_debug("%s: read pte level %d @ GPA 0x%llx", __func__,
level, pte_paddr);
if (read_mem(pte_paddr, &pte, pte_size)) {
log_warn("%s: failed to read pte", __func__);
return (EFAULT);
}
log_debug("%s: PTE @ 0x%llx = 0x%llx", __func__, pte_paddr,
pte);
if (!(pte & PG_V))
return (EFAULT);
if ((mode == PROT_WRITE) && !(pte & PG_RW))
return (EPERM);
/* User-mode (CPL > 0) access to a supervisor page. */
if ((exit->cpl > 0) && !(pte & PG_u))
return (EPERM);
/* Mark used/modified and write the flags back to the guest. */
pte = pte | PG_U;
if (mode == PROT_WRITE)
pte = pte | PG_M;
if (write_mem(pte_paddr, &pte, pte_size)) {
log_warn("%s: failed to write back flags to pte",
__func__);
return (EIO);
}
/* Large page: translation ends at this level. */
if (pte & PG_PS)
break;
if (level > 1) {
pt_paddr = pte & PG_FRAME;
shift -= shift_width;
mask = mask >> shift_width;
}
}
/* Combine the frame bits of the final PTE with the page offset. */
low_mask = (1 << shift) - 1;
high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
*pa = (pte & high_mask) | (va & low_mask);
log_debug("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, va, *pa);
return (0);
}
/* Returns nonzero if the emulated i8259 has an interrupt pending. */
int
intr_pending(struct vmd_vm *vm)
{
return i8259_is_pending();
}
/* Acknowledges the highest-priority pending i8259 interrupt and
 * returns its vector. */
int
intr_ack(struct vmd_vm *vm)
{
return i8259_ack();
}
/* Sets the edge/level trigger mode ('val') for 'irq' on the PIC. */
void
intr_toggle_el(struct vmd_vm *vm, int irq, int val)
{
pic_set_elcr(irq, val);
}