#include <sys/types.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/ioccom.h>
#include <sys/stat.h>
#include <sys/vmsystm.h>
#include <sys/ddi.h>
#include <sys/mkdev.h>
#include <sys/sunddi.h>
#include <sys/fs/dv_node.h>
#include <sys/cpuset.h>
#include <sys/id_space.h>
#include <sys/fs/sdev_plugin.h>
#include <sys/smt.h>
#include <sys/kstat.h>
#include <sys/kernel.h>
#include <sys/hma.h>
#include <sys/x86_archext.h>
#include <x86/apicreg.h>
#include <sys/vmm.h>
#include <sys/vmm_kernel.h>
#include <sys/vmm_instruction_emul.h>
#include <sys/vmm_dev.h>
#include <sys/vmm_impl.h>
#include <sys/vmm_drv.h>
#include <sys/vmm_vm.h>
#include <sys/vmm_reservoir.h>
#include <vm/seg_dev.h>
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vrtc.h"
#include "io/vhpet.h"
#include "io/vpmtmr.h"
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_util.h"
/*
 * Driver-wide (vmmdev_*) state.
 * NOTE(review): the exact locking relationship between vmmdev_mtx and the
 * fields below is not visible in this part of the file -- confirm.
 */
static kmutex_t vmmdev_mtx;
static dev_info_t *vmmdev_dip;
static hma_reg_t *vmmdev_hma_reg;
static uint_t vmmdev_hma_ref;
static sdev_plugin_hdl_t vmmdev_sdev_hdl;
/*
 * Per-instance bookkeeping.  vmm_mtx is held in the ioctl handler while
 * updating a softc's vmm_flags and while calling vmm_destroy_locked().
 * NOTE(review): vmm_list, vmm_minors and vmm_statep are presumably the
 * instance list, minor-number id space and soft-state handle -- confirm.
 */
static kmutex_t vmm_mtx;
static list_t vmm_list;
static id_space_t *vmm_minors;
static void *vmm_statep;
/*
 * Tunable gate for the VM_DATA_WRITE ioctl: when this is zero the handler
 * skips vmm_data_write() and fails the request with EPERM.  Defaults to
 * allowing writes.
 */
int vmm_allow_state_writes = 1;
/* NOTE(review): presumably the name used when registering with HMA -- confirm */
static const char *vmmdev_hvm_name = "bhyve";
/* NOTE(review): presumably the sdev plugin root for VM device nodes -- confirm */
#define VMM_SDEV_ROOT "/dev/vmm"
/* Defined outside this file; declared here because no header is used for it. */
extern int vmx_x86_supported(const char **);
/*
 * Record of a hold on a VM instance.
 * NOTE(review): field meanings below are inferred from names and types only;
 * the code that manipulates holds is outside this view -- confirm.
 */
struct vmm_hold {
/* linkage for membership in a list_t */
list_node_t vmh_node;
/* the VM instance (softc) this hold refers to */
vmm_softc_t *vmh_sc;
/* presumably set when the holder has been asked to release -- confirm */
boolean_t vmh_release_req;
/* presumably counts of ioport / MMIO hooks installed via this hold -- confirm */
uint_t vmh_ioport_hook_cnt;
uint_t vmh_mmio_hook_cnt;
};
/*
 * Record of a lease taken against a VM instance.
 * NOTE(review): field meanings below are inferred from names and types only;
 * the lease management code is outside this view -- confirm.
 */
struct vmm_lease {
/* linkage for membership in a list_t */
list_node_t vml_node;
/* the VM, and a vm_client_t handle associated with this lease */
struct vm *vml_vm;
vm_client_t *vml_vmclient;
/* presumably: lease has been expired / its break has been deferred -- confirm */
boolean_t vml_expired;
boolean_t vml_break_deferred;
/* callback taking an opaque argument, plus the argument to pass to it */
boolean_t (*vml_expire_func)(void *);
void *vml_expire_arg;
/* the hold this lease is associated with */
struct vmm_hold *vml_hold;
};
/*
 * Option flags accepted by vmm_destroy_locked().  The non-default values are
 * distinct bits and so may be OR-ed together.
 */
typedef enum vmm_destroy_opts {
/* no special handling; used by the VM_DESTROY_SELF ioctl */
VDO_DEFAULT = 0,
/* NOTE(review): presumably skips zone-specific-data cleanup -- confirm */
VDO_NO_CLEAN_ZSD = (1 << 0),
/* NOTE(review): presumably waits for the destroy to complete -- confirm */
VDO_ATTEMPT_WAIT = (1 << 1),
} vmm_destroy_opts_t;
/*
 * Forward declarations for file-local functions which are referenced before
 * their definitions appear.
 */
static void vmm_hma_release(void);
static int vmm_destroy_locked(vmm_softc_t *, vmm_destroy_opts_t, bool *);
static int vmm_drv_block_hook(vmm_softc_t *, boolean_t);
static void vmm_lease_block(vmm_softc_t *);
static void vmm_lease_unblock(vmm_softc_t *);
static int vmm_kstat_alloc(vmm_softc_t *, minor_t, const cred_t *);
static void vmm_kstat_init(vmm_softc_t *);
static void vmm_kstat_fini(vmm_softc_t *);
/*
 * Look up the devmem entry which tracks memory segment `segid` for this VM.
 *
 * The instance's vmm_devmem_list is walked from head to tail.  The first entry
 * whose vde_segid equals `segid` is returned; NULL is returned when the list
 * holds no such entry.
 */
static vmm_devmem_entry_t *
vmmdev_devmem_find(vmm_softc_t *sc, int segid)
{
	list_t *entries = &sc->vmm_devmem_list;
	vmm_devmem_entry_t *cur = list_head(entries);

	while (cur != NULL && cur->vde_segid != segid) {
		cur = list_next(entries, cur);
	}

	/* Either the matching entry, or NULL once the list is exhausted. */
	return (cur);
}
/*
 * Fill in the length and name of the memory segment identified by
 * mseg->segid.
 *
 * The length (and whether the segment is system memory) comes from
 * vm_get_memseg().  If that call fails, or the segment has zero length, its
 * result is returned with mseg->name untouched.  Otherwise:
 *  - for a system-memory segment, mseg->name is zeroed;
 *  - for any other segment, the name is copied from the matching devmem
 *    entry, if one exists (mseg->name is left as-is when none is found).
 *
 * Returns the value produced by vm_get_memseg().
 */
static int
vmmdev_get_memseg(vmm_softc_t *sc, struct vm_memseg *mseg)
{
	bool is_sysmem;
	const int rc = vm_get_memseg(sc->vmm_vm, mseg->segid, &mseg->len,
	    &is_sysmem, NULL);

	if (rc != 0 || mseg->len == 0) {
		return (rc);
	}

	if (is_sysmem) {
		/* System memory segments carry no name. */
		bzero(mseg->name, sizeof (mseg->name));
		return (rc);
	}

	vmm_devmem_entry_t *found = vmmdev_devmem_find(sc, mseg->segid);
	if (found != NULL) {
		(void) strlcpy(mseg->name, found->vde_name,
		    sizeof (mseg->name));
	}
	return (rc);
}
/*
 * Append a devmem entry describing segment `mseg` (with the given `name`) to
 * the instance's vmm_devmem_list.
 *
 * The entry's offset is VM_DEVMEM_START when the list is empty; otherwise it
 * is placed immediately after the current tail entry (tail offset + tail
 * length).  If that sum would overflow an off_t, nothing is allocated and
 * ERANGE is returned.  Returns 0 on success.
 */
static int
vmmdev_devmem_create(vmm_softc_t *sc, struct vm_memseg *mseg, const char *name)
{
	list_t *entries = &sc->vmm_devmem_list;
	off_t next_off = VM_DEVMEM_START;

	if (!list_is_empty(entries)) {
		const vmm_devmem_entry_t *last = list_tail(entries);
		const off_t last_len = (off_t)last->vde_len;

		/* Refuse to place a new entry past the end of off_t range. */
		if (sum_overflows_off(last->vde_off, last_len)) {
			return (ERANGE);
		}
		next_off = last->vde_off + last_len;
	}

	/* KM_SLEEP allocation; the result is used without a NULL check. */
	vmm_devmem_entry_t *fresh = kmem_zalloc(sizeof (*fresh), KM_SLEEP);

	fresh->vde_off = next_off;
	fresh->vde_len = mseg->len;
	fresh->vde_segid = mseg->segid;
	(void) strlcpy(fresh->vde_name, name, sizeof (fresh->vde_name));

	list_insert_tail(entries, fresh);
	return (0);
}
/*
 * Translate a devmem mapping request [off, off + len) into the segment which
 * backs it.
 *
 * `off` must be at or above VM_DEVMEM_START (enforced with VERIFY).  The
 * devmem list is searched for the first entry whose range fully contains the
 * requested range.  On a match, *segidp receives the entry's segment id,
 * *map_offp receives the offset of `off` within that entry, and B_TRUE is
 * returned.  B_FALSE is returned (outputs untouched) when off + len would
 * overflow or when no entry contains the range.
 */
static boolean_t
vmmdev_devmem_segid(vmm_softc_t *sc, off_t off, off_t len, int *segidp,
    off_t *map_offp)
{
	list_t *entries = &sc->vmm_devmem_list;
	vmm_devmem_entry_t *cur;

	VERIFY(off >= VM_DEVMEM_START);

	if (sum_overflows_off(off, len)) {
		return (B_FALSE);
	}
	const off_t req_end = off + len;

	for (cur = list_head(entries); cur != NULL;
	    cur = list_next(entries, cur)) {
		const off_t cur_end = cur->vde_off + cur->vde_len;

		/* Skip entries which do not fully cover the request. */
		if (off < cur->vde_off || req_end > cur_end) {
			continue;
		}

		*segidp = cur->vde_segid;
		*map_offp = off - cur->vde_off;
		return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * Empty the instance's devmem list, freeing every entry.  Entries are removed
 * from the head one at a time until the list reports no more.
 */
static void
vmmdev_devmem_purge(vmm_softc_t *sc)
{
	list_t *entries = &sc->vmm_devmem_list;

	for (;;) {
		vmm_devmem_entry_t *victim = list_remove_head(entries);

		if (victim == NULL) {
			break;
		}
		kmem_free(victim, sizeof (*victim));
	}
}
/*
 * Allocate the memory segment described by `mseg` and record it in the
 * instance's devmem list.
 *
 * A segment is treated as system memory unless VM_MEMSEG_NAME() yields a
 * non-NULL value for it.  If vm_alloc_memseg() fails, its error is returned
 * directly.  If the subsequent devmem bookkeeping fails, the freshly allocated
 * segment is freed again and that error is returned.  Returns 0 on success.
 */
static int
vmmdev_alloc_memseg(vmm_softc_t *sc, struct vm_memseg *mseg)
{
	const bool sysmem = !(VM_MEMSEG_NAME(mseg));
	int rc;

	rc = vm_alloc_memseg(sc->vmm_vm, mseg->segid, mseg->len, sysmem);
	if (rc != 0) {
		return (rc);
	}

	rc = vmmdev_devmem_create(sc, mseg, mseg->name);
	if (rc != 0) {
		/* Undo the allocation so no untracked segment is left. */
		vm_free_memseg(sc->vmm_vm, mseg->segid);
	}
	return (rc);
}
/*
 * "Lock" a single vCPU by moving it into the VCPU_FROZEN state.
 *
 * The caller must supply a vCPU index in [0, VM_MAXCPU); this is only checked
 * by ASSERT.  The state change itself is wrapped in VERIFY0, so a non-zero
 * result from vcpu_set_state() is fatal rather than reported to the caller.
 * NOTE(review): the final `true` argument is presumably "from idle", making
 * the call wait until the transition is possible -- confirm against
 * vcpu_set_state().
 */
static void
vcpu_lock_one(vmm_softc_t *sc, int vcpu)
{
ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU);
VERIFY0(vcpu_set_state(sc->vmm_vm, vcpu, VCPU_FROZEN, true));
}
/*
 * Release a vCPU previously locked with vcpu_lock_one(), returning it to the
 * VCPU_IDLE state.
 *
 * The vCPU is required (by VERIFY) to currently be in VCPU_FROZEN, and the
 * transition back to idle is likewise required to succeed.
 */
static void
vcpu_unlock_one(vmm_softc_t *sc, int vcpu)
{
ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU);
/* Unlocking a vCPU which is not frozen indicates a caller bug. */
VERIFY3U(vcpu_get_state(sc->vmm_vm, vcpu, NULL), ==, VCPU_FROZEN);
VERIFY0(vcpu_set_state(sc->vmm_vm, vcpu, VCPU_IDLE, false));
}
/*
 * Take the instance-wide rwlock as a reader.  Used by the ioctl handler for
 * commands classified as LOCK_READ_HOLD.
 */
static void
vmm_read_lock(vmm_softc_t *sc)
{
rw_enter(&sc->vmm_rwlock, RW_READER);
}
/*
 * Drop the instance-wide rwlock taken by vmm_read_lock().
 */
static void
vmm_read_unlock(vmm_softc_t *sc)
{
rw_exit(&sc->vmm_rwlock);
}
/*
 * Acquire exclusive (write) access to a VM instance.
 *
 * In order:
 *  1. every vCPU, from 0 up to vm_get_maxcpus(), is locked with
 *     vcpu_lock_one();
 *  2. vmm_lease_block() is called;
 *  3. the instance rwlock is taken as a writer.
 *
 * Once the lock is held, the vCPU count is re-read and VERIFY-ed to be
 * unchanged from the value used in step 1.
 * NOTE(review): vmm_lease_block() presumably keeps vmm_drv leases from being
 * held while the write lock is owned -- its definition is outside this view.
 */
static void
vmm_write_lock(vmm_softc_t *sc)
{
	const int maxcpus = vm_get_maxcpus(sc->vmm_vm);
	int id = 0;

	while (id < maxcpus) {
		vcpu_lock_one(sc, id);
		id++;
	}

	vmm_lease_block(sc);

	rw_enter(&sc->vmm_rwlock, RW_WRITER);
	VERIFY(maxcpus == vm_get_maxcpus(sc->vmm_vm));
}
/*
 * Release the exclusive access obtained by vmm_write_lock().
 *
 * In order:
 *  1. vmm_lease_unblock() is called;
 *  2. the instance rwlock, which the calling thread must hold as writer
 *     (enforced with VERIFY), is dropped;
 *  3. every vCPU, from 0 up to vm_get_maxcpus(), is unlocked with
 *     vcpu_unlock_one().
 */
static void
vmm_write_unlock(vmm_softc_t *sc)
{
	vmm_lease_unblock(sc);

	VERIFY(rw_write_held(&sc->vmm_rwlock));
	rw_exit(&sc->vmm_rwlock);

	/* The vCPU count is only sampled after the rwlock has been dropped. */
	const int ncpus = vm_get_maxcpus(sc->vmm_vm);
	int id = 0;

	while (id < ncpus) {
		vcpu_unlock_one(sc, id);
		id++;
	}
}
static int
vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
cred_t *credp, int *rvalp)
{
int error = 0, vcpu = -1;
void *datap = (void *)arg;
enum vm_lock_type {
LOCK_NONE = 0,
LOCK_VCPU,
LOCK_READ_HOLD,
LOCK_WRITE_HOLD
} lock_type = LOCK_NONE;
switch (cmd) {
case VM_RUN:
case VM_GET_REGISTER:
case VM_SET_REGISTER:
case VM_GET_SEGMENT_DESCRIPTOR:
case VM_SET_SEGMENT_DESCRIPTOR:
case VM_GET_REGISTER_SET:
case VM_SET_REGISTER_SET:
case VM_INJECT_EXCEPTION:
case VM_GET_CAPABILITY:
case VM_SET_CAPABILITY:
case VM_PPTDEV_MSI:
case VM_PPTDEV_MSIX:
case VM_SET_X2APIC_STATE:
case VM_GLA2GPA:
case VM_GLA2GPA_NOFAULT:
case VM_ACTIVATE_CPU:
case VM_SET_INTINFO:
case VM_GET_INTINFO:
case VM_RESTART_INSTRUCTION:
case VM_SET_KERNEMU_DEV:
case VM_GET_KERNEMU_DEV:
case VM_RESET_CPU:
case VM_GET_RUN_STATE:
case VM_SET_RUN_STATE:
case VM_GET_FPU:
case VM_SET_FPU:
case VM_GET_CPUID:
case VM_SET_CPUID:
case VM_LEGACY_CPUID:
if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
return (EFAULT);
}
if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vmm_vm)) {
return (EINVAL);
}
vcpu_lock_one(sc, vcpu);
lock_type = LOCK_VCPU;
break;
case VM_REINIT:
case VM_BIND_PPTDEV:
case VM_UNBIND_PPTDEV:
case VM_MAP_PPTDEV_MMIO:
case VM_UNMAP_PPTDEV_MMIO:
case VM_ALLOC_MEMSEG:
case VM_MMAP_MEMSEG:
case VM_MUNMAP_MEMSEG:
case VM_WRLOCK_CYCLE:
case VM_PMTMR_LOCATE:
case VM_PAUSE:
case VM_RESUME:
vmm_write_lock(sc);
lock_type = LOCK_WRITE_HOLD;
break;
case VM_GET_MEMSEG:
case VM_MMAP_GETNEXT:
case VM_LAPIC_IRQ:
case VM_INJECT_NMI:
case VM_IOAPIC_ASSERT_IRQ:
case VM_IOAPIC_DEASSERT_IRQ:
case VM_IOAPIC_PULSE_IRQ:
case VM_LAPIC_MSI:
case VM_LAPIC_LOCAL_IRQ:
case VM_GET_X2APIC_STATE:
case VM_RTC_READ:
case VM_RTC_WRITE:
case VM_RTC_SETTIME:
case VM_RTC_GETTIME:
case VM_PPTDEV_DISABLE_MSIX:
case VM_DEVMEM_GETOFFSET:
case VM_TRACK_DIRTY_PAGES:
case VM_NPT_OPERATION:
vmm_read_lock(sc);
lock_type = LOCK_READ_HOLD;
break;
case VM_DATA_READ:
case VM_DATA_WRITE:
if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
return (EFAULT);
}
if (vcpu == -1) {
vmm_write_lock(sc);
lock_type = LOCK_WRITE_HOLD;
} else if (vcpu >= 0 && vcpu < vm_get_maxcpus(sc->vmm_vm)) {
vcpu_lock_one(sc, vcpu);
lock_type = LOCK_VCPU;
} else {
return (EINVAL);
}
break;
case VM_GET_GPA_PMAP:
case VM_IOAPIC_PINCOUNT:
case VM_SUSPEND:
case VM_DESC_FPU_AREA:
case VM_SET_AUTODESTRUCT:
case VM_DESTROY_SELF:
case VM_DESTROY_PENDING:
case VM_VCPU_BARRIER:
default:
break;
}
switch (cmd) {
case VM_RUN: {
struct vm_entry entry;
if (ddi_copyin(datap, &entry, sizeof (entry), md)) {
error = EFAULT;
break;
}
if (!(curthread->t_schedflag & TS_VCPU))
smt_mark_as_vcpu();
error = vm_run(sc->vmm_vm, vcpu, &entry);
ASSERT(error != 0);
if (error < 0) {
const struct vm_exit *vme;
void *outp = entry.exit_data;
error = 0;
vme = vm_exitinfo(sc->vmm_vm, vcpu);
if (ddi_copyout(vme, outp, sizeof (*vme), md)) {
error = EFAULT;
}
}
break;
}
case VM_SUSPEND: {
struct vm_suspend vmsuspend;
if (ddi_copyin(datap, &vmsuspend, sizeof (vmsuspend), md)) {
error = EFAULT;
break;
}
error = vm_suspend(sc->vmm_vm, vmsuspend.how, vmsuspend.source);
break;
}
case VM_REINIT: {
struct vm_reinit reinit;
if (ddi_copyin(datap, &reinit, sizeof (reinit), md)) {
error = EFAULT;
break;
}
if ((error = vmm_drv_block_hook(sc, B_TRUE)) != 0) {
break;
}
error = vm_reinit(sc->vmm_vm, reinit.flags);
(void) vmm_drv_block_hook(sc, B_FALSE);
break;
}
case VM_STAT_DESC: {
struct vm_stat_desc statdesc;
if (ddi_copyin(datap, &statdesc, sizeof (statdesc), md)) {
error = EFAULT;
break;
}
error = vmm_stat_desc_copy(statdesc.index, statdesc.desc,
sizeof (statdesc.desc));
if (error == 0 &&
ddi_copyout(&statdesc, datap, sizeof (statdesc), md)) {
error = EFAULT;
break;
}
break;
}
case VM_STATS_IOC: {
struct vm_stats vmstats;
if (ddi_copyin(datap, &vmstats, sizeof (vmstats), md)) {
error = EFAULT;
break;
}
hrt2tv(gethrtime(), &vmstats.tv);
error = vmm_stat_copy(sc->vmm_vm, vmstats.cpuid, vmstats.index,
nitems(vmstats.statbuf),
&vmstats.num_entries, vmstats.statbuf);
if (error == 0 &&
ddi_copyout(&vmstats, datap, sizeof (vmstats), md)) {
error = EFAULT;
break;
}
break;
}
case VM_PPTDEV_MSI: {
struct vm_pptdev_msi pptmsi;
if (ddi_copyin(datap, &pptmsi, sizeof (pptmsi), md)) {
error = EFAULT;
break;
}
error = ppt_setup_msi(sc->vmm_vm, pptmsi.vcpu, pptmsi.pptfd,
pptmsi.addr, pptmsi.msg, pptmsi.numvec);
break;
}
case VM_PPTDEV_MSIX: {
struct vm_pptdev_msix pptmsix;
if (ddi_copyin(datap, &pptmsix, sizeof (pptmsix), md)) {
error = EFAULT;
break;
}
error = ppt_setup_msix(sc->vmm_vm, pptmsix.vcpu, pptmsix.pptfd,
pptmsix.idx, pptmsix.addr, pptmsix.msg,
pptmsix.vector_control);
break;
}
case VM_PPTDEV_DISABLE_MSIX: {
struct vm_pptdev pptdev;
if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
error = EFAULT;
break;
}
error = ppt_disable_msix(sc->vmm_vm, pptdev.pptfd);
break;
}
case VM_MAP_PPTDEV_MMIO: {
struct vm_pptdev_mmio pptmmio;
if (ddi_copyin(datap, &pptmmio, sizeof (pptmmio), md)) {
error = EFAULT;
break;
}
error = ppt_map_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa,
pptmmio.len, pptmmio.hpa);
break;
}
case VM_UNMAP_PPTDEV_MMIO: {
struct vm_pptdev_mmio pptmmio;
if (ddi_copyin(datap, &pptmmio, sizeof (pptmmio), md)) {
error = EFAULT;
break;
}
error = ppt_unmap_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa,
pptmmio.len);
break;
}
case VM_BIND_PPTDEV: {
struct vm_pptdev pptdev;
if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
error = EFAULT;
break;
}
error = vm_assign_pptdev(sc->vmm_vm, pptdev.pptfd);
break;
}
case VM_UNBIND_PPTDEV: {
struct vm_pptdev pptdev;
if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
error = EFAULT;
break;
}
error = vm_unassign_pptdev(sc->vmm_vm, pptdev.pptfd);
break;
}
case VM_GET_PPTDEV_LIMITS: {
struct vm_pptdev_limits pptlimits;
if (ddi_copyin(datap, &pptlimits, sizeof (pptlimits), md)) {
error = EFAULT;
break;
}
error = ppt_get_limits(sc->vmm_vm, pptlimits.pptfd,
&pptlimits.msi_limit, &pptlimits.msix_limit);
if (error == 0 &&
ddi_copyout(&pptlimits, datap, sizeof (pptlimits), md)) {
error = EFAULT;
break;
}
break;
}
case VM_INJECT_EXCEPTION: {
struct vm_exception vmexc;
if (ddi_copyin(datap, &vmexc, sizeof (vmexc), md)) {
error = EFAULT;
break;
}
error = vm_inject_exception(sc->vmm_vm, vcpu, vmexc.vector,
vmexc.error_code_valid != 0, vmexc.error_code,
vmexc.restart_instruction != 0);
break;
}
case VM_INJECT_NMI: {
struct vm_nmi vmnmi;
if (ddi_copyin(datap, &vmnmi, sizeof (vmnmi), md)) {
error = EFAULT;
break;
}
error = vm_inject_nmi(sc->vmm_vm, vmnmi.cpuid);
break;
}
case VM_LAPIC_IRQ: {
struct vm_lapic_irq vmirq;
if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) {
error = EFAULT;
break;
}
error = lapic_intr_edge(sc->vmm_vm, vmirq.cpuid, vmirq.vector);
break;
}
case VM_LAPIC_LOCAL_IRQ: {
struct vm_lapic_irq vmirq;
if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) {
error = EFAULT;
break;
}
error = lapic_set_local_intr(sc->vmm_vm, vmirq.cpuid,
vmirq.vector);
break;
}
case VM_LAPIC_MSI: {
struct vm_lapic_msi vmmsi;
if (ddi_copyin(datap, &vmmsi, sizeof (vmmsi), md)) {
error = EFAULT;
break;
}
error = lapic_intr_msi(sc->vmm_vm, vmmsi.addr, vmmsi.msg);
break;
}
case VM_IOAPIC_ASSERT_IRQ: {
struct vm_ioapic_irq ioapic_irq;
if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
error = EFAULT;
break;
}
error = vioapic_assert_irq(sc->vmm_vm, ioapic_irq.irq);
break;
}
case VM_IOAPIC_DEASSERT_IRQ: {
struct vm_ioapic_irq ioapic_irq;
if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
error = EFAULT;
break;
}
error = vioapic_deassert_irq(sc->vmm_vm, ioapic_irq.irq);
break;
}
case VM_IOAPIC_PULSE_IRQ: {
struct vm_ioapic_irq ioapic_irq;
if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
error = EFAULT;
break;
}
error = vioapic_pulse_irq(sc->vmm_vm, ioapic_irq.irq);
break;
}
case VM_IOAPIC_PINCOUNT: {
int pincount;
pincount = vioapic_pincount(sc->vmm_vm);
if (ddi_copyout(&pincount, datap, sizeof (int), md)) {
error = EFAULT;
break;
}
break;
}
case VM_DESC_FPU_AREA: {
struct vm_fpu_desc desc;
void *buf = NULL;
if (ddi_copyin(datap, &desc, sizeof (desc), md)) {
error = EFAULT;
break;
}
if (desc.vfd_num_entries > 64) {
error = EINVAL;
break;
}
const size_t buf_sz = sizeof (struct vm_fpu_desc_entry) *
desc.vfd_num_entries;
if (buf_sz != 0) {
buf = kmem_zalloc(buf_sz, KM_SLEEP);
}
CTASSERT(sizeof (struct vm_fpu_desc_entry) ==
sizeof (hma_xsave_state_desc_t));
size_t req_size;
const uint_t max_entries = hma_fpu_describe_xsave_state(
(hma_xsave_state_desc_t *)buf,
desc.vfd_num_entries,
&req_size);
desc.vfd_req_size = req_size;
desc.vfd_num_entries = max_entries;
if (buf_sz != 0) {
if (ddi_copyout(buf, desc.vfd_entry_data, buf_sz, md)) {
error = EFAULT;
}
kmem_free(buf, buf_sz);
}
if (error == 0) {
if (ddi_copyout(&desc, datap, sizeof (desc), md)) {
error = EFAULT;
}
}
break;
}
case VM_SET_AUTODESTRUCT: {
mutex_enter(&vmm_mtx);
if (arg != 0) {
sc->vmm_flags |= VMM_AUTODESTROY;
} else {
sc->vmm_flags &= ~VMM_AUTODESTROY;
}
mutex_exit(&vmm_mtx);
break;
}
case VM_DESTROY_SELF: {
bool hma_release = false;
mutex_enter(&vmm_mtx);
VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT, &hma_release));
mutex_exit(&vmm_mtx);
if (hma_release) {
vmm_hma_release();
}
break;
}
case VM_DESTROY_PENDING: {
*rvalp = 0;
break;
}
case VM_ISA_ASSERT_IRQ: {
struct vm_isa_irq isa_irq;
if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
error = EFAULT;
break;
}
error = vatpic_assert_irq(sc->vmm_vm, isa_irq.atpic_irq);
if (error == 0 && isa_irq.ioapic_irq != -1) {
error = vioapic_assert_irq(sc->vmm_vm,
isa_irq.ioapic_irq);
}
break;
}
case VM_ISA_DEASSERT_IRQ: {
struct vm_isa_irq isa_irq;
if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
error = EFAULT;
break;
}
error = vatpic_deassert_irq(sc->vmm_vm, isa_irq.atpic_irq);
if (error == 0 && isa_irq.ioapic_irq != -1) {
error = vioapic_deassert_irq(sc->vmm_vm,
isa_irq.ioapic_irq);
}
break;
}
case VM_ISA_PULSE_IRQ: {
struct vm_isa_irq isa_irq;
if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
error = EFAULT;
break;
}
error = vatpic_pulse_irq(sc->vmm_vm, isa_irq.atpic_irq);
if (error == 0 && isa_irq.ioapic_irq != -1) {
error = vioapic_pulse_irq(sc->vmm_vm,
isa_irq.ioapic_irq);
}
break;
}
case VM_ISA_SET_IRQ_TRIGGER: {
struct vm_isa_irq_trigger isa_irq_trigger;
if (ddi_copyin(datap, &isa_irq_trigger,
sizeof (isa_irq_trigger), md)) {
error = EFAULT;
break;
}
error = vatpic_set_irq_trigger(sc->vmm_vm,
isa_irq_trigger.atpic_irq, isa_irq_trigger.trigger);
break;
}
case VM_MMAP_GETNEXT: {
struct vm_memmap mm;
if (ddi_copyin(datap, &mm, sizeof (mm), md)) {
error = EFAULT;
break;
}
error = vm_mmap_getnext(sc->vmm_vm, &mm.gpa, &mm.segid,
(uintptr_t *)&mm.segoff, &mm.len, &mm.prot, &mm.flags);
if (error == 0 && ddi_copyout(&mm, datap, sizeof (mm), md)) {
error = EFAULT;
break;
}
break;
}
case VM_MMAP_MEMSEG: {
struct vm_memmap mm;
if (ddi_copyin(datap, &mm, sizeof (mm), md)) {
error = EFAULT;
break;
}
error = vm_mmap_memseg(sc->vmm_vm, mm.gpa, mm.segid,
(uintptr_t)mm.segoff, mm.len, mm.prot, mm.flags);
break;
}
case VM_MUNMAP_MEMSEG: {
struct vm_munmap mu;
if (ddi_copyin(datap, &mu, sizeof (mu), md)) {
error = EFAULT;
break;
}
error = vm_munmap_memseg(sc->vmm_vm, mu.gpa, mu.len);
break;
}
case VM_ALLOC_MEMSEG: {
struct vm_memseg vmseg;
if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) {
error = EFAULT;
break;
}
error = vmmdev_alloc_memseg(sc, &vmseg);
break;
}
case VM_GET_MEMSEG: {
struct vm_memseg vmseg;
if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) {
error = EFAULT;
break;
}
error = vmmdev_get_memseg(sc, &vmseg);
if (error == 0 &&
ddi_copyout(&vmseg, datap, sizeof (vmseg), md)) {
error = EFAULT;
break;
}
break;
}
case VM_GET_REGISTER: {
struct vm_register vmreg;
if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) {
error = EFAULT;
break;
}
error = vm_get_register(sc->vmm_vm, vcpu, vmreg.regnum,
&vmreg.regval);
if (error == 0 &&
ddi_copyout(&vmreg, datap, sizeof (vmreg), md)) {
error = EFAULT;
break;
}
break;
}
case VM_SET_REGISTER: {
struct vm_register vmreg;
if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) {
error = EFAULT;
break;
}
error = vm_set_register(sc->vmm_vm, vcpu, vmreg.regnum,
vmreg.regval);
break;
}
case VM_SET_SEGMENT_DESCRIPTOR: {
struct vm_seg_desc vmsegd;
if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) {
error = EFAULT;
break;
}
error = vm_set_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum,
&vmsegd.desc);
break;
}
case VM_GET_SEGMENT_DESCRIPTOR: {
struct vm_seg_desc vmsegd;
if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) {
error = EFAULT;
break;
}
error = vm_get_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum,
&vmsegd.desc);
if (error == 0 &&
ddi_copyout(&vmsegd, datap, sizeof (vmsegd), md)) {
error = EFAULT;
break;
}
break;
}
case VM_GET_REGISTER_SET: {
struct vm_register_set vrs;
int regnums[VM_REG_LAST];
uint64_t regvals[VM_REG_LAST];
if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
error = EFAULT;
break;
}
if (vrs.count > VM_REG_LAST || vrs.count == 0) {
error = EINVAL;
break;
}
if (ddi_copyin(vrs.regnums, regnums,
sizeof (int) * vrs.count, md)) {
error = EFAULT;
break;
}
error = 0;
for (uint_t i = 0; i < vrs.count && error == 0; i++) {
if (regnums[i] < 0) {
error = EINVAL;
break;
}
error = vm_get_register(sc->vmm_vm, vcpu, regnums[i],
®vals[i]);
}
if (error == 0 && ddi_copyout(regvals, vrs.regvals,
sizeof (uint64_t) * vrs.count, md)) {
error = EFAULT;
}
break;
}
case VM_SET_REGISTER_SET: {
struct vm_register_set vrs;
int regnums[VM_REG_LAST];
uint64_t regvals[VM_REG_LAST];
if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
error = EFAULT;
break;
}
if (vrs.count > VM_REG_LAST || vrs.count == 0) {
error = EINVAL;
break;
}
if (ddi_copyin(vrs.regnums, regnums,
sizeof (int) * vrs.count, md)) {
error = EFAULT;
break;
}
if (ddi_copyin(vrs.regvals, regvals,
sizeof (uint64_t) * vrs.count, md)) {
error = EFAULT;
break;
}
error = 0;
for (uint_t i = 0; i < vrs.count && error == 0; i++) {
if (regnums[i] < 0) {
error = EINVAL;
break;
}
error = vm_set_register(sc->vmm_vm, vcpu, regnums[i],
regvals[i]);
}
break;
}
case VM_RESET_CPU: {
struct vm_vcpu_reset vvr;
if (ddi_copyin(datap, &vvr, sizeof (vvr), md)) {
error = EFAULT;
break;
}
if (vvr.kind != VRK_RESET && vvr.kind != VRK_INIT) {
error = EINVAL;
}
error = vcpu_arch_reset(sc->vmm_vm, vcpu, vvr.kind == VRK_INIT);
break;
}
case VM_GET_RUN_STATE: {
struct vm_run_state vrs;
bzero(&vrs, sizeof (vrs));
error = vm_get_run_state(sc->vmm_vm, vcpu, &vrs.state,
&vrs.sipi_vector);
if (error == 0) {
if (ddi_copyout(&vrs, datap, sizeof (vrs), md)) {
error = EFAULT;
break;
}
}
break;
}
case VM_SET_RUN_STATE: {
struct vm_run_state vrs;
if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
error = EFAULT;
break;
}
error = vm_set_run_state(sc->vmm_vm, vcpu, vrs.state,
vrs.sipi_vector);
break;
}
case VM_GET_FPU: {
struct vm_fpu_state req;
const size_t max_len = (PAGESIZE * 2);
void *kbuf;
if (ddi_copyin(datap, &req, sizeof (req), md)) {
error = EFAULT;
break;
}
if (req.len > max_len || req.len == 0) {
error = EINVAL;
break;
}
kbuf = kmem_zalloc(req.len, KM_SLEEP);
error = vm_get_fpu(sc->vmm_vm, vcpu, kbuf, req.len);
if (error == 0) {
if (ddi_copyout(kbuf, req.buf, req.len, md)) {
error = EFAULT;
}
}
kmem_free(kbuf, req.len);
break;
}
case VM_SET_FPU: {
struct vm_fpu_state req;
const size_t max_len = (PAGESIZE * 2);
void *kbuf;
if (ddi_copyin(datap, &req, sizeof (req), md)) {
error = EFAULT;
break;
}
if (req.len > max_len || req.len == 0) {
error = EINVAL;
break;
}
kbuf = kmem_alloc(req.len, KM_SLEEP);
if (ddi_copyin(req.buf, kbuf, req.len, md)) {
error = EFAULT;
} else {
error = vm_set_fpu(sc->vmm_vm, vcpu, kbuf, req.len);
}
kmem_free(kbuf, req.len);
break;
}
case VM_GET_CPUID: {
struct vm_vcpu_cpuid_config cfg;
struct vcpu_cpuid_entry *entries = NULL;
if (ddi_copyin(datap, &cfg, sizeof (cfg), md)) {
error = EFAULT;
break;
}
if (cfg.vvcc_nent > VMM_MAX_CPUID_ENTRIES) {
error = EINVAL;
break;
}
const size_t entries_size =
cfg.vvcc_nent * sizeof (struct vcpu_cpuid_entry);
if (entries_size != 0) {
entries = kmem_zalloc(entries_size, KM_SLEEP);
}
vcpu_cpuid_config_t vm_cfg = {
.vcc_nent = cfg.vvcc_nent,
.vcc_entries = entries,
};
error = vm_get_cpuid(sc->vmm_vm, vcpu, &vm_cfg);
cfg.vvcc_flags = vm_cfg.vcc_flags;
cfg.vvcc_nent = vm_cfg.vcc_nent;
if (entries != NULL) {
if (error == 0 && ddi_copyout(entries, cfg.vvcc_entries,
entries_size, md) != 0) {
error = EFAULT;
}
kmem_free(entries, entries_size);
}
if (ddi_copyout(&cfg, datap, sizeof (cfg), md) != 0) {
error = EFAULT;
}
break;
}
case VM_SET_CPUID: {
struct vm_vcpu_cpuid_config cfg;
struct vcpu_cpuid_entry *entries = NULL;
size_t entries_size = 0;
if (ddi_copyin(datap, &cfg, sizeof (cfg), md)) {
error = EFAULT;
break;
}
if (cfg.vvcc_nent > VMM_MAX_CPUID_ENTRIES) {
error = EFBIG;
break;
}
if ((cfg.vvcc_flags & VCC_FLAG_LEGACY_HANDLING) != 0) {
if (cfg.vvcc_nent != 0) {
error = EINVAL;
break;
}
} else if (cfg.vvcc_nent != 0) {
entries_size =
cfg.vvcc_nent * sizeof (struct vcpu_cpuid_entry);
entries = kmem_alloc(entries_size, KM_SLEEP);
if (ddi_copyin(cfg.vvcc_entries, entries, entries_size,
md) != 0) {
error = EFAULT;
kmem_free(entries, entries_size);
break;
}
}
vcpu_cpuid_config_t vm_cfg = {
.vcc_flags = cfg.vvcc_flags,
.vcc_nent = cfg.vvcc_nent,
.vcc_entries = entries,
};
error = vm_set_cpuid(sc->vmm_vm, vcpu, &vm_cfg);
if (entries != NULL) {
kmem_free(entries, entries_size);
}
break;
}
case VM_LEGACY_CPUID: {
struct vm_legacy_cpuid vlc;
if (ddi_copyin(datap, &vlc, sizeof (vlc), md)) {
error = EFAULT;
break;
}
vlc.vlc_vcpuid = vcpu;
legacy_emulate_cpuid(sc->vmm_vm, vcpu, &vlc.vlc_eax,
&vlc.vlc_ebx, &vlc.vlc_ecx, &vlc.vlc_edx);
if (ddi_copyout(&vlc, datap, sizeof (vlc), md)) {
error = EFAULT;
break;
}
break;
}
case VM_SET_KERNEMU_DEV:
case VM_GET_KERNEMU_DEV: {
struct vm_readwrite_kernemu_device kemu;
size_t size = 0;
if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) {
error = EFAULT;
break;
}
if (kemu.access_width > 3) {
error = EINVAL;
break;
}
size = (1 << kemu.access_width);
ASSERT(size >= 1 && size <= 8);
if (cmd == VM_SET_KERNEMU_DEV) {
error = vm_service_mmio_write(sc->vmm_vm, vcpu,
kemu.gpa, kemu.value, size);
} else {
error = vm_service_mmio_read(sc->vmm_vm, vcpu,
kemu.gpa, &kemu.value, size);
}
if (error == 0) {
if (ddi_copyout(&kemu, datap, sizeof (kemu), md)) {
error = EFAULT;
break;
}
}
break;
}
case VM_GET_CAPABILITY: {
struct vm_capability vmcap;
if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) {
error = EFAULT;
break;
}
error = vm_get_capability(sc->vmm_vm, vcpu, vmcap.captype,
&vmcap.capval);
if (error == 0 &&
ddi_copyout(&vmcap, datap, sizeof (vmcap), md)) {
error = EFAULT;
break;
}
break;
}
case VM_SET_CAPABILITY: {
struct vm_capability vmcap;
if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) {
error = EFAULT;
break;
}
error = vm_set_capability(sc->vmm_vm, vcpu, vmcap.captype,
vmcap.capval);
break;
}
case VM_SET_X2APIC_STATE: {
struct vm_x2apic x2apic;
if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) {
error = EFAULT;
break;
}
error = vm_set_x2apic_state(sc->vmm_vm, vcpu, x2apic.state);
break;
}
case VM_GET_X2APIC_STATE: {
struct vm_x2apic x2apic;
if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) {
error = EFAULT;
break;
}
error = vm_get_x2apic_state(sc->vmm_vm, x2apic.cpuid,
&x2apic.state);
if (error == 0 &&
ddi_copyout(&x2apic, datap, sizeof (x2apic), md)) {
error = EFAULT;
break;
}
break;
}
case VM_GET_GPA_PMAP: {
error = EINVAL;
break;
}
case VM_GET_HPET_CAPABILITIES: {
struct vm_hpet_cap hpetcap;
error = vhpet_getcap(&hpetcap);
if (error == 0 &&
ddi_copyout(&hpetcap, datap, sizeof (hpetcap), md)) {
error = EFAULT;
break;
}
break;
}
case VM_GLA2GPA: {
struct vm_gla2gpa gg;
if (ddi_copyin(datap, &gg, sizeof (gg), md)) {
error = EFAULT;
break;
}
gg.vcpuid = vcpu;
error = vm_gla2gpa(sc->vmm_vm, vcpu, &gg.paging, gg.gla,
gg.prot, &gg.gpa, &gg.fault);
if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) {
error = EFAULT;
break;
}
break;
}
case VM_GLA2GPA_NOFAULT: {
struct vm_gla2gpa gg;
if (ddi_copyin(datap, &gg, sizeof (gg), md)) {
error = EFAULT;
break;
}
gg.vcpuid = vcpu;
error = vm_gla2gpa_nofault(sc->vmm_vm, vcpu, &gg.paging,
gg.gla, gg.prot, &gg.gpa, &gg.fault);
if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) {
error = EFAULT;
break;
}
break;
}
case VM_ACTIVATE_CPU:
error = vm_activate_cpu(sc->vmm_vm, vcpu);
break;
case VM_SUSPEND_CPU:
if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
error = EFAULT;
} else {
error = vm_suspend_cpu(sc->vmm_vm, vcpu);
}
break;
case VM_RESUME_CPU:
if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
error = EFAULT;
} else {
error = vm_resume_cpu(sc->vmm_vm, vcpu);
}
break;
case VM_VCPU_BARRIER:
vcpu = arg;
error = vm_vcpu_barrier(sc->vmm_vm, vcpu);
break;
case VM_GET_CPUS: {
struct vm_cpuset vm_cpuset;
cpuset_t tempset;
void *srcp = &tempset;
int size;
if (ddi_copyin(datap, &vm_cpuset, sizeof (vm_cpuset), md)) {
error = EFAULT;
break;
}
size = vm_cpuset.cpusetsize;
if (size <= 0 || size > sizeof (cpuset_t)) {
error = ERANGE;
}
if (size <= sizeof (tempset.cpub[0])) {
srcp = &tempset.cpub[0];
}
if (vm_cpuset.which == VM_ACTIVE_CPUS) {
tempset = vm_active_cpus(sc->vmm_vm);
} else if (vm_cpuset.which == VM_DEBUG_CPUS) {
tempset = vm_debug_cpus(sc->vmm_vm);
} else {
error = EINVAL;
}
ASSERT(size > 0 && size <= sizeof (tempset));
if (error == 0 &&
ddi_copyout(srcp, vm_cpuset.cpus, size, md)) {
error = EFAULT;
break;
}
break;
}
case VM_SET_INTINFO: {
struct vm_intinfo vmii;
if (ddi_copyin(datap, &vmii, sizeof (vmii), md)) {
error = EFAULT;
break;
}
error = vm_exit_intinfo(sc->vmm_vm, vcpu, vmii.info1);
break;
}
case VM_GET_INTINFO: {
struct vm_intinfo vmii;
vmii.vcpuid = vcpu;
error = vm_get_intinfo(sc->vmm_vm, vcpu, &vmii.info1,
&vmii.info2);
if (error == 0 &&
ddi_copyout(&vmii, datap, sizeof (vmii), md)) {
error = EFAULT;
break;
}
break;
}
case VM_RTC_WRITE: {
struct vm_rtc_data rtcdata;
if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) {
error = EFAULT;
break;
}
error = vrtc_nvram_write(sc->vmm_vm, rtcdata.offset,
rtcdata.value);
break;
}
case VM_RTC_READ: {
struct vm_rtc_data rtcdata;
if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) {
error = EFAULT;
break;
}
error = vrtc_nvram_read(sc->vmm_vm, rtcdata.offset,
&rtcdata.value);
if (error == 0 &&
ddi_copyout(&rtcdata, datap, sizeof (rtcdata), md)) {
error = EFAULT;
break;
}
break;
}
case VM_RTC_SETTIME: {
timespec_t ts;
if (ddi_copyin(datap, &ts, sizeof (ts), md)) {
error = EFAULT;
break;
}
error = vrtc_set_time(sc->vmm_vm, &ts);
break;
}
case VM_RTC_GETTIME: {
timespec_t ts;
vrtc_get_time(sc->vmm_vm, &ts);
if (ddi_copyout(&ts, datap, sizeof (ts), md)) {
error = EFAULT;
break;
}
break;
}
case VM_PMTMR_LOCATE: {
uint16_t port = arg;
error = vpmtmr_set_location(sc->vmm_vm, port);
break;
}
case VM_RESTART_INSTRUCTION:
error = vm_restart_instruction(sc->vmm_vm, vcpu);
break;
case VM_SET_TOPOLOGY: {
struct vm_cpu_topology topo;
if (ddi_copyin(datap, &topo, sizeof (topo), md) != 0) {
error = EFAULT;
break;
}
error = vm_set_topology(sc->vmm_vm, topo.sockets, topo.cores,
topo.threads, topo.maxcpus);
break;
}
case VM_GET_TOPOLOGY: {
struct vm_cpu_topology topo;
vm_get_topology(sc->vmm_vm, &topo.sockets, &topo.cores,
&topo.threads, &topo.maxcpus);
if (ddi_copyout(&topo, datap, sizeof (topo), md) != 0) {
error = EFAULT;
break;
}
break;
}
case VM_DEVMEM_GETOFFSET: {
struct vm_devmem_offset vdo;
vmm_devmem_entry_t *de;
if (ddi_copyin(datap, &vdo, sizeof (vdo), md) != 0) {
error = EFAULT;
break;
}
de = vmmdev_devmem_find(sc, vdo.segid);
if (de != NULL) {
vdo.offset = de->vde_off;
if (ddi_copyout(&vdo, datap, sizeof (vdo), md) != 0) {
error = EFAULT;
}
} else {
error = ENOENT;
}
break;
}
case VM_TRACK_DIRTY_PAGES: {
const size_t max_track_region_len = 8 * PAGESIZE * 8 * PAGESIZE;
struct vmm_dirty_tracker tracker;
uint8_t *bitmap;
size_t len;
if (ddi_copyin(datap, &tracker, sizeof (tracker), md) != 0) {
error = EFAULT;
break;
}
if ((tracker.vdt_start_gpa & PAGEOFFSET) != 0) {
error = EINVAL;
break;
}
if (tracker.vdt_len == 0) {
break;
}
if ((tracker.vdt_len & PAGEOFFSET) != 0) {
error = EINVAL;
break;
}
if (tracker.vdt_len > max_track_region_len) {
error = EINVAL;
break;
}
len = roundup(tracker.vdt_len / PAGESIZE, 8) / 8;
bitmap = kmem_zalloc(len, KM_SLEEP);
error = vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa,
tracker.vdt_len, bitmap);
if (error == 0 &&
ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) {
error = EFAULT;
}
kmem_free(bitmap, len);
break;
}
case VM_NPT_OPERATION: {
struct vm_npt_operation vno;
uint8_t *bitmap = NULL;
uint64_t bitmap_size = 0;
if (ddi_copyin(datap, &vno, sizeof (vno), md) != 0) {
error = EFAULT;
break;
}
if ((vno.vno_gpa & PAGEOFFSET) != 0 ||
(vno.vno_len & PAGEOFFSET) != 0) {
error = EINVAL;
break;
}
if ((UINT64_MAX - vno.vno_len) < vno.vno_gpa) {
error = EOVERFLOW;
break;
}
if ((vno.vno_operation &
(VNO_FLAG_BITMAP_IN | VNO_FLAG_BITMAP_OUT)) != 0) {
if (vno.vno_len == 0) {
error = EINVAL;
break;
}
const uint64_t max_bitmap_size = 8 * PAGESIZE;
bitmap_size = roundup(vno.vno_len / PAGESIZE, 8) / 8;
if (bitmap_size > max_bitmap_size) {
error = E2BIG;
break;
}
bitmap = kmem_zalloc(bitmap_size, KM_SLEEP);
}
if ((vno.vno_operation & VNO_FLAG_BITMAP_IN) != 0) {
ASSERT(bitmap != NULL);
if (ddi_copyin(vno.vno_bitmap, bitmap, bitmap_size,
md) != 0) {
error = EFAULT;
}
}
if (error == 0) {
error = vm_npt_do_operation(sc->vmm_vm, vno.vno_gpa,
vno.vno_len, vno.vno_operation, bitmap, rvalp);
}
if ((vno.vno_operation & VNO_FLAG_BITMAP_OUT) != 0 &&
error == 0) {
ASSERT(bitmap != NULL);
if (ddi_copyout(bitmap, vno.vno_bitmap, bitmap_size,
md) != 0) {
error = EFAULT;
}
}
if (bitmap != NULL) {
kmem_free(bitmap, bitmap_size);
}
break;
}
case VM_WRLOCK_CYCLE: {
break;
}
case VM_DATA_READ: {
struct vm_data_xfer vdx;
if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) {
error = EFAULT;
break;
}
if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) {
error = EINVAL;
break;
}
if (vdx.vdx_len > VM_DATA_XFER_LIMIT) {
error = EFBIG;
break;
}
const size_t len = vdx.vdx_len;
void *buf = NULL;
if (len != 0) {
const void *udata = vdx.vdx_data;
buf = kmem_alloc(len, KM_SLEEP);
if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) == 0) {
bzero(buf, len);
} else if (ddi_copyin(udata, buf, len, md) != 0) {
kmem_free(buf, len);
error = EFAULT;
break;
}
}
vdx.vdx_result_len = 0;
vmm_data_req_t req = {
.vdr_class = vdx.vdx_class,
.vdr_version = vdx.vdx_version,
.vdr_flags = vdx.vdx_flags,
.vdr_len = len,
.vdr_data = buf,
.vdr_result_len = &vdx.vdx_result_len,
.vdr_vcpuid = vdx.vdx_vcpuid,
};
error = vmm_data_read(sc->vmm_vm, &req);
if (error == 0 && buf != NULL) {
if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
error = EFAULT;
}
}
if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) {
error = (error != 0) ? error : EFAULT;
}
if (buf != NULL) {
kmem_free(buf, len);
}
break;
}
case VM_DATA_WRITE: {
struct vm_data_xfer vdx;
if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) {
error = EFAULT;
break;
}
if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) {
error = EINVAL;
break;
}
if (vdx.vdx_len > VM_DATA_XFER_LIMIT) {
error = EFBIG;
break;
}
const size_t len = vdx.vdx_len;
void *buf = NULL;
if (len != 0) {
buf = kmem_alloc(len, KM_SLEEP);
if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
kmem_free(buf, len);
error = EFAULT;
break;
}
}
vdx.vdx_result_len = 0;
vmm_data_req_t req = {
.vdr_class = vdx.vdx_class,
.vdr_version = vdx.vdx_version,
.vdr_flags = vdx.vdx_flags,
.vdr_len = len,
.vdr_data = buf,
.vdr_result_len = &vdx.vdx_result_len,
.vdr_vcpuid = vdx.vdx_vcpuid,
};
if (vmm_allow_state_writes != 0) {
error = vmm_data_write(sc->vmm_vm, &req);
} else {
error = EPERM;
}
if (error == 0 && buf != NULL &&
(vdx.vdx_flags & VDX_FLAG_WRITE_COPYOUT) != 0) {
if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
error = EFAULT;
}
}
if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) {
error = (error != 0) ? error : EFAULT;
}
if (buf != NULL) {
kmem_free(buf, len);
}
break;
}
case VM_PAUSE: {
error = vm_pause_instance(sc->vmm_vm);
break;
}
case VM_RESUME: {
error = vm_resume_instance(sc->vmm_vm);
break;
}
default:
error = ENOTTY;
break;
}
switch (lock_type) {
case LOCK_NONE:
break;
case LOCK_VCPU:
vcpu_unlock_one(sc, vcpu);
break;
case LOCK_READ_HOLD:
vmm_read_unlock(sc);
break;
case LOCK_WRITE_HOLD:
vmm_write_unlock(sc);
break;
default:
panic("unexpected lock type");
break;
}
return (error);
}
/*
 * Find the VM instance on vmm_list whose name is exactly `name`.
 *
 * The caller must hold vmm_mtx.  Returns the matching softc, or NULL when no
 * listed instance carries that name.
 */
static vmm_softc_t *
vmm_lookup(const char *name)
{
	vmm_softc_t *cur;

	ASSERT(MUTEX_HELD(&vmm_mtx));

	cur = list_head(&vmm_list);
	while (cur != NULL) {
		if (strcmp(cur->vmm_name, name) == 0) {
			return (cur);
		}
		cur = list_next(&vmm_list, cur);
	}
	return (NULL);
}
/*
 * Take a reference on the driver-wide HMA registration, performing the
 * registration itself if none is currently active.
 *
 * Must be called without vmm_mtx held; vmmdev_mtx is taken internally.
 * Returns B_TRUE when a reference was taken, or B_FALSE (after logging a
 * warning) when hma_register() failed.
 */
static boolean_t
vmm_hma_acquire(void)
{
	boolean_t acquired = B_TRUE;

	ASSERT(MUTEX_NOT_HELD(&vmm_mtx));

	mutex_enter(&vmmdev_mtx);
	if (vmmdev_hma_reg == NULL) {
		/* No registration means nobody may be holding a reference. */
		VERIFY3U(vmmdev_hma_ref, ==, 0);
		vmmdev_hma_reg = hma_register(vmmdev_hvm_name);
		if (vmmdev_hma_reg == NULL) {
			cmn_err(CE_WARN, "%s HMA registration failed.",
			    vmmdev_hvm_name);
			acquired = B_FALSE;
		}
	}
	if (acquired) {
		vmmdev_hma_ref++;
	}
	mutex_exit(&vmmdev_mtx);

	return (acquired);
}
/*
 * Drop one reference on the driver-wide HMA registration.  When the last
 * reference goes away the registration is torn down via hma_unregister().
 *
 * Must be called without vmm_mtx held; vmmdev_mtx is taken internally.
 */
static void
vmm_hma_release(void)
{
	ASSERT(MUTEX_NOT_HELD(&vmm_mtx));

	mutex_enter(&vmmdev_mtx);
	VERIFY3U(vmmdev_hma_ref, !=, 0);
	if (--vmmdev_hma_ref == 0) {
		VERIFY(vmmdev_hma_reg != NULL);
		hma_unregister(vmmdev_hma_reg);
		vmmdev_hma_reg = NULL;
	}
	mutex_exit(&vmmdev_mtx);
}
/*
 * Create a new VM instance named req->name on behalf of credential `cr`.
 *
 * On success the new softc is fully initialized, linked onto vmm_list, and
 * an HMA reference (taken here) stays held for the life of the instance.
 *
 * Returns 0 on success, otherwise:
 *   EINVAL        name is empty, contains '/', or the calling non-global
 *                 zone already owns an instance
 *   ENAMETOOLONG  name is not NUL-terminated within VM_MAX_NAMELEN bytes
 *   ENXIO         the HMA reference could not be acquired
 *   EEXIST        an instance with this name already exists
 *   ENOMEM        soft state, minor node, or kstat allocation failed
 *   (other)       the error reported by vm_create()
 */
static int
vmmdev_do_vm_create(const struct vm_create_req *req, cred_t *cr)
{
	vmm_softc_t *sc = NULL;
	minor_t minor;
	int error = ENOMEM;
	size_t len;
	const char *name = req->name;

	/*
	 * Validate the name with a bounded scan first: it arrives from a
	 * copied-in request and is not guaranteed to be NUL-terminated.
	 */
	len = strnlen(name, VM_MAX_NAMELEN);
	if (len == 0) {
		return (EINVAL);
	}
	if (len >= VM_MAX_NAMELEN) {
		return (ENAMETOOLONG);
	}
	if (strchr(name, '/') != NULL) {
		return (EINVAL);
	}

	if (!vmm_hma_acquire())
		return (ENXIO);

	mutex_enter(&vmm_mtx);

	/* Reject duplicate names. */
	if (vmm_lookup(name) != NULL) {
		mutex_exit(&vmm_mtx);
		vmm_hma_release();
		return (EEXIST);
	}

	/* A non-global zone may own at most one instance. */
	if (!INGLOBALZONE(curproc)) {
		for (sc = list_head(&vmm_list); sc != NULL;
		    sc = list_next(&vmm_list, sc)) {
			if (sc->vmm_zone == curzone) {
				mutex_exit(&vmm_mtx);
				vmm_hma_release();
				return (EINVAL);
			}
		}
		/* The loop only terminates here with sc == NULL. */
	}

	minor = id_alloc(vmm_minors);
	if (ddi_soft_state_zalloc(vmm_statep, minor) != DDI_SUCCESS) {
		goto fail;
	} else if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) {
		/* sc is NULL, so the fail path will not free it again. */
		ddi_soft_state_free(vmm_statep, minor);
		goto fail;
	} else if (ddi_create_minor_node(vmmdev_dip, name, S_IFCHR, minor,
	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
		goto fail;
	}

	if (vmm_kstat_alloc(sc, minor, cr) != 0) {
		/*
		 * The minor node already exists at this point, so it must be
		 * removed on the way out; jumping straight to `fail` would
		 * leave a stale node bound to a freed minor number.
		 */
		goto fail_minor;
	}

	error = vm_create(req->flags, &sc->vmm_vm);
	if (error == 0) {
		/* Complete softc initialization and publish the instance. */
		(void) strlcpy(sc->vmm_name, name, sizeof (sc->vmm_name));
		sc->vmm_minor = minor;
		list_create(&sc->vmm_devmem_list, sizeof (vmm_devmem_entry_t),
		    offsetof(vmm_devmem_entry_t, vde_node));

		list_create(&sc->vmm_holds, sizeof (vmm_hold_t),
		    offsetof(vmm_hold_t, vmh_node));
		cv_init(&sc->vmm_cv, NULL, CV_DEFAULT, NULL);

		mutex_init(&sc->vmm_lease_lock, NULL, MUTEX_DEFAULT, NULL);
		list_create(&sc->vmm_lease_list, sizeof (vmm_lease_t),
		    offsetof(vmm_lease_t, vml_node));
		cv_init(&sc->vmm_lease_cv, NULL, CV_DEFAULT, NULL);
		rw_init(&sc->vmm_rwlock, NULL, RW_DEFAULT, NULL);

		sc->vmm_zone = crgetzone(cr);
		zone_hold(sc->vmm_zone);
		vmm_zsd_add_vm(sc);
		vmm_kstat_init(sc);

		list_insert_tail(&vmm_list, sc);
		mutex_exit(&vmm_mtx);
		return (0);
	}

	vmm_kstat_fini(sc);
fail_minor:
	ddi_remove_minor_node(vmmdev_dip, name);
fail:
	id_free(vmm_minors, minor);
	if (sc != NULL) {
		ddi_soft_state_free(vmm_statep, minor);
	}
	mutex_exit(&vmm_mtx);
	vmm_hma_release();

	return (error);
}
/*
 * Establish a driver hold on the VM instance referred to by the open file
 * `fp`, storing the new hold in *holdp.
 *
 * Returns 0 on success, otherwise:
 *   ENXIO   fp does not refer to a character device
 *   ENOENT  the driver is not attached, the device belongs to a different
 *           major, or no instance exists for the minor
 *   EBUSY   the instance is marked for destruction
 *
 * *holdp is only written on success.  `cr` is not consulted here.
 */
int
vmm_drv_hold(file_t *fp, cred_t *cr, vmm_hold_t **holdp)
{
	vnode_t *vp = fp->f_vnode;
	const dev_t dev = vp->v_rdev;
	vmm_softc_t *sc;
	int err;

	if (vp->v_type != VCHR) {
		return (ENXIO);
	}

	mutex_enter(&vmmdev_mtx);
	if (vmmdev_dip == NULL ||
	    getmajor(dev) != ddi_driver_major(vmmdev_dip)) {
		mutex_exit(&vmmdev_mtx);
		return (ENOENT);
	}
	/* Hand over from vmmdev_mtx to vmm_mtx without a gap. */
	mutex_enter(&vmm_mtx);
	mutex_exit(&vmmdev_mtx);

	sc = ddi_get_soft_state(vmm_statep, getminor(dev));
	if (sc == NULL) {
		err = ENOENT;
	} else if ((sc->vmm_flags & VMM_DESTROY) != 0) {
		err = EBUSY;
	} else {
		vmm_hold_t *hold = kmem_zalloc(sizeof (*hold), KM_SLEEP);

		hold->vmh_sc = sc;
		hold->vmh_release_req = B_FALSE;
		list_insert_tail(&sc->vmm_holds, hold);
		sc->vmm_flags |= VMM_HELD;
		*holdp = hold;
		err = 0;
	}
	mutex_exit(&vmm_mtx);

	return (err);
}
/*
 * Release a hold obtained from vmm_drv_hold() and free it.
 *
 * The hold must have no ioport or MMIO hooks outstanding.  If this was the
 * final hold on an instance already marked for destruction, destruction is
 * driven forward here, and the HMA reference is dropped once vmm_mtx has
 * been released.
 */
void
vmm_drv_rele(vmm_hold_t *hold)
{
	bool drop_hma = false;
	vmm_softc_t *sc;

	ASSERT(hold != NULL);
	ASSERT(hold->vmh_sc != NULL);
	VERIFY(hold->vmh_ioport_hook_cnt == 0);
	VERIFY(hold->vmh_mmio_hook_cnt == 0);

	mutex_enter(&vmm_mtx);

	sc = hold->vmh_sc;
	list_remove(&sc->vmm_holds, hold);
	kmem_free(hold, sizeof (*hold));

	if (list_is_empty(&sc->vmm_holds)) {
		sc->vmm_flags &= ~VMM_HELD;

		if ((sc->vmm_flags & VMM_DESTROY) != 0) {
			VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT,
			    &drop_hma));
		}
	}

	mutex_exit(&vmm_mtx);

	/* vmm_hma_release() requires that vmm_mtx not be held. */
	if (drop_hma) {
		vmm_hma_release();
	}
}
/*
 * Report whether release of this hold has been requested (the hold's
 * vmh_release_req flag).
 */
boolean_t
vmm_drv_release_reqd(vmm_hold_t *hold)
{
	boolean_t requested;

	ASSERT(hold != NULL);

	requested = hold->vmh_release_req;
	return (requested);
}
/*
 * Sign a new lease against the instance behind `hold`.
 *
 * `expiref` (required) and `arg` are recorded so that vmm_lease_block() can
 * notify the lessee when the lease expires.  Returns NULL if release of the
 * hold has already been requested.  Otherwise this waits until no lease
 * blocker is active, links the lease onto the instance, takes the VM read
 * lock on the lease's behalf, and returns the lease.
 */
vmm_lease_t *
vmm_drv_lease_sign(vmm_hold_t *hold, boolean_t (*expiref)(void *), void *arg)
{
	vmm_softc_t *sc = hold->vmh_sc;
	vmm_lease_t *lease;

	ASSERT3P(expiref, !=, NULL);

	if (hold->vmh_release_req) {
		return (NULL);
	}

	/* kmem_alloc() does not zero, so every field is set explicitly. */
	lease = kmem_alloc(sizeof (*lease), KM_SLEEP);
	list_link_init(&lease->vml_node);
	lease->vml_hold = hold;
	lease->vml_vm = sc->vmm_vm;
	lease->vml_vmclient = vmspace_client_alloc(vm_get_vmspace(sc->vmm_vm));
	lease->vml_expired = B_FALSE;
	lease->vml_break_deferred = B_FALSE;
	lease->vml_expire_func = expiref;
	lease->vml_expire_arg = arg;

	mutex_enter(&sc->vmm_lease_lock);
	while (sc->vmm_lease_blocker != 0) {
		cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock);
	}
	list_insert_tail(&sc->vmm_lease_list, lease);
	/* The read lock is taken while vmm_lease_lock is still held. */
	vmm_read_lock(sc);
	mutex_exit(&sc->vmm_lease_lock);

	return (lease);
}
/*
 * Break a lease: unlink it from the instance, drop the VM read lock that was
 * taken when the lease was signed, destroy its vm client, and free it.
 *
 * The caller must hold sc->vmm_lease_lock.
 */
static void
vmm_lease_break_locked(vmm_softc_t *sc, vmm_lease_t *lease)
{
	vm_client_t *client;

	ASSERT(MUTEX_HELD(&sc->vmm_lease_lock));

	list_remove(&sc->vmm_lease_list, lease);
	vmm_read_unlock(sc);

	client = lease->vml_vmclient;
	vmc_destroy(client);
	kmem_free(lease, sizeof (*lease));
}
/*
 * Register a lease blocker on the instance and wait until every outstanding
 * lease has been broken.
 *
 * The first blocker (the one raising vmm_lease_blocker from 0 to 1) does the
 * clean-up: it marks each lease expired, calls the lessee's expire callback,
 * and then waits for the remaining leases to be broken.  Any later blocker
 * simply waits for the lease list to drain.
 */
static void
vmm_lease_block(vmm_softc_t *sc)
{
mutex_enter(&sc->vmm_lease_lock);
VERIFY3U(sc->vmm_lease_blocker, !=, UINT_MAX);
sc->vmm_lease_blocker++;
if (sc->vmm_lease_blocker == 1) {
list_t *list = &sc->vmm_lease_list;
vmm_lease_t *lease = list_head(list);
/* Pass 1: notify every lessee that its lease has expired. */
while (lease != NULL) {
void *arg = lease->vml_expire_arg;
boolean_t (*expiref)(void *) = lease->vml_expire_func;
boolean_t sync_break = B_FALSE;
lease->vml_expired = B_TRUE;
/*
 * vmm_lease_lock is dropped across the callback.  The iteration resumes
 * from `lease` afterwards, so this relies on the lease list not being
 * modified by others while a blocker is active.
 */
mutex_exit(&sc->vmm_lease_lock);
sync_break = expiref(arg);
mutex_enter(&sc->vmm_lease_lock);
if (sync_break) {
/* Callback asked for an immediate break; do it on its behalf. */
vmm_lease_t *next;
next = list_next(list, lease);
vmm_lease_break_locked(sc, lease);
lease = next;
} else {
lease = list_next(list, lease);
}
}
/*
 * Pass 2: reap leases whose break was deferred to us (vml_break_deferred),
 * sleeping on vmm_lease_cv between sweeps until the list is empty.
 */
while (!list_is_empty(list)) {
lease = list_head(list);
while (lease != NULL) {
vmm_lease_t *next = list_next(list, lease);
if (lease->vml_break_deferred) {
vmm_lease_break_locked(sc, lease);
}
lease = next;
}
if (list_is_empty(list)) {
break;
}
cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock);
}
/* Wake any other blockers waiting for the list to drain. */
cv_broadcast(&sc->vmm_lease_cv);
} else {
/* Another blocker is already cleaning up; wait for it to finish. */
list_t *list = &sc->vmm_lease_list;
while (!list_is_empty(list)) {
cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock);
}
}
mutex_exit(&sc->vmm_lease_lock);
}
/*
 * Remove one lease blocker from the instance.  When the final blocker is
 * removed, threads waiting on vmm_lease_cv (such as would-be lessees in
 * vmm_drv_lease_sign()) are woken.
 */
static void
vmm_lease_unblock(vmm_softc_t *sc)
{
	mutex_enter(&sc->vmm_lease_lock);

	VERIFY3U(sc->vmm_lease_blocker, !=, 0);
	if (--sc->vmm_lease_blocker == 0) {
		cv_broadcast(&sc->vmm_lease_cv);
	}

	mutex_exit(&sc->vmm_lease_lock);
}
/*
 * Break a lease at the request of its holder.
 *
 * `lease` must have been signed against `hold` and must not already have a
 * deferred break pending.  If a lease blocker is active, the break is
 * deferred: the lease is flagged and vmm_lease_cv is broadcast so that the
 * thread in vmm_lease_block() reaps it.  Otherwise it is broken immediately.
 */
void
vmm_drv_lease_break(vmm_hold_t *hold, vmm_lease_t *lease)
{
	vmm_softc_t *sc = hold->vmh_sc;

	VERIFY3P(hold, ==, lease->vml_hold);
	VERIFY(!lease->vml_break_deferred);

	mutex_enter(&sc->vmm_lease_lock);
	if (sc->vmm_lease_blocker != 0) {
		lease->vml_break_deferred = B_TRUE;
		cv_broadcast(&sc->vmm_lease_cv);
	} else {
		vmm_lease_break_locked(sc, lease);
	}
	mutex_exit(&sc->vmm_lease_lock);
}
/*
 * Report whether the lease has been marked expired (its vml_expired flag).
 */
boolean_t
vmm_drv_lease_expired(vmm_lease_t *lease)
{
	const boolean_t expired = lease->vml_expired;

	return (expired);
}
/*
 * Hold the guest page at `gpa` with protection `prot` through the lease's
 * vm client.  `gpa` must be page-aligned.  The result of vmc_hold() is
 * returned as an opaque vmm_page_t pointer.
 */
vmm_page_t *
vmm_drv_page_hold(vmm_lease_t *lease, uintptr_t gpa, int prot)
{
	vmm_page_t *held;

	ASSERT(lease != NULL);
	ASSERT0(gpa & PAGEOFFSET);

	held = (vmm_page_t *)vmc_hold(lease->vml_vmclient, gpa, prot);
	return (held);
}
/*
 * `flags` is handed to vmc_hold_ext() unmodified, so the driver-API flag
 * value must equal the internal one.
 */
CTASSERT(VMPF_DEFER_DIRTY == VPF_DEFER_DIRTY);

/*
 * As vmm_drv_page_hold(), but additionally passes `flags` through to
 * vmc_hold_ext().  `gpa` must be page-aligned.
 */
vmm_page_t *
vmm_drv_page_hold_ext(vmm_lease_t *lease, uintptr_t gpa, int prot, int flags)
{
	ASSERT(lease != NULL);
	ASSERT0(gpa & PAGEOFFSET);

	return ((vmm_page_t *)vmc_hold_ext(lease->vml_vmclient, gpa, prot,
	    flags));
}
/*
 * Release a page held via vmm_drv_page_hold()/vmm_drv_page_hold_ext().
 * The return value of vmp_release() is deliberately discarded.
 */
void
vmm_drv_page_release(vmm_page_t *vmmp)
{
	vm_page_t *vmp = (vm_page_t *)vmmp;

	(void) vmp_release(vmp);
}
/*
 * Release a chain of held pages starting at `vmmp`.  The return value of
 * vmp_release_chain() is deliberately discarded.
 */
void
vmm_drv_page_release_chain(vmm_page_t *vmmp)
{
	vm_page_t *head = (vm_page_t *)vmmp;

	(void) vmp_release_chain(head);
}
/*
 * Return the read-only pointer for a held page, as provided by
 * vmp_get_readable().
 */
const void *
vmm_drv_page_readable(const vmm_page_t *vmmp)
{
	const vm_page_t *vmp = (const vm_page_t *)vmmp;

	return (vmp_get_readable(vmp));
}
/*
 * Return the writable pointer for a held page, as provided by
 * vmp_get_writable().
 */
void *
vmm_drv_page_writable(const vmm_page_t *vmmp)
{
	const vm_page_t *vmp = (const vm_page_t *)vmmp;

	return (vmp_get_writable(vmp));
}
/*
 * Mark a held page as dirty by forwarding to vmp_mark_dirty().
 */
void
vmm_drv_page_mark_dirty(vmm_page_t *vmmp)
{
	/*
	 * Plain call rather than `return (expr)`: ISO C does not permit a
	 * return statement with an expression in a function returning void.
	 */
	vmp_mark_dirty((vm_page_t *)vmmp);
}
/*
 * Link `to_chain` onto `vmmp` by forwarding both pages to vmp_chain().
 */
void
vmm_drv_page_chain(vmm_page_t *vmmp, vmm_page_t *to_chain)
{
	vm_page_t *page = (vm_page_t *)vmmp;
	vm_page_t *link = (vm_page_t *)to_chain;

	vmp_chain(page, link);
}
/*
 * Return the page following `vmmp` in its chain, as reported by vmp_next().
 */
vmm_page_t *
vmm_drv_page_next(const vmm_page_t *vmmp)
{
	vmm_page_t *following;

	following = (vmm_page_t *)vmp_next((vm_page_t *)vmmp);
	return (following);
}
/*
 * Deliver an MSI described by `addr`/`msg` to the VM behind the lease.
 * Returns the result of lapic_intr_msi().
 */
int
vmm_drv_msi(vmm_lease_t *lease, uint64_t addr, uint64_t msg)
{
	int rv;

	ASSERT(lease != NULL);

	rv = lapic_intr_msi(lease->vml_vm, addr, msg);
	return (rv);
}
/*
 * Install an I/O port hook for `ioport` on the instance behind `hold`.
 *
 * The hold's hook count is bumped under vmm_mtx before the hook is installed
 * under the VM write lock, and rolled back if installation fails.
 *
 * Returns 0 on success, EBUSY if hook installation is blocked on the
 * instance, ENOSPC if the hold's hook counter is saturated, or the error
 * from vm_ioport_hook().
 */
int
vmm_drv_ioport_hook(vmm_hold_t *hold, uint16_t ioport, vmm_drv_iop_cb_t func,
    void *arg, void **cookie)
{
	vmm_softc_t *sc;
	int err = 0;

	ASSERT(hold != NULL);
	ASSERT(cookie != NULL);

	sc = hold->vmh_sc;

	/* Reserve a slot in the hook count first. */
	mutex_enter(&vmm_mtx);
	if ((sc->vmm_flags & VMM_BLOCK_HOOK) != 0) {
		err = EBUSY;
	} else if (hold->vmh_ioport_hook_cnt == UINT_MAX) {
		err = ENOSPC;
	} else {
		hold->vmh_ioport_hook_cnt++;
	}
	mutex_exit(&vmm_mtx);

	if (err != 0) {
		return (err);
	}

	vmm_write_lock(sc);
	err = vm_ioport_hook(sc->vmm_vm, ioport, (ioport_handler_t)func,
	    arg, cookie);
	vmm_write_unlock(sc);

	if (err != 0) {
		/* Installation failed: give the reserved slot back. */
		mutex_enter(&vmm_mtx);
		hold->vmh_ioport_hook_cnt--;
		mutex_exit(&vmm_mtx);
	}

	return (err);
}
/*
 * Remove an I/O port hook previously installed with vmm_drv_ioport_hook()
 * and decrement the hold's hook count.
 */
void
vmm_drv_ioport_unhook(vmm_hold_t *hold, void **cookie)
{
	ASSERT(hold != NULL);
	ASSERT(cookie != NULL);
	ASSERT(hold->vmh_ioport_hook_cnt != 0);

	vmm_softc_t *sc = hold->vmh_sc;

	/* The hook is removed under the VM write lock... */
	vmm_write_lock(sc);
	vm_ioport_unhook(sc->vmm_vm, cookie);
	vmm_write_unlock(sc);

	/* ...and the accounting is updated under vmm_mtx. */
	mutex_enter(&vmm_mtx);
	hold->vmh_ioport_hook_cnt--;
	mutex_exit(&vmm_mtx);
}
/*
 * Install an MMIO hook covering `size` bytes at `address` on the instance
 * behind `hold`.
 *
 * Returns 0 on success, EOVERFLOW if address + size would exceed UINT64_MAX,
 * EBUSY if hook installation is blocked on the instance, ENOSPC if the
 * hold's hook counter is saturated, or the error from vm_mmio_hook().
 */
int
vmm_drv_mmio_hook(vmm_hold_t *hold, uint64_t address, uint32_t size,
    vmm_drv_mmio_cb_t func, void *arg, void **cookie)
{
	vmm_softc_t *sc;
	int err = 0;

	ASSERT(hold != NULL);
	ASSERT(cookie != NULL);

	if (UINT64_MAX - size < address) {
		return (EOVERFLOW);
	}

	sc = hold->vmh_sc;

	/* Reserve a slot in the hook count first. */
	mutex_enter(&vmm_mtx);
	if ((sc->vmm_flags & VMM_BLOCK_HOOK) != 0) {
		err = EBUSY;
	} else if (hold->vmh_mmio_hook_cnt == UINT_MAX) {
		err = ENOSPC;
	} else {
		hold->vmh_mmio_hook_cnt++;
	}
	mutex_exit(&vmm_mtx);

	if (err != 0) {
		return (err);
	}

	vmm_write_lock(sc);
	err = vm_mmio_hook(sc->vmm_vm, address, size, (mmio_handler_t)func,
	    arg, cookie);
	vmm_write_unlock(sc);

	if (err != 0) {
		/* Installation failed: give the reserved slot back. */
		mutex_enter(&vmm_mtx);
		hold->vmh_mmio_hook_cnt--;
		mutex_exit(&vmm_mtx);
	}

	return (err);
}
/*
 * Remove an MMIO hook previously installed with vmm_drv_mmio_hook().
 *
 * Returns the result of vm_mmio_unhook().  The hold's hook count is only
 * decremented when the removal succeeded.
 */
int
vmm_drv_mmio_unhook(vmm_hold_t *hold, void **cookie)
{
	ASSERT(hold != NULL);
	ASSERT(cookie != NULL);
	ASSERT(hold->vmh_mmio_hook_cnt != 0);

	vmm_softc_t *sc = hold->vmh_sc;
	int ret;

	vmm_write_lock(sc);
	ret = vm_mmio_unhook(sc->vmm_vm, cookie);
	vmm_write_unlock(sc);

	if (ret != 0) {
		return (ret);
	}

	mutex_enter(&vmm_mtx);
	hold->vmh_mmio_hook_cnt--;
	mutex_exit(&vmm_mtx);

	return (0);
}
/*
 * Ask every driver holding the instance to let go: each hold is flagged with
 * a release request, and then all leases are forced to be broken by cycling
 * a lease block/unblock.
 *
 * Called with vmm_mtx held.  The mutex is dropped while leases are being
 * broken and re-acquired before returning.  Does nothing when the instance
 * has no holds.
 */
static void
vmm_drv_purge(vmm_softc_t *sc)
{
	vmm_hold_t *hold;

	ASSERT(MUTEX_HELD(&vmm_mtx));

	if ((sc->vmm_flags & VMM_HELD) == 0) {
		return;
	}

	hold = list_head(&sc->vmm_holds);
	while (hold != NULL) {
		hold->vmh_release_req = B_TRUE;
		hold = list_next(&sc->vmm_holds, hold);
	}

	mutex_exit(&vmm_mtx);
	vmm_lease_block(sc);
	vmm_lease_unblock(sc);
	mutex_enter(&vmm_mtx);
}
/*
 * Enable or disable the block on driver hook installation for an instance.
 *
 * Disabling requires that the block currently be set.  Enabling fails with
 * EBUSY if any hold on the instance still has ioport or MMIO hooks
 * installed; otherwise VMM_BLOCK_HOOK is set.  Returns 0 on success.
 */
static int
vmm_drv_block_hook(vmm_softc_t *sc, boolean_t enable_block)
{
	int err = 0;

	mutex_enter(&vmm_mtx);

	if (enable_block) {
		vmm_hold_t *hold = list_head(&sc->vmm_holds);

		/* Any hold with live hooks vetoes the block. */
		while (hold != NULL) {
			if (hold->vmh_ioport_hook_cnt != 0 ||
			    hold->vmh_mmio_hook_cnt != 0) {
				err = EBUSY;
				break;
			}
			hold = list_next(&sc->vmm_holds, hold);
		}
		if (err == 0) {
			sc->vmm_flags |= VMM_BLOCK_HOOK;
		}
	} else {
		VERIFY((sc->vmm_flags & VMM_BLOCK_HOOK) != 0);
		sc->vmm_flags &= ~VMM_BLOCK_HOOK;
	}

	mutex_exit(&vmm_mtx);
	return (err);
}
/*
 * Begin destruction of an instance: mark it VMM_DESTROY and release the
 * resources which can be dropped right away.
 *
 * Called with vmm_mtx held, on an instance not yet marked for destruction.
 * vmm_mtx may be dropped and re-acquired along the way (see vmm_drv_purge()).
 */
static void
vmm_destroy_begin(vmm_softc_t *sc, vmm_destroy_opts_t opts)
{
ASSERT(MUTEX_HELD(&vmm_mtx));
ASSERT0(sc->vmm_flags & VMM_DESTROY);
sc->vmm_flags |= VMM_DESTROY;
/*
 * Cycle each vCPU lock once.  NOTE(review): presumably this forces every
 * vCPU out of guest context now that VMM_DESTROY is set -- confirm against
 * vcpu_lock_one().
 */
const int maxcpus = vm_get_maxcpus(sc->vmm_vm);
for (int vcpu = 0; vcpu < maxcpus; vcpu++) {
vcpu_lock_one(sc, vcpu);
vcpu_unlock_one(sc, vcpu);
}
vmmdev_devmem_purge(sc);
/* Callers passing VDO_NO_CLEAN_ZSD skip removal from the zone's ZSD. */
if ((opts & VDO_NO_CLEAN_ZSD) == 0) {
vmm_zsd_rem_vm(sc);
}
/* Drops the zone hold taken when the instance was created. */
zone_rele(sc->vmm_zone);
/* Request that driver holds be released and break all leases. */
vmm_drv_purge(sc);
}
/*
 * Report whether final destruction of the instance can proceed: true only
 * when it is neither held by a driver nor open.  Caller must hold vmm_mtx.
 */
static bool
vmm_destroy_ready(vmm_softc_t *sc)
{
	ASSERT(MUTEX_HELD(&vmm_mtx));

	if ((sc->vmm_flags & (VMM_HELD | VMM_IS_OPEN)) != 0) {
		return (false);
	}

	/* With VMM_HELD clear there must be no holds left on the list. */
	VERIFY(list_is_empty(&sc->vmm_holds));
	return (true);
}
/*
 * Complete destruction of an instance which is ready for it (see
 * vmm_destroy_ready()).  Called with vmm_mtx held.
 *
 * The softc is freed by this function; `sc` must not be used afterwards.
 */
static void
vmm_destroy_finish(vmm_softc_t *sc)
{
ASSERT(MUTEX_HELD(&vmm_mtx));
ASSERT(vmm_destroy_ready(sc));
list_remove(&vmm_list, sc);
vmm_kstat_fini(sc);
vm_destroy(sc->vmm_vm);
ddi_remove_minor_node(vmmdev_dip, sc->vmm_name);
/* Result intentionally ignored. */
(void) devfs_clean(ddi_get_parent(vmmdev_dip), NULL, DV_CLEAN_FORCE);
/* Save the minor before the soft state (which contains sc) is freed. */
const minor_t minor = sc->vmm_minor;
ddi_soft_state_free(vmm_statep, minor);
id_free(vmm_minors, minor);
}
/*
 * Drive destruction of an instance as far as it can currently go.
 *
 * Called with vmm_mtx held (it may be dropped and re-taken while waiting).
 * *hma_release is set to true only when the instance was fully destroyed, in
 * which case the caller is expected to call vmm_hma_release() after dropping
 * vmm_mtx, and `sc` is no longer valid.
 *
 * Returns 0 if destruction completed or was left pending, or EINTR if a wait
 * requested via VDO_ATTEMPT_WAIT was interrupted.
 */
static int
vmm_destroy_locked(vmm_softc_t *sc, vmm_destroy_opts_t opts,
bool *hma_release)
{
ASSERT(MUTEX_HELD(&vmm_mtx));
*hma_release = false;
/* First caller to get here kicks off the destruction. */
if ((sc->vmm_flags & VMM_DESTROY) == 0) {
vmm_destroy_begin(sc, opts);
}
if (vmm_destroy_ready(sc)) {
/*
 * Wake any waiters and let them leave before the softc (and the
 * condvar within it) is torn down.
 */
if (sc->vmm_destroy_waiters != 0) {
cv_broadcast(&sc->vmm_cv);
while (sc->vmm_destroy_waiters != 0) {
cv_wait(&sc->vmm_cv, &vmm_mtx);
}
}
vmm_destroy_finish(sc);
*hma_release = true;
return (0);
} else if ((opts & VDO_ATTEMPT_WAIT) != 0) {
/* Caller asked to wait (interruptibly) until the instance is ready. */
int err = 0;
sc->vmm_destroy_waiters++;
while (!vmm_destroy_ready(sc) && err == 0) {
if (cv_wait_sig(&sc->vmm_cv, &vmm_mtx) <= 0) {
err = EINTR;
}
}
sc->vmm_destroy_waiters--;
/*
 * The destroying thread above may be waiting for the waiter count to
 * reach zero; signal it when the last waiter leaves.
 */
if (sc->vmm_destroy_waiters == 0) {
cv_signal(&sc->vmm_cv);
}
return (err);
} else {
/* Not ready and no wait requested: destruction stays pending. */
return (0);
}
}
/*
 * Destroy an instance without removing it from the zone-specific data
 * (VDO_NO_CLEAN_ZSD).  The destroy attempt is required to report success.
 */
void
vmm_zone_vm_destroy(vmm_softc_t *sc)
{
	bool drop_hma = false;
	int rv;

	mutex_enter(&vmm_mtx);
	rv = vmm_destroy_locked(sc, VDO_NO_CLEAN_ZSD, &drop_hma);
	mutex_exit(&vmm_mtx);

	VERIFY0(rv);

	/* vmm_hma_release() requires that vmm_mtx not be held. */
	if (drop_hma) {
		vmm_hma_release();
	}
}
/*
 * Handle a request to destroy the instance named by req->name.
 *
 * Returns EPERM unless the credential has uid 0, ENOENT if no such instance
 * exists, EPERM if a non-global-zone caller targets an instance belonging to
 * another zone, and otherwise the result of vmm_destroy_locked() invoked
 * with VDO_ATTEMPT_WAIT.
 */
static int
vmmdev_do_vm_destroy(const struct vm_destroy_req *req, cred_t *cr)
{
	bool drop_hma = false;
	vmm_softc_t *sc;
	int err;

	if (crgetuid(cr) != 0) {
		return (EPERM);
	}

	mutex_enter(&vmm_mtx);
	sc = vmm_lookup(req->name);
	if (sc == NULL) {
		err = ENOENT;
	} else if (!INGLOBALZONE(curproc) && sc->vmm_zone != curzone) {
		err = EPERM;
	} else {
		err = vmm_destroy_locked(sc, VDO_ATTEMPT_WAIT, &drop_hma);
	}
	mutex_exit(&vmm_mtx);

	/* vmm_hma_release() requires that vmm_mtx not be held. */
	if (drop_hma) {
		vmm_hma_release();
	}

	return (err);
}
/* Size of the buffer used to format per-vCPU kstat names ("vcpu<N>"). */
#define	VCPU_NAME_BUFLEN	32

/*
 * Create (but do not install) the kstats for an instance: one "vm" kstat and
 * one "vcpu<N>" kstat for each of the VM_MAXCPU possible vCPUs.  They are
 * created in the zone of `cr`, and additionally made visible to the global
 * zone when that zone is not the global zone.
 *
 * Returns 0 on success.  On failure every kstat created so far is deleted,
 * the softc's kstat pointers are reset to NULL, and -1 is returned.
 */
static int
vmm_kstat_alloc(vmm_softc_t *sc, minor_t minor, const cred_t *cr)
{
	const zoneid_t zid = crgetzoneid(cr);
	const int instance = minor;
	kstat_t *ksp;
	uint_t i;

	ASSERT3P(sc->vmm_kstat_vm, ==, NULL);

	ksp = kstat_create_zone(VMM_MODULE_NAME, instance, "vm",
	    VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED,
	    sizeof (vmm_kstats_t) / sizeof (kstat_named_t), 0, zid);
	if (ksp == NULL) {
		return (-1);
	}
	sc->vmm_kstat_vm = ksp;

	for (i = 0; i < VM_MAXCPU; i++) {
		char namebuf[VCPU_NAME_BUFLEN];

		ASSERT3P(sc->vmm_kstat_vcpu[i], ==, NULL);

		(void) snprintf(namebuf, VCPU_NAME_BUFLEN, "vcpu%u", i);
		ksp = kstat_create_zone(VMM_MODULE_NAME, instance, namebuf,
		    VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED,
		    sizeof (vmm_vcpu_kstats_t) / sizeof (kstat_named_t),
		    0, zid);
		if (ksp == NULL) {
			goto fail;
		}
		sc->vmm_kstat_vcpu[i] = ksp;
	}

	if (zid != GLOBAL_ZONEID) {
		kstat_zone_add(sc->vmm_kstat_vm, GLOBAL_ZONEID);
		for (i = 0; i < VM_MAXCPU; i++) {
			kstat_zone_add(sc->vmm_kstat_vcpu[i], GLOBAL_ZONEID);
		}
	}

	return (0);

fail:
	/* vCPU kstats are created in index order; stop at the first gap. */
	for (i = 0; i < VM_MAXCPU && sc->vmm_kstat_vcpu[i] != NULL; i++) {
		kstat_delete(sc->vmm_kstat_vcpu[i]);
		sc->vmm_kstat_vcpu[i] = NULL;
	}
	kstat_delete(sc->vmm_kstat_vm);
	sc->vmm_kstat_vm = NULL;

	return (-1);
}
/*
 * Populate and install the kstats created by vmm_kstat_alloc().
 *
 * The "vm" kstat carries the instance name.  Each vCPU kstat carries the
 * vCPU index plus a set of 64-bit time counters, and is refreshed through
 * vmm_kstat_update_vcpu().  Every kstat's private pointer refers to the VM.
 * Nothing is installed until all kstats have been initialized.
 */
static void
vmm_kstat_init(vmm_softc_t *sc)
{
	kstat_t *vm_ksp;
	vmm_kstats_t *vk;
	uint_t i;

	ASSERT3P(sc->vmm_vm, !=, NULL);
	ASSERT3P(sc->vmm_kstat_vm, !=, NULL);

	vm_ksp = sc->vmm_kstat_vm;
	vk = vm_ksp->ks_data;
	vm_ksp->ks_private = sc->vmm_vm;
	kstat_named_init(&vk->vk_name, "vm_name", KSTAT_DATA_STRING);
	kstat_named_setstr(&vk->vk_name, sc->vmm_name);

	for (i = 0; i < VM_MAXCPU; i++) {
		kstat_t *vcpu_ksp;
		vmm_vcpu_kstats_t *vvk;

		ASSERT3P(sc->vmm_kstat_vcpu[i], !=, NULL);

		vcpu_ksp = sc->vmm_kstat_vcpu[i];
		vvk = vcpu_ksp->ks_data;

		kstat_named_init(&vvk->vvk_vcpu, "vcpu", KSTAT_DATA_UINT32);
		vvk->vvk_vcpu.value.ui32 = i;

		kstat_named_init(&vvk->vvk_time_init, "time_init",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&vvk->vvk_time_run, "time_run",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&vvk->vvk_time_idle, "time_idle",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&vvk->vvk_time_emu_kern, "time_emu_kern",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&vvk->vvk_time_emu_user, "time_emu_user",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&vvk->vvk_time_sched, "time_sched",
		    KSTAT_DATA_UINT64);

		vcpu_ksp->ks_private = sc->vmm_vm;
		vcpu_ksp->ks_update = vmm_kstat_update_vcpu;
	}

	kstat_install(sc->vmm_kstat_vm);
	for (i = 0; i < VM_MAXCPU; i++) {
		kstat_install(sc->vmm_kstat_vcpu[i]);
	}
}
/*
 * Delete all kstats belonging to an instance (the "vm" kstat first, then
 * each vCPU kstat) and clear the softc's pointers to them.  All of them are
 * expected to exist.
 */
static void
vmm_kstat_fini(vmm_softc_t *sc)
{
	uint_t cpu;

	ASSERT(sc->vmm_kstat_vm != NULL);

	kstat_delete(sc->vmm_kstat_vm);
	sc->vmm_kstat_vm = NULL;

	cpu = 0;
	while (cpu < VM_MAXCPU) {
		ASSERT3P(sc->vmm_kstat_vcpu[cpu], !=, NULL);

		kstat_delete(sc->vmm_kstat_vcpu[cpu]);
		sc->vmm_kstat_vcpu[cpu] = NULL;
		cpu++;
	}
}
/*
 * open(9E) entry point.
 *
 * Callers whose process data model is not LP64 are refused with EFBIG.  The
 * control minor must be opened with FEXCL as a character device (EINVAL
 * otherwise).  For an instance minor the instance is flagged VMM_IS_OPEN;
 * ENXIO is returned when no instance exists for the minor.
 */
static int
vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	vmm_softc_t *sc;
	minor_t minor;
	int err = 0;

	if (curproc->p_model != DATAMODEL_LP64) {
		return (EFBIG);
	}

	minor = getminor(*devp);
	if (minor == VMM_CTL_MINOR) {
		if ((flag & FEXCL) == FEXCL && otyp == OTYP_CHR) {
			return (0);
		}
		return (EINVAL);
	}

	mutex_enter(&vmm_mtx);
	sc = ddi_get_soft_state(vmm_statep, minor);
	if (sc != NULL) {
		sc->vmm_flags |= VMM_IS_OPEN;
	} else {
		err = ENXIO;
	}
	mutex_exit(&vmm_mtx);

	return (err);
}
/*
 * close(9E) entry point.
 *
 * Closing the control minor is a no-op.  For an instance minor the
 * VMM_IS_OPEN flag is cleared, and if the instance is marked for destruction
 * or auto-destruction, destruction is driven forward.  Returns ENXIO when no
 * instance exists for the minor, otherwise 0.
 */
static int
vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	const minor_t minor = getminor(dev);
	bool drop_hma = false;
	vmm_softc_t *sc;

	if (minor == VMM_CTL_MINOR) {
		return (0);
	}

	mutex_enter(&vmm_mtx);
	sc = ddi_get_soft_state(vmm_statep, minor);
	if (sc == NULL) {
		mutex_exit(&vmm_mtx);
		return (ENXIO);
	}

	VERIFY3U(sc->vmm_flags & VMM_IS_OPEN, !=, 0);
	sc->vmm_flags &= ~VMM_IS_OPEN;

	if ((sc->vmm_flags & (VMM_DESTROY | VMM_AUTODESTROY)) != 0) {
		VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT, &drop_hma));
	}
	mutex_exit(&vmm_mtx);

	/* vmm_hma_release() requires that vmm_mtx not be held. */
	if (drop_hma) {
		vmm_hma_release();
	}

	return (0);
}
/*
 * Report whether this CPU can run VMs.
 *
 * Returns 0 when supported.  On Intel the verdict (and message) come from
 * vmx_x86_supported(); SVM systems are reported as supported; anything else
 * yields ENXIO.  When the result is non-zero and `arg` is not NULL, the
 * explanatory message is copied out to the user buffer at `arg` (EFAULT if
 * that copy fails).
 */
static int
vmm_is_supported(intptr_t arg)
{
	const char *msg;
	int r;

	if (vmm_is_intel()) {
		r = vmx_x86_supported(&msg);
	} else if (vmm_is_svm()) {
		r = 0;
	} else {
		msg = "Unsupported CPU vendor";
		r = ENXIO;
	}

	/* Nothing to report on success, or when no buffer was supplied. */
	if (r == 0 || arg == (intptr_t)NULL) {
		return (r);
	}

	if (copyoutstr(msg, (char *)arg, strlen(msg) + 1, NULL) != 0) {
		return (EFAULT);
	}
	return (r);
}
/*
 * Handle ioctls issued against the control minor.
 *
 * VMM_CREATE_VM and VMM_DESTROY_VM require FWRITE in `md` (EPERM otherwise)
 * and copy their request structure in from `arg` (EFAULT on failure).
 * Reservoir commands are forwarded to vmmr_ioctl().  Unrecognized commands
 * yield ENOTTY.
 */
static int
vmm_ctl_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp)
{
	void *uaddr = (void *)arg;

	switch (cmd) {
	case VMM_CREATE_VM: {
		struct vm_create_req create;

		if ((md & FWRITE) == 0) {
			return (EPERM);
		}
		if (ddi_copyin(uaddr, &create, sizeof (create), md) != 0) {
			return (EFAULT);
		}
		return (vmmdev_do_vm_create(&create, cr));
	}
	case VMM_DESTROY_VM: {
		struct vm_destroy_req destroy;

		if ((md & FWRITE) == 0) {
			return (EPERM);
		}
		if (ddi_copyin(uaddr, &destroy, sizeof (destroy), md) != 0) {
			return (EFAULT);
		}
		return (vmmdev_do_vm_destroy(&destroy, cr));
	}
	case VMM_VM_SUPPORTED:
		return (vmm_is_supported(arg));
	case VMM_CHECK_IOMMU:
		return (vmm_check_iommu() ? 0 : ENXIO);
	case VMM_RESV_QUERY:
	case VMM_RESV_SET_TARGET:
		return (vmmr_ioctl(cmd, arg, md, cr, rvalp));
	default:
		return (ENOTTY);
	}
}
/*
 * ioctl(9E) entry point for both the control minor and instance minors.
 *
 * Callers whose process is not LP64 get EFBIG, and callers whose ioctl data
 * model requires conversion get ENOTSUP.  VMM_INTERFACE_VERSION is answered
 * for any minor.  Control-minor requests go to vmm_ctl_ioctl().  Requests
 * against an instance marked for destruction fail with ENXIO, except
 * VM_DESTROY_PENDING which reports 1 through *rvalp.  Everything else is
 * handed to vmmdev_do_ioctl().
 */
static int
vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	vmm_softc_t *sc;
	minor_t minor;

	if (curproc->p_model != DATAMODEL_LP64) {
		return (EFBIG);
	}
	if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) {
		return (ENOTSUP);
	}

	if (cmd == VMM_INTERFACE_VERSION) {
		*rvalp = VMM_CURRENT_INTERFACE_VERSION;
		return (0);
	}

	minor = getminor(dev);
	if (minor == VMM_CTL_MINOR) {
		return (vmm_ctl_ioctl(cmd, arg, mode, credp, rvalp));
	}

	sc = ddi_get_soft_state(vmm_statep, minor);
	ASSERT(sc != NULL);

	if ((sc->vmm_flags & VMM_DESTROY) == 0) {
		return (vmmdev_do_ioctl(sc, cmd, arg, mode, credp, rvalp));
	}

	/* Instance is going away: only the pending-destroy query works. */
	if (cmd != VM_DESTROY_PENDING) {
		return (ENXIO);
	}
	*rvalp = 1;
	return (0);
}
/*
 * segmap(9E) entry point: map part of an instance into the address space
 * `as`.
 *
 * Offsets at or above VM_DEVMEM_START are resolved to a devmem segment and
 * mapped with vm_segmap_obj(); lower offsets are mapped with
 * vm_segmap_space().  The mapping is set up under the VM read lock.
 *
 * Returns ENODEV for the control minor or an unknown devmem range, EINVAL
 * for a negative offset or a non-positive off + len, EACCES when PROT_USER
 * is absent from `prot`, ENXIO when the instance is being destroyed, and
 * otherwise the mapping routine's result.
 */
static int
vmm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    unsigned int prot, unsigned int maxprot, unsigned int flags, cred_t *credp)
{
	const minor_t minor = getminor(dev);
	vmm_softc_t *sc;
	int err;

	if (minor == VMM_CTL_MINOR) {
		return (ENODEV);
	}
	if (off < 0 || (off + len) <= 0) {
		return (EINVAL);
	}
	if ((prot & PROT_USER) == 0) {
		return (EACCES);
	}

	sc = ddi_get_soft_state(vmm_statep, minor);
	ASSERT(sc);

	if (sc->vmm_flags & VMM_DESTROY) {
		return (ENXIO);
	}

	vmm_read_lock(sc);
	if (off < VM_DEVMEM_START) {
		err = vm_segmap_space(sc->vmm_vm, off, as, addrp, len, prot,
		    maxprot, flags);
	} else {
		int segid;
		off_t segoff;

		if (vmmdev_devmem_segid(sc, off, len, &segid, &segoff)) {
			err = vm_segmap_obj(sc->vmm_vm, segid, segoff, len, as,
			    addrp, prot, maxprot, flags);
		} else {
			err = ENODEV;
		}
	}
	vmm_read_unlock(sc);

	return (err);
}
/*
 * sdev plugin validate callback.
 *
 * A node is valid when it is a character device whose name matches an
 * existing instance with the same minor number.  A name match with a
 * different minor is reported as stale; everything else is invalid.
 */
static sdev_plugin_validate_t
vmm_sdev_validate(sdev_ctx_t ctx)
{
	const char *name = sdev_ctx_name(ctx);
	sdev_plugin_validate_t verdict;
	vmm_softc_t *sc;
	minor_t minor;

	if (sdev_ctx_vtype(ctx) != VCHR) {
		return (SDEV_VTOR_INVALID);
	}

	VERIFY3S(sdev_ctx_minor(ctx, &minor), ==, 0);

	mutex_enter(&vmm_mtx);
	sc = vmm_lookup(name);
	if (sc == NULL) {
		verdict = SDEV_VTOR_INVALID;
	} else if (sc->vmm_minor == minor) {
		verdict = SDEV_VTOR_VALID;
	} else {
		verdict = SDEV_VTOR_STALE;
	}
	mutex_exit(&vmm_mtx);

	return (verdict);
}
/*
 * sdev plugin filldir callback: create a device node under VMM_SDEV_ROOT
 * for every instance visible to the caller.
 *
 * Global-zone callers see all instances; other callers see only instances
 * belonging to their own zone.  EEXIST from sdev_plugin_mknod() is
 * tolerated; any other failure stops the walk and is returned.  Returns
 * EINVAL (after logging a warning) when invoked for an unexpected path.
 */
static int
vmm_sdev_filldir(sdev_ctx_t ctx)
{
	vmm_softc_t *sc;
	int ret = 0;

	if (strcmp(sdev_ctx_path(ctx), VMM_SDEV_ROOT) != 0) {
		cmn_err(CE_WARN, "%s: bad path '%s' != '%s'\n", __func__,
		    sdev_ctx_path(ctx), VMM_SDEV_ROOT);
		return (EINVAL);
	}

	mutex_enter(&vmm_mtx);
	ASSERT(vmmdev_dip != NULL);

	sc = list_head(&vmm_list);
	while (sc != NULL) {
		if (INGLOBALZONE(curproc) || sc->vmm_zone == curzone) {
			const int rv = sdev_plugin_mknod(ctx, sc->vmm_name,
			    S_IFCHR | 0600,
			    makedevice(ddi_driver_major(vmmdev_dip),
			    sc->vmm_minor));

			if (rv != 0 && rv != EEXIST) {
				ret = rv;
				break;
			}
		}
		sc = list_next(&vmm_list, sc);
	}

	mutex_exit(&vmm_mtx);
	return (ret);
}
/*
 * sdev plugin inactive callback.  Intentionally empty: this driver keeps no
 * per-node state that would need releasing here.
 */
static void
vmm_sdev_inactive(sdev_ctx_t ctx)
{
}
/*
 * sdev plugin operations vector, registered under VMM_MODULE_NAME by
 * vmm_attach().
 */
static sdev_plugin_ops_t vmm_sdev_ops = {
.spo_version = SDEV_PLUGIN_VERSION,
.spo_flags = SDEV_PLUGIN_SUBDIR,
.spo_validate = vmm_sdev_validate,
.spo_filldir = vmm_sdev_filldir,
.spo_inactive = vmm_sdev_inactive
};
/*
 * getinfo(9E) entry point.
 *
 * DDI_INFO_DEVT2DEVINFO yields the driver's single dev_info pointer and
 * DDI_INFO_DEVT2INSTANCE always yields instance 0.  Any other command fails.
 */
static int
vmm_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)vmmdev_dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}
/*
 * attach(9E) entry point.  Only DDI_ATTACH is supported, and only a single
 * attachment may exist at a time.
 *
 * On success the "ctl" minor node exists, the sdev plugin is registered, and
 * vmmdev_dip / vmmdev_sdev_hdl are recorded.  On failure everything set up
 * here is undone and DDI_FAILURE is returned.
 */
static int
vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
sdev_plugin_hdl_t sph;
hma_reg_t *reg = NULL;
boolean_t vmm_loaded = B_FALSE;
if (cmd != DDI_ATTACH) {
return (DDI_FAILURE);
}
mutex_enter(&vmmdev_mtx);
/* Refuse a second attach. */
if (vmmdev_dip != NULL) {
mutex_exit(&vmmdev_mtx);
return (DDI_FAILURE);
}
vmm_sol_glue_init();
/*
 * The HMA registration taken here is temporary: it is held only across
 * vmm_mod_load() and dropped again immediately afterwards.  Long-lived
 * registration is reference-counted via vmm_hma_acquire().
 */
if ((reg = hma_register(vmmdev_hvm_name)) == NULL) {
goto fail;
} else if (vmm_mod_load() != 0) {
goto fail;
}
vmm_loaded = B_TRUE;
hma_unregister(reg);
reg = NULL;
/* Only the control node is created here. */
if (ddi_create_minor_node(dip, "ctl", S_IFCHR,
VMM_CTL_MINOR, DDI_PSEUDO, 0) != 0) {
goto fail;
}
sph = sdev_plugin_register(VMM_MODULE_NAME, &vmm_sdev_ops, NULL);
if (sph == (sdev_plugin_hdl_t)NULL) {
/* The fail path does not remove minor nodes, so do it here. */
ddi_remove_minor_node(dip, NULL);
goto fail;
}
ddi_report_dev(dip);
vmmdev_sdev_hdl = sph;
vmmdev_dip = dip;
mutex_exit(&vmmdev_mtx);
return (DDI_SUCCESS);
fail:
/* Undo, in reverse, whichever of the steps above completed. */
if (vmm_loaded) {
vmm_mod_unload();
}
if (reg != NULL) {
hma_unregister(reg);
}
vmm_sol_glue_cleanup();
mutex_exit(&vmmdev_mtx);
return (DDI_FAILURE);
}
/*
 * detach(9E) entry point.  Only DDI_DETACH is supported.
 *
 * Detach is refused (DDI_FAILURE) when vmmdev_mtx cannot be taken without
 * blocking, when any VM instance still exists, when the reservoir is not
 * empty, or when the sdev plugin cannot be unregistered.  Otherwise the
 * control node is removed, the vmm module state is unloaded, and
 * DDI_SUCCESS is returned.
 */
static int
vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Deliberately a try-lock: if vmmdev_mtx is contended the detach is
	 * failed rather than waited for.
	 */
	if (mutex_tryenter(&vmmdev_mtx) == 0)
		return (DDI_FAILURE);

	mutex_enter(&vmm_mtx);
	if (!list_is_empty(&vmm_list)) {
		mutex_exit(&vmm_mtx);
		mutex_exit(&vmmdev_mtx);
		return (DDI_FAILURE);
	}
	mutex_exit(&vmm_mtx);

	if (!vmmr_is_empty()) {
		mutex_exit(&vmmdev_mtx);
		return (DDI_FAILURE);
	}

	VERIFY(vmmdev_sdev_hdl != (sdev_plugin_hdl_t)NULL);
	if (sdev_plugin_unregister(vmmdev_sdev_hdl) != 0) {
		mutex_exit(&vmmdev_mtx);
		return (DDI_FAILURE);
	}
	vmmdev_sdev_hdl = (sdev_plugin_hdl_t)NULL;

	/* Remove the control node. */
	ddi_remove_minor_node(dip, "ctl");
	vmmdev_dip = NULL;

	vmm_mod_unload();
	/*
	 * With no instances left, no HMA registration may remain.  This is
	 * a pointer comparison, so the pointer flavour of VERIFY3 is used.
	 */
	VERIFY3P(vmmdev_hma_reg, ==, NULL);
	vmm_sol_glue_cleanup();

	mutex_exit(&vmmdev_mtx);

	return (DDI_SUCCESS);
}
/*
 * Character device entry points.  Initializers are positional, so each slot
 * is determined by the field order of struct cb_ops.
 * NOTE(review): that structure is declared in a header not visible here;
 * verify slot meanings there before reordering or inserting entries.
 *
 * This driver supplies open, close, ioctl and segmap handlers; the remaining
 * function slots are stubbed with nodev/nochpoll or the stock ddi_prop_op.
 */
static struct cb_ops vmm_cb_ops = {
vmm_open,
vmm_close,
nodev,
nodev,
nodev,
nodev,
nodev,
vmm_ioctl,
nodev,
nodev,
vmm_segmap,
nochpoll,
ddi_prop_op,
NULL,
D_NEW | D_MP | D_DEVMAP
};
/*
 * Device operations vector.  Initializers are positional, so each slot is
 * determined by the field order of struct dev_ops.
 * NOTE(review): that structure is declared in a header not visible here;
 * verify slot meanings there before reordering or inserting entries.
 *
 * This driver supplies vmm_info, vmm_attach and vmm_detach, and points at
 * vmm_cb_ops for its character device entry points.
 */
static struct dev_ops vmm_ops = {
DEVO_REV,
0,
vmm_info,
nulldev,
nulldev,
vmm_attach,
vmm_detach,
nodev,
&vmm_cb_ops,
(struct bus_ops *)NULL
};
/*
 * Driver linkage: ties the mod_driverops module type and the description
 * string "bhyve vmm" to this driver's dev_ops.
 */
static struct modldrv modldrv = {
&mod_driverops,
"bhyve vmm",
&vmm_ops
};
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info().  It
 * lists the single driver linkage above, followed by a NULL terminator.
 */
static struct modlinkage modlinkage = {
MODREV_1,
&modldrv,
NULL
};
/*
 * Module load entry point.
 *
 * Sets up the driver-global locks, the instance list, the minor number
 * space, the soft state anchor, the reservoir and the zone-specific data,
 * then installs the module.  Returns 0 on success, or the error from the
 * step that failed.
 *
 * On any failure everything created here is torn down again, in reverse
 * order of construction, so that a failed load leaves nothing behind (in
 * particular the "vmm_minors" id space, the list and the mutexes).
 * NOTE(review): sysinit() has no visible counterpart in this file and is
 * not undone here, matching the original behaviour.
 */
int
_init(void)
{
	int error;

	sysinit();

	mutex_init(&vmmdev_mtx, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vmm_mtx, NULL, MUTEX_DRIVER, NULL);
	list_create(&vmm_list, sizeof (vmm_softc_t),
	    offsetof(vmm_softc_t, vmm_node));
	vmm_minors = id_space_create("vmm_minors", VMM_CTL_MINOR + 1, MAXMIN32);

	error = ddi_soft_state_init(&vmm_statep, sizeof (vmm_softc_t), 0);
	if (error != 0) {
		goto fail_base;
	}

	error = vmmr_init();
	if (error != 0) {
		goto fail_state;
	}

	vmm_zsd_init();

	error = mod_install(&modlinkage);
	if (error == 0) {
		return (0);
	}

	/* Same teardown order as _fini(). */
	vmm_zsd_fini();
	vmmr_fini();
fail_state:
	ddi_soft_state_fini(&vmm_statep);
fail_base:
	id_space_destroy(vmm_minors);
	vmm_minors = NULL;
	list_destroy(&vmm_list);
	mutex_destroy(&vmm_mtx);
	mutex_destroy(&vmmdev_mtx);

	return (error);
}
/*
 * Module unload entry point.
 *
 * If mod_remove() refuses, its error is returned and nothing is torn down.
 * Otherwise the zone-specific data, the reservoir and the soft state anchor
 * are released and 0 is returned.
 */
int
_fini(void)
{
	const int rv = mod_remove(&modlinkage);

	if (rv == 0) {
		vmm_zsd_fini();
		vmmr_fini();
		ddi_soft_state_fini(&vmm_statep);
	}

	return (rv);
}
/*
 * Module information entry point: reports on this module via mod_info().
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}