#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/pool.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_aobj.h>
#include <machine/vmmvar.h>
#include <dev/vmm/vmm.h>
/* Pointer to the attached softc; NULL until vmm_attach() has run. */
struct vmm_softc *vmm_softc;

/* Backing pools for struct vm and struct vcpu allocations. */
struct pool vm_pool;
struct pool vcpu_pool;
/* Autoconf driver glue; CD_SKIPHIBERNATE: skip this device on hibernate. */
struct cfdriver vmm_cd = {
	NULL, "vmm", DV_DULL, CD_SKIPHIBERNATE
};

/* Autoconf attachment: probe/attach/activate hooks, no detach. */
const struct cfattach vmm_ca = {
	sizeof(struct vmm_softc), vmm_probe, vmm_attach, NULL, vmm_activate
};
/*
 * vmm_probe
 *
 * Autoconf match routine: accept only the "vmm" bus name, then defer
 * the final decision to the machine-dependent probe.
 */
int
vmm_probe(struct device *parent, void *match, void *aux)
{
	const char **busname = (const char **)aux;

	if (strcmp(*busname, vmm_cd.cd_name) == 0)
		return (vmm_probe_machdep(parent, match, aux));

	return (0);
}
/*
 * vmm_attach
 *
 * Autoconf attach routine: initialize the softc's locks, counters, and
 * VM list, create the backing pools, run machine-dependent setup, and
 * publish the softc through the global vmm_softc pointer (checked by
 * vmmopen()/vmmioctl()).
 */
void
vmm_attach(struct device *parent, struct device *self, void *aux)
{
	struct vmm_softc *sc = (struct vmm_softc *)self;

	rw_init(&sc->sc_slock, "vmmslk");
	sc->sc_status = VMM_ACTIVE;
	refcnt_init(&sc->sc_refcnt);
	sc->vcpu_ct = 0;
	sc->vcpu_max = VMM_MAX_VCPUS;
	sc->vm_ct = 0;
	sc->vm_idx = 0;

	SLIST_INIT(&sc->vm_list);
	rw_init(&sc->vm_lock, "vm_list");

	/* vcpu_pool uses 64-byte alignment; both allow waiting on get. */
	pool_init(&vm_pool, sizeof(struct vm), 0, IPL_MPFLOOR, PR_WAITOK,
	    "vmpool", NULL);
	pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_MPFLOOR, PR_WAITOK,
	    "vcpupl", NULL);

	vmm_attach_machdep(parent, self, aux);

	/* Publish the softc last, once fully initialized. */
	vmm_softc = sc;
	printf("\n");
}
/*
 * vmm_activate
 *
 * Autoconf activate hook driving suspend/resume.
 *
 * DVACT_QUIESCE: mark the device suspended (new ioctl users then sleep
 * in vmmioctl()), drain all in-flight users via sc_refcnt, then do the
 * machine-dependent suspend work.
 *
 * DVACT_WAKEUP: machine-dependent resume first, then re-arm sc_refcnt,
 * mark the device active, and wake the threads sleeping on sc_status.
 */
int
vmm_activate(struct device *self, int act)
{
	switch (act) {
	case DVACT_QUIESCE:
		/* Block new device users while we suspend operation. */
		rw_enter_write(&vmm_softc->sc_slock);
		KASSERT(vmm_softc->sc_status == VMM_ACTIVE);
		vmm_softc->sc_status = VMM_SUSPENDED;
		rw_exit_write(&vmm_softc->sc_slock);

		/* Wait for any in-flight ioctl users to finish. */
		refcnt_finalize(&vmm_softc->sc_refcnt, "vmmsusp");

		vmm_activate_machdep(self, act);
		break;
	case DVACT_WAKEUP:
		vmm_activate_machdep(self, act);

		/* Re-arm the refcnt and set the device back to active. */
		rw_enter_write(&vmm_softc->sc_slock);
		KASSERT(vmm_softc->sc_status == VMM_SUSPENDED);
		refcnt_init(&vmm_softc->sc_refcnt);
		vmm_softc->sc_status = VMM_ACTIVE;
		rw_exit_write(&vmm_softc->sc_slock);

		/* Notify waiters sleeping in vmmioctl(). */
		wakeup(&vmm_softc->sc_status);
		break;
	}

	return (0);
}
/*
 * vmmopen
 *
 * Open handler for /dev/vmm. Fails with ENODEV when the device never
 * attached or its machine-dependent mode was not determined.
 */
int
vmmopen(dev_t dev, int flag, int mode, struct proc *p)
{
	/* Don't allow open if we didn't attach or have no supported mode. */
	if (vmm_softc == NULL || vmm_softc->mode == VMM_MODE_UNKNOWN)
		return (ENODEV);

	return 0;
}
/*
 * vmmclose
 *
 * Close handler for /dev/vmm. Nothing to clean up per-open.
 */
int
vmmclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return 0;
}
/*
 * vm_find
 *
 * Helper function for finding an existing VM by its identifier. On
 * success a reference is taken on the VM; the caller must release it
 * with refcnt_rele_wake().
 *
 * Parameters:
 *  id: identifier of the VM
 *  res: set to the referenced VM on success, NULL otherwise
 *
 * Return values:
 *  0: the VM was found
 *  ENOENT: no VM with that id exists
 *  EPERM (via pledge_fail()): the caller is pledged "vmm" without
 *      "proc" and did not create this VM
 */
int
vm_find(uint32_t id, struct vm **res)
{
	struct proc *p = curproc;
	struct vm *vm;
	int ret = ENOENT;

	*res = NULL;

	rw_enter_read(&vmm_softc->vm_lock);
	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
		if (vm->vm_id == id) {
			/*
			 * A process pledged "vmm" but not "proc" may
			 * only operate on VMs it created itself.
			 */
			if (((p->p_pledge &
			    (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
			    (vm->vm_creator_pid != p->p_p->ps_pid))
				ret = EPERM;
			else {
				refcnt_take(&vm->vm_refcnt);
				*res = vm;
				ret = 0;
			}
			break;
		}
	}
	rw_exit_read(&vmm_softc->vm_lock);

	if (ret == EPERM)
		return (pledge_fail(p, EPERM, PLEDGE_VMM));
	return (ret);
}
/*
 * vmmioctl
 *
 * Main ioctl dispatch routine for /dev/vmm. Runs without the kernel
 * lock; the sc_slock read lock plus a sc_refcnt reference keep the
 * device from suspending while a request is in flight (see
 * vmm_activate()).
 */
int
vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int ret;

	KERNEL_UNLOCK();

	ret = rw_enter(&vmm_softc->sc_slock, RW_READ | RW_INTR);
	if (ret != 0)
		goto out;
	while (vmm_softc->sc_status != VMM_ACTIVE) {
		/* Device is suspended: wait for DVACT_WAKEUP's wakeup(). */
		ret = rwsleep_nsec(&vmm_softc->sc_status, &vmm_softc->sc_slock,
		    PWAIT | PCATCH, "vmmresume", INFSLP);
		if (ret != 0) {
			rw_exit(&vmm_softc->sc_slock);
			goto out;
		}
	}
	/* Hold a usage reference so a suspend waits for us to finish. */
	refcnt_take(&vmm_softc->sc_refcnt);
	rw_exit(&vmm_softc->sc_slock);

	switch (cmd) {
	case VMM_IOC_CREATE:
		if ((ret = vmm_start()) != 0) {
			/* Failed to start VMM mode; undo before bailing. */
			vmm_stop();
			break;
		}
		ret = vm_create((struct vm_create_params *)data, p);
		break;
	case VMM_IOC_RUN:
		ret = vm_run((struct vm_run_params *)data);
		break;
	case VMM_IOC_INFO:
		ret = vm_get_info((struct vm_info_params *)data);
		break;
	case VMM_IOC_TERM:
		ret = vm_terminate((struct vm_terminate_params *)data);
		break;
	case VMM_IOC_RESETCPU:
		ret = vm_resetcpu((struct vm_resetcpu_params *)data);
		break;
	case VMM_IOC_READREGS:
		ret = vm_rwregs((struct vm_rwregs_params *)data, 0);
		break;
	case VMM_IOC_WRITEREGS:
		ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
		break;
	case VMM_IOC_READVMPARAMS:
		ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
		break;
	case VMM_IOC_WRITEVMPARAMS:
		ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1);
		break;
	case VMM_IOC_SHAREMEM:
		ret = vm_share_mem((struct vm_sharemem_params *)data, p);
		break;
	default:
		/* Unknown commands go to the machine-dependent handler. */
		ret = vmmioctl_machdep(dev, cmd, data, flag, p);
		break;
	}

	refcnt_rele_wake(&vmm_softc->sc_refcnt);
out:
	KERNEL_LOCK();
	return (ret);
}
/*
 * pledge_ioctl_vmm
 *
 * Decide whether a "vmm"-pledged process may issue vmm ioctl 'com'.
 * Returns 0 when permitted, EPERM otherwise; commands not known here
 * are referred to the machine-dependent filter.
 */
int
pledge_ioctl_vmm(struct proc *p, long com)
{
	switch (com) {
	case VMM_IOC_CREATE:
	case VMM_IOC_INFO:
	case VMM_IOC_SHAREMEM:
		/* These additionally require the "proc" pledge. */
		return ((p->p_pledge & PLEDGE_PROC) ? 0 : EPERM);
	case VMM_IOC_TERM:
	case VMM_IOC_RUN:
	case VMM_IOC_RESETCPU:
	case VMM_IOC_READREGS:
	case VMM_IOC_WRITEREGS:
	case VMM_IOC_READVMPARAMS:
	case VMM_IOC_WRITEVMPARAMS:
		/* Allowed under "vmm" alone. */
		return (0);
	}

	return pledge_ioctl_vmm_machdep(p, com);
}
/*
 * vm_find_vcpu
 *
 * Lookup the vcpu with identifier 'id' on 'vm'. Returns NULL when
 * 'vm' is NULL or no such vcpu exists. The caller is expected to
 * hold a reference on 'vm' keeping the vcpu list stable.
 */
struct vcpu *
vm_find_vcpu(struct vm *vm, uint32_t id)
{
	struct vcpu *v = NULL;

	if (vm != NULL) {
		/* SLIST_FOREACH leaves v == NULL when nothing matched. */
		SLIST_FOREACH(v, &vm->vm_vcpu_list, vc_vcpu_link) {
			if (v->vc_id == id)
				break;
		}
	}

	return (v);
}
/*
 * vm_create
 *
 * Creates the in-kernel structures for the VM defined by 'vcp'.
 * Validates the requested memory layout, reserves vcpu/vm slots in
 * the softc, allocates the vm and its vcpus, backs each non-MMIO
 * memory range with an anonymous UVM object mapped (immutably) into
 * the creating process, and finally links the new VM onto the global
 * list and reports its id and chosen mapping addresses back in 'vcp'.
 *
 * Parameters:
 *  vcp: vm_create_params describing the VM to create
 *  p: the creating process
 *
 * Return values:
 *  0: success
 *  EINVAL: invalid memory layout, or vcpu count != 1 (only
 *      single-vcpu VMs are currently supported)
 *  ENOMEM: vcpu limit reached, or allocation/mapping failure
 *  Other errors from vcpu_init()/uvm_map_immutable()
 */
int
vm_create(struct vm_create_params *vcp, struct proc *p)
{
	int i, ret = EINVAL;
	size_t memsize;
	struct vm *vm;
	struct vcpu *vcpu;
	struct uvm_object *uao;
	struct vm_mem_range *vmr;
	unsigned int uvmflags = 0;

	/* Validate the layout and compute total guest memory size. */
	memsize = vm_create_check_mem_ranges(vcp);
	if (memsize == 0)
		return (EINVAL);

	/* Only single-vcpu VMs are supported for now. */
	if (vcp->vcp_ncpus != 1)
		return (EINVAL);

	/* Reserve the vcpu and vm slots before any allocation. */
	rw_enter_write(&vmm_softc->vm_lock);
	if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > vmm_softc->vcpu_max) {
		DPRINTF("%s: maximum vcpus (%lu) reached\n", __func__,
		    vmm_softc->vcpu_max);
		rw_exit_write(&vmm_softc->vm_lock);
		return (ENOMEM);
	}
	vmm_softc->vcpu_ct += vcp->vcp_ncpus;
	vmm_softc->vm_ct++;
	rw_exit_write(&vmm_softc->vm_lock);

	vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
	vm->vm_creator_pid = p->p_p->ps_pid;
	/* PR_ZERO above guarantees the copied name stays NUL-terminated. */
	strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN - 1);
	vm->vm_pmap = pmap_create();
	vm->vm_nmemranges = vcp->vcp_nmemranges;
	memcpy(vm->vm_memranges, vcp->vcp_memranges,
	    vm->vm_nmemranges * sizeof(vm->vm_memranges[0]));
	vm->vm_memory_size = memsize;

	uvmflags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
	    MAP_INHERIT_NONE, MADV_NORMAL, UVM_FLAG_CONCEAL);

	/* Create and map an anon object for each non-MMIO range. */
	for (i = 0; i < vm->vm_nmemranges; i++) {
		vmr = &vm->vm_memranges[i];
		if (vmr->vmr_type == VM_MEM_MMIO)
			continue;

		uao = NULL;
		uao = uao_create(vmr->vmr_size, UAO_FLAG_CANFAIL);
		if (uao == NULL) {
			printf("%s: failed to initialize memory slot\n",
			    __func__);
			ret = ENOMEM;
			goto err;
		}
		/*
		 * On success uvm_map() keeps the object's initial
		 * reference (hence uao_detach() only on failure).
		 */
		ret = uvm_map(&p->p_vmspace->vm_map, &vmr->vmr_va,
		    vmr->vmr_size, uao, 0, 0, uvmflags);
		if (ret) {
			printf("%s: uvm_map failed: %d\n", __func__, ret);
			uao_detach(uao);
			ret = ENOMEM;
			goto err;
		}
		ret = uvm_map_immutable(&p->p_vmspace->vm_map, vmr->vmr_va,
		    vmr->vmr_va + vmr->vmr_size, 1);
		if (ret) {
			printf("%s: uvm_map_immutable failed: %d\n", __func__,
			    ret);
			uvm_unmap(&p->p_vmspace->vm_map, vmr->vmr_va,
			    vmr->vmr_va + vmr->vmr_size);
			goto err;
		}
		/* Take a second reference, held by vm_memory_slot[]. */
		uao_reference(uao);
		vm->vm_memory_slot[i] = uao;
	}

	if (vm_impl_init(vm, p)) {
		printf("failed to init arch-specific features for vm %p\n", vm);
		ret = ENOMEM;
		goto err;
	}

	vm->vm_vcpu_ct = 0;

	/* Allocate and initialize each vcpu. */
	SLIST_INIT(&vm->vm_vcpu_list);
	for (i = 0; i < vcp->vcp_ncpus; i++) {
		vcpu = pool_get(&vcpu_pool, PR_WAITOK | PR_ZERO);
		vcpu->vc_parent = vm;
		vcpu->vc_id = vm->vm_vcpu_ct;
		vm->vm_vcpu_ct++;
		if ((ret = vcpu_init(vcpu, vcp)) != 0) {
			printf("failed to init vcpu %d for vm %p\n", i, vm);
			pool_put(&vcpu_pool, vcpu);
			goto err;
		}
		SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link);
	}

	/* Assign an id and publish the VM on the global list. */
	rw_enter_write(&vmm_softc->vm_lock);
	vmm_softc->vm_idx++;
	vm->vm_id = vmm_softc->vm_idx;
	vcp->vcp_id = vm->vm_id;
	refcnt_init(&vm->vm_refcnt);
	SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link);
	rw_exit_write(&vmm_softc->vm_lock);

	/* Copy back the mapping addresses chosen by uvm_map(). */
	memcpy(vcp->vcp_memranges, vm->vm_memranges,
	    vcp->vcp_nmemranges * sizeof(vcp->vcp_memranges[0]));

	return (0);
err:
	/* Tear down partial state and release the slot reservation. */
	vm_teardown(&vm);
	rw_enter_write(&vmm_softc->vm_lock);
	vmm_softc->vm_ct--;
	vmm_softc->vcpu_ct -= vcp->vcp_ncpus;
	if (vmm_softc->vm_ct < 1)
		vmm_stop();
	rw_exit_write(&vmm_softc->vm_lock);
	return (ret);
}
size_t
vm_create_check_mem_ranges(struct vm_create_params *vcp)
{
size_t i, memsize = 0;
struct vm_mem_range *vmr, *pvmr;
const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE;
if (vcp->vcp_nmemranges == 0 ||
vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) {
DPRINTF("invalid number of guest memory ranges\n");
return (0);
}
for (i = 0; i < vcp->vcp_nmemranges; i++) {
vmr = &vcp->vcp_memranges[i];
if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) ||
(vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) {
DPRINTF("memory range %zu is not page aligned\n", i);
return (0);
}
if (vmr->vmr_gpa >= maxgpa ||
vmr->vmr_size > maxgpa - vmr->vmr_gpa) {
DPRINTF("exceeded max memory size\n");
return (0);
}
if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) {
DPRINTF("guest range %zu overlaps or !ascending\n", i);
return (0);
}
if (vmr->vmr_type != VM_MEM_MMIO)
memsize += vmr->vmr_size;
pvmr = vmr;
}
return (memsize);
}
/*
 * vm_teardown
 *
 * Tears down (destroys) the vm indicated by '*target': frees its
 * vcpus, removes its guest pmap mappings, drops the references on its
 * backing anon objects, destroys the pmap, and returns the vm to the
 * pool. Sets *target to NULL on return.
 *
 * The caller must hold the last reference to the vm (e.g. after
 * refcnt_finalize()) and must not hold the kernel lock.
 */
void
vm_teardown(struct vm **target)
{
	size_t i, nvcpu = 0;
	vaddr_t sva, eva;
	struct vcpu *vcpu, *tmp;
	struct vm *vm = *target;
	struct uvm_object *uao;

	KERNEL_ASSERT_UNLOCKED();

	/* Free any attached vcpus. */
	SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp) {
		SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link);
		vcpu_deinit(vcpu);
		pool_put(&vcpu_pool, vcpu);
		nvcpu++;
	}

	/* Remove all guest mappings from the vm's pmap. */
	for (i = 0; i < vm->vm_nmemranges; i++) {
		sva = vm->vm_memranges[i].vmr_gpa;
		eva = sva + vm->vm_memranges[i].vmr_size - 1;
		pmap_remove(vm->vm_pmap, sva, eva);
	}

	/* Drop the vm's references on its backing objects (MMIO is NULL). */
	for (i = 0; i < vm->vm_nmemranges; i++) {
		uao = vm->vm_memory_slot[i];
		vm->vm_memory_slot[i] = NULL;
		if (uao != NULL)
			uao_detach(uao);
	}

	pmap_destroy(vm->vm_pmap);
	vm->vm_pmap = NULL;

	pool_put(&vm_pool, vm);
	*target = NULL;
}
/*
 * vm_get_info
 *
 * Returns information about the VMs on the host in vip->vip_info. If
 * vip->vip_size is too small to hold one record per current VM, the
 * required size is reported back in vip->vip_size and 0 is returned.
 *
 * Return values:
 *  0: success (including the "buffer too small" size report)
 *  ENOMEM: the temporary result buffer could not be allocated
 *  EFAULT: the copyout to vip->vip_info failed
 */
int
vm_get_info(struct vm_info_params *vip)
{
	struct vm_info_result *out;
	struct vm *vm;
	struct vcpu *vcpu;
	int i = 0, j;
	size_t need, vm_ct;

	/* Snapshot the VM count; the list may change before we walk it. */
	rw_enter_read(&vmm_softc->vm_lock);
	vm_ct = vmm_softc->vm_ct;
	rw_exit_read(&vmm_softc->vm_lock);

	need = vm_ct * sizeof(struct vm_info_result);
	if (vip->vip_size < need) {
		vip->vip_info_ct = 0;
		vip->vip_size = need;
		return (0);
	}
	out = malloc(need, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (out == NULL) {
		vip->vip_info_ct = 0;
		return (ENOMEM);
	}

	vip->vip_info_ct = vm_ct;

	rw_enter_read(&vmm_softc->vm_lock);
	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
		refcnt_take(&vm->vm_refcnt);

		out[i].vir_memory_size = vm->vm_memory_size;
		out[i].vir_used_size =
		    pmap_resident_count(vm->vm_pmap) * PAGE_SIZE;
		out[i].vir_ncpus = vm->vm_vcpu_ct;
		out[i].vir_id = vm->vm_id;
		out[i].vir_creator_pid = vm->vm_creator_pid;
		strlcpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN);

		/* Report each vcpu's state, defaulting to "unknown". */
		for (j = 0; j < vm->vm_vcpu_ct; j++) {
			out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN;
			SLIST_FOREACH(vcpu, &vm->vm_vcpu_list,
			    vc_vcpu_link) {
				if (vcpu->vc_id == j)
					out[i].vir_vcpu_state[j] =
					    vcpu->vc_state;
			}
		}

		refcnt_rele_wake(&vm->vm_refcnt);
		i++;
		/* Stop once the snapshotted buffer is full. */
		if (i == vm_ct)
			break;
	}
	rw_exit_read(&vmm_softc->vm_lock);

	if (copyout(out, vip->vip_info, need) == EFAULT) {
		free(out, M_DEVBUF, need);
		return (EFAULT);
	}

	free(out, M_DEVBUF, need);
	return (0);
}
/*
 * vm_terminate
 *
 * Terminates the VM indicated by 'vtp->vtp_vm_id': marks the VM dying
 * (so a concurrent terminate gets EBUSY), unlinks it from the global
 * list, drains all remaining references, tears it down, and releases
 * its vm/vcpu slots.
 *
 * Return values:
 *  0: success
 *  EBUSY: the VM is already being terminated
 *  ENOENT/EPERM: from vm_find()
 */
int
vm_terminate(struct vm_terminate_params *vtp)
{
	struct vm *vm;
	int error, nvcpu, vm_id;

	/* Find the desired VM; takes a reference on success. */
	error = vm_find(vtp->vtp_vm_id, &vm);
	if (error)
		return (error);

	/* Only one terminator proceeds; losers drop their reference. */
	if (atomic_cas_uint(&vm->vm_dying, 0, 1) == 1) {
		refcnt_rele_wake(&vm->vm_refcnt);
		return (EBUSY);
	}

	/* Unlink the VM so no new lookups can find it. */
	rw_enter_write(&vmm_softc->vm_lock);
	SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link);
	rw_exit_write(&vmm_softc->vm_lock);

	/*
	 * Drop the list's reference. It cannot be the last one: we
	 * still hold the reference taken by vm_find() above.
	 */
	if (refcnt_rele(&vm->vm_refcnt))
		panic("%s: vm %d(%p) vm_list refcnt drop was the last",
		    __func__, vm->vm_id, vm);

	/* Drop our reference and wait for all other holders to finish. */
	refcnt_finalize(&vm->vm_refcnt, __func__);

	vm_id = vm->vm_id;
	nvcpu = vm->vm_vcpu_ct;

	vm_teardown(&vm);

	if (vm_id > 0) {
		rw_enter_write(&vmm_softc->vm_lock);
		vmm_softc->vm_ct--;
		vmm_softc->vcpu_ct -= nvcpu;
		/* Leave VMM mode when the last VM is gone. */
		if (vmm_softc->vm_ct < 1)
			vmm_stop();
		rw_exit_write(&vmm_softc->vm_lock);
	}

	return (0);
}
/*
 * vm_resetcpu
 *
 * Resets the register state of the vcpu defined in 'vrp'. The vcpu
 * must currently be in the stopped state.
 *
 * Return values:
 *  0: success
 *  ENOENT: the vcpu was not found (also ENOENT/EPERM from vm_find())
 *  EBUSY: the vcpu is not stopped
 *  EIO: vcpu_reset_regs() failed
 */
int
vm_resetcpu(struct vm_resetcpu_params *vrp)
{
	struct vm *vm;
	struct vcpu *vcpu;
	int error, ret = 0;

	/* Find the desired VM; takes a reference on success. */
	error = vm_find(vrp->vrp_vm_id, &vm);
	if (error != 0) {
		DPRINTF("%s: vm id %u not found\n", __func__,
		    vrp->vrp_vm_id);
		return (error);
	}

	vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
	if (vcpu == NULL) {
		DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
		    vrp->vrp_vcpu_id, vrp->vrp_vm_id);
		ret = ENOENT;
		goto out;
	}

	/* Serialize against state changes on this vcpu. */
	rw_enter_write(&vcpu->vc_lock);
	if (vcpu->vc_state != VCPU_STATE_STOPPED)
		ret = EBUSY;
	else {
		if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) {
			printf("%s: failed\n", __func__);
#ifdef VMM_DEBUG
			dump_vcpu(vcpu);
#endif
			ret = EIO;
		}
	}
	rw_exit_write(&vcpu->vc_lock);
out:
	refcnt_rele_wake(&vm->vm_refcnt);
	return (ret);
}
int
vcpu_must_stop(struct vcpu *vcpu)
{
struct proc *p = curproc;
if (vcpu->vc_state == VCPU_STATE_REQTERM)
return (1);
if (SIGPENDING(p) != 0)
return (1);
return (0);
}
int
vm_share_mem(struct vm_sharemem_params *vsp, struct proc *p)
{
int ret = EINVAL, unmap = 0;
size_t i, failed_uao = 0, n;
struct vm *vm;
struct vm_mem_range *src, *dst;
struct uvm_object *uao;
unsigned int uvmflags;
ret = vm_find(vsp->vsp_vm_id, &vm);
if (ret)
return (ret);
if (vm->vm_nmemranges != vsp->vsp_nmemranges)
goto out;
n = vm->vm_nmemranges;
for (i = 0; i < n; i++) {
src = &vm->vm_memranges[i];
dst = &vsp->vsp_memranges[i];
if (src->vmr_type != dst->vmr_type)
goto out;
if (src->vmr_gpa != dst->vmr_gpa)
goto out;
if (src->vmr_size != dst->vmr_size)
goto out;
if (vsp->vsp_va[i] != 0)
goto out;
}
uvmflags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
MAP_INHERIT_NONE, MADV_NORMAL, UVM_FLAG_CONCEAL);
for (i = 0; i < n; i++) {
dst = &vsp->vsp_memranges[i];
if (dst->vmr_type == VM_MEM_MMIO)
continue;
uao = vm->vm_memory_slot[i];
KASSERT(uao != NULL);
ret = uvm_map(&p->p_p->ps_vmspace->vm_map, &vsp->vsp_va[i],
dst->vmr_size, uao, 0, 0, uvmflags);
if (ret) {
printf("%s: uvm_map failed: %d\n", __func__, ret);
unmap = (i > 0) ? 1 : 0;
failed_uao = i;
goto out;
}
uao_reference(uao);
ret = uvm_map_immutable(&p->p_p->ps_vmspace->vm_map,
vsp->vsp_va[i], vsp->vsp_va[i] + dst->vmr_size, 1);
if (ret) {
printf("%s: uvm_map_immutable failed: %d\n",
__func__, ret);
unmap = 1;
failed_uao = i + 1;
goto out;
}
}
ret = 0;
out:
if (unmap) {
for (i = 0; i < failed_uao; i++) {
dst = &vsp->vsp_memranges[i];
uvm_unmap(&p->p_p->ps_vmspace->vm_map,
vsp->vsp_va[i], vsp->vsp_va[i] + dst->vmr_size);
}
}
refcnt_rele_wake(&vm->vm_refcnt);
return (ret);
}