#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/list.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sysmacros.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/x86_archext.h>
#include <vm/as.h>
#include <vm/hat_i86.h>
#include <vm/seg_vn.h>
#include <vm/seg_kmem.h>
#include <sys/vmm_vm.h>
#include <sys/seg_vmm.h>
#include <sys/vmm_kernel.h>
#include <sys/vmm_reservoir.h>
#include <sys/vmm_gpt.h>
#include "vmm_util.h"
/*
 * A single contiguous mapping of a vm_object into the guest-physical address
 * space, tracked on the owning vmspace's vms_maplist.
 */
typedef struct vmspace_mapping {
	list_node_t	vmsm_node;
	vm_object_t	*vmsm_object;	/* object backing this mapping */
	uintptr_t	vmsm_addr;	/* starting GPA of mapping */
	size_t		vmsm_len;	/* length (in bytes) of mapping */
	off_t		vmsm_offset;	/* byte offset into object */
	uint_t		vmsm_prot;	/* PROT_* protection of mapping */
} vmspace_mapping_t;

/* Translate a GPA inside a mapping to the corresponding object offset */
#define	VMSM_OFFSET(vmsm, addr)	( \
	    (vmsm)->vmsm_offset + \
	    ((addr) - (uintptr_t)(vmsm)->vmsm_addr))
/*
 * State flags for a vm_client.  VCS_IDLE is the quiescent state; the rest are
 * OR-able flags manipulated under vmc_lock (some via atomics as well).
 */
typedef enum vm_client_state {
	VCS_IDLE	= 0,
	/* currently performing an operation against the vmspace */
	VCS_ACTIVE	= (1 << 0),
	/* a hold has been placed on the client (see vmc_space_hold) */
	VCS_HOLD	= (1 << 1),
	/* vCPU is running with the vmspace tables loaded on-CPU */
	VCS_ON_CPU	= (1 << 2),
	/* client detached from a destroyed vmspace (see vmc_space_orphan) */
	VCS_ORPHANED	= (1 << 3),
	/* client is undergoing destruction (see vmc_destroy) */
	VCS_DESTROY	= (1 << 4),
} vm_client_state_t;
/*
 * A guest-physical address space: the nested page tables (GPT) plus the list
 * of object mappings and the clients which consume the space.
 */
struct vmspace {
	kmutex_t	vms_lock;
	kcondvar_t	vms_cv;		/* signalled on client removal */
	bool		vms_held;	/* hold asserted (vmspace_hold_enter) */
	uintptr_t	vms_size;	/* size of the space; set at alloc */

	vmm_gpt_t	*vms_gpt;	/* nested page tables */
	uint64_t	vms_pt_gen;	/* table generation, bumped on change */
	uint64_t	vms_pages_mapped; /* count of populated leaf PTEs */
	bool		vms_track_dirty; /* dirty-page tracking enabled */
	list_t		vms_maplist;	/* of vmspace_mapping_t */
	list_t		vms_clients;	/* of vm_client_t */
};
/*
 * A consumer of a vmspace (e.g. a vCPU or a userspace segvmm mapping),
 * through which pages are held and faults are serviced.
 */
struct vm_client {
	vmspace_t	*vmc_space;	/* parent space; NULL once orphaned */
	list_node_t	vmc_node;

	kmutex_t	vmc_lock;
	kcondvar_t	vmc_cv;
	vm_client_state_t vmc_state;
	int		vmc_cpu_active;	/* CPU id while VCS_ON_CPU, else -1 */
	uint64_t	vmc_cpu_gen;	/* table generation at table-enter */
	bool		vmc_track_dirty; /* mirrors vms_track_dirty */
	vmc_inval_cb_t	vmc_inval_func;	/* optional invalidation callback */
	void		*vmc_inval_data; /* argument for vmc_inval_func */

	list_t		vmc_held_pages;	/* of vm_page_t held via vmc_hold */
};
/* Backing type of a vm_object */
typedef enum vm_object_type {
	VMOT_NONE,
	VMOT_MEM,	/* reservoir-backed guest memory */
	VMOT_MMIO,	/* direct mapping of host-physical (MMIO) space */
} vm_object_type_t;
/* A reference-counted object which can be mapped into a vmspace. */
struct vm_object {
	uint_t		vmo_refcnt;	/* manipulated atomically */
	vm_object_type_t vmo_type;	/* fixed at allocation */
	size_t		vmo_size;	/* fixed at allocation */
	void		*vmo_data;	/* vmmr_region_t (MEM) or HPA (MMIO) */
	uint8_t		vmo_attr;	/* cache attribute (MTRR type) */
};
/* Mask of all valid vm_page flags */
#define	VPF_ALL		(VPF_DEFER_DIRTY)

/*
 * A page held on behalf of a client, keeping its backing valid until it is
 * released via vmp_release()/vmp_release_chain().
 */
struct vm_page {
	vm_client_t	*vmp_client;	/* client holding this page */
	list_node_t	vmp_node;
	vm_page_t	*vmp_chain;	/* optional link to further pages */
	uintptr_t	vmp_gpa;	/* GPA of held page */
	pfn_t		vmp_pfn;	/* backing host PFN */
	uint64_t	*vmp_ptep;	/* leaf PTE; NULL once unmapped */
	vm_object_t	*vmp_obj_ref;	/* obj ref taken if mapping removed */
	uint8_t		vmp_prot;	/* PROT_* the page was held with */
	uint8_t		vmp_flags;	/* VPF_* flags */
};
static vmspace_mapping_t *vm_mapping_find(vmspace_t *, uintptr_t, size_t);
static void vmspace_hold_enter(vmspace_t *);
static void vmspace_hold_exit(vmspace_t *, bool);
static void vmspace_clients_invalidate(vmspace_t *, uintptr_t, size_t);
static int vmspace_ensure_mapped(vmspace_t *, uintptr_t, int, pfn_t *,
uint64_t *);
static void vmc_space_hold(vm_client_t *);
static void vmc_space_release(vm_client_t *, bool);
static void vmc_space_invalidate(vm_client_t *, uintptr_t, size_t, uint64_t);
static void vmc_space_unmap(vm_client_t *, uintptr_t, size_t, vm_object_t *);
static vm_client_t *vmc_space_orphan(vm_client_t *, vmspace_t *);
/*
 * Initialize the vendor-appropriate nested-page-table (PTE) implementation
 * for the hypervisor hardware in use.  Panics on unknown vendors.
 */
bool
vmm_vm_init(void)
{
	extern struct vmm_pte_impl ept_pte_impl;
	extern struct vmm_pte_impl rvi_pte_impl;
	struct vmm_pte_impl *impl;

	if (vmm_is_intel()) {
		impl = &ept_pte_impl;
	} else if (vmm_is_svm()) {
		impl = &rvi_pte_impl;
	} else {
		panic("Unexpected hypervisor hardware vendor");
	}

	return (vmm_gpt_init(impl));
}
/* Tear down the state established by vmm_vm_init(). */
void
vmm_vm_fini(void)
{
	vmm_gpt_fini();
}
/*
 * Create a new vmspace covering guest-physical addresses [0, end].  The
 * resulting size (end + 1) must be non-zero, page-aligned, and no larger
 * than USERLIMIT.
 */
vmspace_t *
vmspace_alloc(size_t end)
{
	vmspace_t *vms;
	const uintptr_t size = end + 1;

	VERIFY(size > 0 && (size & PAGEOFFSET) == 0 &&
	    size <= (uintptr_t)USERLIMIT);

	vms = kmem_zalloc(sizeof (*vms), KM_SLEEP);
	vms->vms_size = size;
	list_create(&vms->vms_maplist, sizeof (vmspace_mapping_t),
	    offsetof(vmspace_mapping_t, vmsm_node));
	list_create(&vms->vms_clients, sizeof (vm_client_t),
	    offsetof(vm_client_t, vmc_node));

	vms->vms_gpt = vmm_gpt_alloc();
	/* Generation starts at 1; 0 is used as a "nothing changed" sentinel */
	vms->vms_pt_gen = 1;
	vms->vms_track_dirty = false;

	return (vms);
}
/*
 * Destroy a vmspace.  All regions must already be unmapped.  Any remaining
 * clients are orphaned; clients mid-destruction are waited for (they signal
 * vms_cv as they remove themselves).
 */
void
vmspace_destroy(vmspace_t *vms)
{
	mutex_enter(&vms->vms_lock);
	VERIFY(list_is_empty(&vms->vms_maplist));

	if (!list_is_empty(&vms->vms_clients)) {
		/* Orphan every attached client */
		vm_client_t *vmc = list_head(&vms->vms_clients);
		while (vmc != NULL) {
			vmc = vmc_space_orphan(vmc, vms);
		}
		/*
		 * Wait for any clients which were in the process of
		 * destroying themselves to finish and drop off the list.
		 */
		while (!list_is_empty(&vms->vms_clients)) {
			cv_wait(&vms->vms_cv, &vms->vms_lock);
		}
	}

	VERIFY(list_is_empty(&vms->vms_clients));

	vmm_gpt_free(vms->vms_gpt);
	mutex_exit(&vms->vms_lock);

	mutex_destroy(&vms->vms_lock);
	cv_destroy(&vms->vms_cv);
	list_destroy(&vms->vms_maplist);
	list_destroy(&vms->vms_clients);

	kmem_free(vms, sizeof (*vms));
}
/* Get the count of pages currently resident (mapped) in the vmspace. */
uint64_t
vmspace_resident_count(vmspace_t *vms)
{
	const uint64_t npages = vms->vms_pages_mapped;

	return (npages);
}
/*
 * Perform a dirty-bit operation (get/set/reset) across the page tables over
 * the range [gpa, gpa + len).
 *
 * Based on the VBO_FLAG_* bits in `oper`, `bitmap` (one bit per page) is
 * consulted as input (operate only on pages whose bit is set) and/or written
 * as output (the per-page result of the operation).  Afterwards, the table
 * generation is bumped and clients are invalidated for the range, since PTE
 * bits may have changed.
 */
void
vmspace_bits_operate(vmspace_t *vms, const uint64_t gpa, size_t len,
    vmspace_bit_oper_t oper, uint8_t *bitmap)
{
	const bool bit_input = (oper & VBO_FLAG_BITMAP_IN) != 0;
	const bool bit_output = (oper & VBO_FLAG_BITMAP_OUT) != 0;
	const vmspace_bit_oper_t oper_only =
	    oper & ~(VBO_FLAG_BITMAP_IN | VBO_FLAG_BITMAP_OUT);
	vmm_gpt_t *gpt = vms->vms_gpt;

	/* A bitmap is required if it is to be consulted or populated */
	ASSERT(bitmap != NULL || (!bit_input && !bit_output));

	vmm_gpt_iter_t iter;
	vmm_gpt_iter_entry_t entry;
	vmm_gpt_iter_init(&iter, gpt, gpa, len);

	while (vmm_gpt_iter_next(&iter, &entry)) {
		const size_t offset = (entry.vgie_gpa - gpa);
		const uint64_t pfn_offset = offset >> PAGESHIFT;
		const size_t bit_offset = pfn_offset / 8;
		const uint8_t bit_mask = 1 << (pfn_offset % 8);

		/* Skip pages not selected by the input bitmap */
		if (bit_input && (bitmap[bit_offset] & bit_mask) == 0) {
			continue;
		}

		bool value = false;
		uint64_t *ptep = entry.vgie_ptep;
		if (ptep == NULL) {
			/* No leaf PTE: report the page as clean if asked */
			if (bit_output) {
				bitmap[bit_offset] &= ~bit_mask;
			}
			continue;
		}

		switch (oper_only) {
		case VBO_GET_DIRTY:
			value = vmm_gpte_query_dirty(ptep);
			break;
		case VBO_SET_DIRTY: {
			uint_t prot = 0;
			bool present_writable = false;
			pfn_t pfn;

			/*
			 * Only set the dirty bit on entries which are (or can
			 * be made) present and writable; attempt to fault the
			 * page in as writable when it is not mapped.
			 */
			if (!vmm_gpte_is_mapped(ptep, &pfn, &prot)) {
				int err = vmspace_ensure_mapped(vms,
				    entry.vgie_gpa, PROT_WRITE, &pfn, ptep);
				if (err == 0) {
					present_writable = true;
				}
			} else if ((prot & PROT_WRITE) != 0) {
				present_writable = true;
			}

			if (present_writable) {
				/* value reflects the prior (pre-set) state */
				value = !vmm_gpte_reset_dirty(ptep, true);
			}
			break;
		}
		case VBO_RESET_DIRTY:
			value = vmm_gpte_reset_dirty(ptep, false);
			break;
		default:
			panic("unrecognized operator: %d", oper_only);
			break;
		}

		if (bit_output) {
			if (value) {
				bitmap[bit_offset] |= bit_mask;
			} else {
				bitmap[bit_offset] &= ~bit_mask;
			}
		}
	}

	/*
	 * Invalidate the range so any cached translations (and A/D state) are
	 * flushed by clients, kicking those currently on-CPU.
	 */
	vmspace_hold_enter(vms);
	vms->vms_pt_gen++;
	vmspace_clients_invalidate(vms, gpa, len);
	vmspace_hold_exit(vms, true);
}
/* Query whether dirty-page tracking is currently enabled on the vmspace. */
bool
vmspace_get_tracking(vmspace_t *vms)
{
	bool tracking;

	mutex_enter(&vms->vms_lock);
	tracking = vms->vms_track_dirty;
	mutex_exit(&vms->vms_lock);

	return (tracking);
}
/*
 * Enable or disable dirty-page tracking on a vmspace and all of its clients.
 * Returns ENOTSUP if the page tables cannot track dirty state, 0 otherwise.
 */
int
vmspace_set_tracking(vmspace_t *vms, bool enable_dirty_tracking)
{
	if (enable_dirty_tracking && !vmm_gpt_can_track_dirty(vms->vms_gpt)) {
		return (ENOTSUP);
	}

	vmspace_hold_enter(vms);
	if (vms->vms_track_dirty == enable_dirty_tracking) {
		/* Setting unchanged: nothing more to do */
		vmspace_hold_exit(vms, false);
		return (0);
	}

	vms->vms_track_dirty = enable_dirty_tracking;

	/* Propagate the new setting to every attached client */
	for (vm_client_t *vmc = list_head(&vms->vms_clients);
	    vmc != NULL;
	    vmc = list_next(&vms->vms_clients, vmc)) {
		mutex_enter(&vmc->vmc_lock);
		vmc->vmc_track_dirty = enable_dirty_tracking;
		mutex_exit(&vmc->vmc_lock);
	}

	/* Invalidate the entire space so the mode change takes effect */
	vms->vms_pt_gen++;
	vmspace_clients_invalidate(vms, 0, vms->vms_size);
	vmspace_hold_exit(vms, true);

	return (0);
}
/* Look up the PFN backing a reservoir-memory object at offset `off`. */
static pfn_t
vm_object_pager_reservoir(vm_object_t *vmo, uintptr_t off)
{
	ASSERT3U(vmo->vmo_type, ==, VMOT_MEM);

	vmmr_region_t *const region = vmo->vmo_data;

	return (vmmr_region_pfn_at(region, off));
}
/*
 * Compute the PFN for an MMIO object at offset `off` from its base
 * host-physical address (stored in vmo_data).
 */
static pfn_t
vm_object_pager_mmio(vm_object_t *vmo, uintptr_t off)
{
	ASSERT3U(vmo->vmo_type, ==, VMOT_MMIO);
	ASSERT3P(vmo->vmo_data, !=, NULL);
	ASSERT3U(off, <, vmo->vmo_size);

	const uintptr_t hpa = (uintptr_t)vmo->vmo_data + off;

	return (hpa >> PAGESHIFT);
}
/*
 * Allocate a VMOT_MEM object backed by `size` bytes (page-aligned, non-zero)
 * from the VMM memory reservoir.  Returns NULL if the reservoir allocation
 * fails.  The object starts with a single reference.
 */
vm_object_t *
vm_object_mem_allocate(size_t size, bool transient)
{
	int err;
	vmmr_region_t *region = NULL;
	vm_object_t *vmo;

	ASSERT3U(size, !=, 0);
	ASSERT3U(size & PAGEOFFSET, ==, 0);

	/* Fixed: '&region' had been mangled into the '®' mojibake sequence */
	err = vmmr_alloc(size, transient, &region);
	if (err != 0) {
		return (NULL);
	}

	vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);

	/* Fields are fixed for the life of the object */
	vmo->vmo_type = VMOT_MEM;
	vmo->vmo_size = size;
	vmo->vmo_attr = MTRR_TYPE_WB;
	vmo->vmo_data = region;
	vmo->vmo_refcnt = 1;

	return (vmo);
}
/*
 * Allocate a VMOT_MMIO object representing `size` bytes of host-physical
 * space beginning at `hpa`.  Both must be page-aligned; size must be
 * non-zero.  The object starts with a single reference.
 */
static vm_object_t *
vm_object_mmio_allocate(size_t size, uintptr_t hpa)
{
	ASSERT3U(size, !=, 0);
	ASSERT3U(size & PAGEOFFSET, ==, 0);
	ASSERT3U(hpa & PAGEOFFSET, ==, 0);

	vm_object_t *const vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);

	vmo->vmo_type = VMOT_MMIO;
	vmo->vmo_size = size;
	vmo->vmo_attr = MTRR_TYPE_UC;
	vmo->vmo_data = (void *)hpa;
	vmo->vmo_refcnt = 1;

	return (vmo);
}
/*
 * Allocate an MMIO object for `len` bytes at host-physical `hpa` and map it
 * read/write into `vmspace` at `gpa`.  On mapping failure the object is
 * released and NULL is returned.
 */
vm_object_t *
vmm_mmio_alloc(vmspace_t *vmspace, uintptr_t gpa, size_t len, uintptr_t hpa)
{
	int error;
	vm_object_t *obj;

	obj = vm_object_mmio_allocate(len, hpa);
	if (obj != NULL) {
		error = vmspace_map(vmspace, obj, 0, gpa, len,
		    PROT_READ | PROT_WRITE);
		if (error != 0) {
			vm_object_release(obj);
			obj = NULL;
		}
	}

	return (obj);
}
/*
 * Drop a reference on a vm_object, freeing it (and its backing resources)
 * when the last reference is released.
 */
void
vm_object_release(vm_object_t *vmo)
{
	ASSERT(vmo != NULL);

	uint_t ref = atomic_dec_uint_nv(&vmo->vmo_refcnt);
	/* A result of UINT_MAX means the count underflowed below zero */
	VERIFY3U(ref, !=, UINT_MAX);
	if (ref != 0) {
		return;
	}

	switch (vmo->vmo_type) {
	case VMOT_MEM:
		vmmr_free((vmmr_region_t *)vmo->vmo_data);
		break;
	case VMOT_MMIO:
		/* vmo_data is just the base HPA; nothing to free */
		break;
	default:
		panic("unexpected object type %u", vmo->vmo_type);
		break;
	}

	vmo->vmo_data = NULL;
	vmo->vmo_size = 0;
	kmem_free(vmo, sizeof (*vmo));
}
/* Take an additional hold on a vm_object. */
void
vm_object_reference(vm_object_t *vmo)
{
	ASSERT(vmo != NULL);

	const uint_t newref = atomic_inc_uint_nv(&vmo->vmo_refcnt);
	/* A wrap to zero would indicate counter overflow */
	VERIFY3U(newref, !=, 0);
}
/*
 * Get the host PFN backing `vmo` at offset `off` (rounded down to a page
 * boundary).  Panics for object types with no backing pages.
 */
pfn_t
vm_object_pfn(vm_object_t *vmo, uintptr_t off)
{
	const uintptr_t aligned_off = off & PAGEMASK;

	switch (vmo->vmo_type) {
	case VMOT_MEM:
		return (vm_object_pager_reservoir(vmo, aligned_off));
	case VMOT_MMIO:
		return (vm_object_pager_mmio(vmo, aligned_off));
	case VMOT_NONE:
		break;
	}
	panic("unexpected object type %u", vmo->vmo_type);
}
/*
 * Find the mapping which wholly contains [addr, addr + size).  Returns NULL
 * if the range is unmapped or extends past the end of the containing
 * mapping.
 */
static vmspace_mapping_t *
vm_mapping_find(vmspace_t *vms, uintptr_t addr, size_t size)
{
	vmspace_mapping_t *vmsm;
	list_t *ml = &vms->vms_maplist;
	const uintptr_t range_end = addr + size;

	ASSERT3U(addr, <=, range_end);

	if (addr >= vms->vms_size) {
		return (NULL);
	}
	for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
		const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len;

		if (addr >= vmsm->vmsm_addr && addr < seg_end) {
			if (range_end <= seg_end) {
				return (vmsm);
			} else {
				/* Range spills past this mapping's end */
				return (NULL);
			}
		}
	}
	return (NULL);
}
/*
 * Check whether [addr, addr + size) overlaps any existing mapping.  Returns
 * true if the range is entirely free.  Requires vms_lock held.
 */
static bool
vm_mapping_gap(vmspace_t *vms, uintptr_t addr, size_t size)
{
	vmspace_mapping_t *vmsm;
	list_t *ml = &vms->vms_maplist;
	/* Inclusive last address of the candidate range */
	const uintptr_t range_end = addr + size - 1;

	ASSERT(MUTEX_HELD(&vms->vms_lock));
	ASSERT(size > 0);

	for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
		const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len - 1;

		/*
		 * Ranges do not overlap only if one ends before the other
		 * begins; anything else is a conflict.
		 */
		if (vmsm->vmsm_addr > range_end || addr > seg_end)
			continue;
		return (false);
	}
	return (true);
}
/*
 * Remove a mapping from the vmspace maplist, releasing its object reference
 * and freeing the mapping record.  Requires vms_lock plus an asserted hold.
 */
static void
vm_mapping_remove(vmspace_t *vms, vmspace_mapping_t *vmsm)
{
	list_t *ml = &vms->vms_maplist;

	ASSERT(MUTEX_HELD(&vms->vms_lock));
	ASSERT(vms->vms_held);

	list_remove(ml, vmsm);
	vm_object_release(vmsm->vmsm_object);
	kmem_free(vmsm, sizeof (*vmsm));
}
/*
 * Enter a hold on the vmspace: acquire vms_lock (still held on return) and
 * place every client into the VCS_HOLD state so that no new client activity
 * can begin until the hold is dropped via vmspace_hold_exit().
 */
static void
vmspace_hold_enter(vmspace_t *vms)
{
	mutex_enter(&vms->vms_lock);
	VERIFY(!vms->vms_held);

	vm_client_t *vmc = list_head(&vms->vms_clients);
	for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
		vmc_space_hold(vmc);
	}
	vms->vms_held = true;
}
/*
 * Exit a vmspace hold: release every client (kicking any on-CPU vCPU when
 * `kick_on_cpu` is set) and drop vms_lock.
 */
static void
vmspace_hold_exit(vmspace_t *vms, bool kick_on_cpu)
{
	ASSERT(MUTEX_HELD(&vms->vms_lock));
	VERIFY(vms->vms_held);

	vm_client_t *vmc = list_head(&vms->vms_clients);
	for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
		vmc_space_release(vmc, kick_on_cpu);
	}
	vms->vms_held = false;
	mutex_exit(&vms->vms_lock);
}
/*
 * Notify every client of an invalidation covering [gpa, gpa + len).  Must be
 * called within a vmspace hold (vms_lock held, vms_held asserted).
 */
static void
vmspace_clients_invalidate(vmspace_t *vms, uintptr_t gpa, size_t len)
{
	ASSERT(MUTEX_HELD(&vms->vms_lock));
	VERIFY(vms->vms_held);

	for (vm_client_t *vmc = list_head(&vms->vms_clients);
	    vmc != NULL;
	    vmc = list_next(&vms->vms_clients, vmc)) {
		vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen);
	}
}
/*
 * Map `len` bytes of `vmo` (starting at `obj_off`) into the guest-physical
 * address space at [addr, addr + len).
 *
 * Returns EINVAL for zero-length or overflowing arithmetic, ENOMEM when the
 * range falls outside the vmspace or overlaps an existing mapping.
 */
int
vmspace_map(vmspace_t *vms, vm_object_t *vmo, uintptr_t obj_off, uintptr_t addr,
    size_t len, uint8_t prot)
{
	vmspace_mapping_t *vmsm;
	int res = 0;

	if (len == 0 || (addr + len) < addr ||
	    obj_off >= (obj_off + len) || vmo->vmo_size < (obj_off + len)) {
		return (EINVAL);
	}
	/*
	 * Valid guest-physical addresses are [0, vms_size), so a mapping
	 * ending exactly at vms_size is acceptable.  (The previous '>='
	 * comparison wrongly rejected mappings covering the final page.)
	 */
	if ((addr + len) > vms->vms_size) {
		return (ENOMEM);
	}

	vmsm = kmem_alloc(sizeof (*vmsm), KM_SLEEP);

	vmspace_hold_enter(vms);
	if (!vm_mapping_gap(vms, addr, len)) {
		/* Overlaps an existing mapping */
		kmem_free(vmsm, sizeof (*vmsm));
		res = ENOMEM;
	} else {
		vmsm->vmsm_object = vmo;
		vmsm->vmsm_addr = addr;
		vmsm->vmsm_len = len;
		vmsm->vmsm_offset = (off_t)obj_off;
		vmsm->vmsm_prot = prot;
		list_insert_tail(&vms->vms_maplist, vmsm);

		/* Ensure tables exist down to the leaves for the region */
		vmm_gpt_populate_region(vms->vms_gpt, addr, len);
	}
	vmspace_hold_exit(vms, false);
	return (res);
}
/*
 * Unmap the exact region [addr, addr + len) from the vmspace.  The range
 * must match a whole existing mapping (ENOENT otherwise).  Held pages in the
 * range are converted to object references, the leaf PTEs are cleared, and
 * clients are invalidated if any entries were actually unmapped.
 */
int
vmspace_unmap(vmspace_t *vms, uintptr_t addr, uintptr_t len)
{
	const uintptr_t end = addr + len;
	vmspace_mapping_t *vmsm;
	vm_client_t *vmc;
	uint64_t gen = 0;

	ASSERT3U(addr, <, end);

	vmspace_hold_enter(vms);

	/* Only an exact match (address and length) may be unmapped */
	if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL ||
	    vmsm->vmsm_addr != addr || vmsm->vmsm_len != len) {
		vmspace_hold_exit(vms, false);
		return (ENOENT);
	}

	/* Let clients convert their held pages before the PTEs vanish */
	for (vmc = list_head(&vms->vms_clients); vmc != NULL;
	    vmc = list_next(&vms->vms_clients, vmc)) {
		vmc_space_unmap(vmc, addr, len, vmsm->vmsm_object);
	}

	/* Bump the generation only if something was actually unmapped */
	if (vmm_gpt_unmap_region(vms->vms_gpt, addr, len) != 0) {
		vms->vms_pt_gen++;
		gen = vms->vms_pt_gen;
	}

	/* Tear down the now-unneeded intermediate tables for the region */
	vmm_gpt_vacate_region(vms->vms_gpt, addr, len);

	if (gen != 0) {
		vmspace_clients_invalidate(vms, addr, len);
	}

	vm_mapping_remove(vms, vmsm);
	vmspace_hold_exit(vms, true);
	return (0);
}
/*
 * Look up the object backing `gpa` and install a mapping into the supplied
 * leaf PTE, emitting the backing PFN through *pfnp.
 *
 * Returns 0 on success, FC_NOMAP if no mapping covers the address, or
 * FC_PROT if `req_prot` exceeds the mapping's protection.
 */
static int
vmspace_ensure_mapped(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
    uint64_t *leaf_pte)
{
	vmspace_mapping_t *vmsm;
	vm_object_t *vmo;
	pfn_t pfn;

	ASSERT(pfnp != NULL);
	ASSERT(leaf_pte != NULL);

	vmsm = vm_mapping_find(vms, gpa, PAGESIZE);
	if (vmsm == NULL) {
		return (FC_NOMAP);
	}
	if ((req_prot & vmsm->vmsm_prot) != req_prot) {
		return (FC_PROT);
	}

	vmo = vmsm->vmsm_object;
	pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa));
	VERIFY(pfn != PFN_INVALID);

	/* Only bump the resident count if the PTE was not already present */
	if (vmm_gpt_map_at(vms->vms_gpt, leaf_pte, pfn, vmsm->vmsm_prot,
	    vmo->vmo_attr)) {
		atomic_inc_64(&vms->vms_pages_mapped);
	}
	*pfnp = pfn;
	return (0);
}
/*
 * Look up (faulting in if necessary) the page backing page-aligned `gpa`
 * with protection `req_prot`.  On success, optionally emits the PFN (*pfnp)
 * and leaf PTE pointer (*ptepp).
 *
 * Returns 0, FC_NOMAP, or FC_PROT (see vmspace_ensure_mapped()).
 */
static int
vmspace_lookup_map(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
    uint64_t **ptepp)
{
	vmm_gpt_t *gpt = vms->vms_gpt;
	uint64_t *entries[MAX_GPT_LEVEL], *leaf;
	pfn_t pfn = PFN_INVALID;
	uint_t prot;

	ASSERT0(gpa & PAGEOFFSET);
	ASSERT((req_prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) != PROT_NONE);

	(void) vmm_gpt_walk(gpt, gpa, entries, LEVEL1);
	leaf = entries[LEVEL1];
	if (leaf == NULL) {
		/* Interior tables were never populated for this GPA */
		return (FC_NOMAP);
	}

	if (vmm_gpte_is_mapped(leaf, &pfn, &prot)) {
		/* Already present: just verify the protection suffices */
		if ((req_prot & prot) != req_prot) {
			return (FC_PROT);
		}
	} else {
		/* Not present: fault the page in */
		int err = vmspace_ensure_mapped(vms, gpa, req_prot, &pfn, leaf);
		if (err != 0) {
			return (err);
		}
	}
	ASSERT(pfn != PFN_INVALID && leaf != NULL);

	if (pfnp != NULL) {
		*pfnp = pfn;
	}
	if (ptepp != NULL) {
		*ptepp = leaf;
	}
	return (0);
}
/*
 * Eagerly populate the leaf PTEs for [addr, addr + len) (both page-aligned)
 * rather than waiting for faults.  Returns FC_NOMAP if no single mapping
 * covers the whole range, 0 otherwise.
 */
int
vmspace_populate(vmspace_t *vms, uintptr_t addr, uintptr_t len)
{
	ASSERT0(addr & PAGEOFFSET);
	ASSERT0(len & PAGEOFFSET);

	vmspace_mapping_t *vmsm;
	mutex_enter(&vms->vms_lock);

	if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL) {
		mutex_exit(&vms->vms_lock);
		return (FC_NOMAP);
	}

	vm_object_t *vmo = vmsm->vmsm_object;
	const int prot = vmsm->vmsm_prot;
	const uint8_t attr = vmo->vmo_attr;
	vmm_gpt_t *gpt = vms->vms_gpt;

	size_t populated = 0;
	vmm_gpt_iter_t iter;
	vmm_gpt_iter_entry_t entry;
	vmm_gpt_iter_init(&iter, gpt, addr, len);

	while (vmm_gpt_iter_next(&iter, &entry)) {
		const pfn_t pfn =
		    vm_object_pfn(vmo, VMSM_OFFSET(vmsm, entry.vgie_gpa));
		VERIFY(pfn != PFN_INVALID);

		/* Count only entries which were not already present */
		if (vmm_gpt_map_at(gpt, entry.vgie_ptep, pfn, prot, attr)) {
			populated++;
		}
	}
	atomic_add_64(&vms->vms_pages_mapped, populated);

	mutex_exit(&vms->vms_lock);
	return (0);
}
/*
 * Allocate a new client and attach it to the vmspace.  The client inherits
 * the vmspace's current dirty-tracking setting.
 */
vm_client_t *
vmspace_client_alloc(vmspace_t *vms)
{
	vm_client_t *vmc;

	vmc = kmem_zalloc(sizeof (vm_client_t), KM_SLEEP);
	vmc->vmc_space = vms;
	mutex_init(&vmc->vmc_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vmc->vmc_cv, NULL, CV_DRIVER, NULL);
	vmc->vmc_state = VCS_IDLE;
	vmc->vmc_cpu_active = -1;
	list_create(&vmc->vmc_held_pages, sizeof (vm_page_t),
	    offsetof(vm_page_t, vmp_node));
	vmc->vmc_track_dirty = vms->vms_track_dirty;

	mutex_enter(&vms->vms_lock);
	list_insert_tail(&vms->vms_clients, vmc);
	mutex_exit(&vms->vms_lock);
	return (vmc);
}
/* Get the root pointer value for loading the vmspace page tables on-CPU. */
uint64_t
vmspace_table_root(vmspace_t *vms)
{
	const bool track_dirty = vms->vms_track_dirty;

	return (vmm_gpt_get_pmtp(vms->vms_gpt, track_dirty));
}
/* Get the current generation number of the vmspace page tables. */
uint64_t
vmspace_table_gen(vmspace_t *vms)
{
	const uint64_t gen = vms->vms_pt_gen;

	return (gen);
}
/*
 * Mark a client as active (VCS_ACTIVE), waiting for any hold to clear first.
 *
 * On success, returns 0 with vmc_lock HELD; the caller must finish with
 * vmc_deactivate().  Returns ENXIO (lock released) if the client has been
 * orphaned from its vmspace.
 */
static int
vmc_activate(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY0(vmc->vmc_state & VCS_ACTIVE);
	if ((vmc->vmc_state & VCS_ORPHANED) != 0) {
		mutex_exit(&vmc->vmc_lock);
		return (ENXIO);
	}
	/* Wait out any asserted hold before becoming active */
	while ((vmc->vmc_state & VCS_HOLD) != 0) {
		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
	}
	vmc->vmc_state |= VCS_ACTIVE;
	return (0);
}
/*
 * Clear the VCS_ACTIVE state of a client, waking any holder waiting for the
 * client to go idle.  Expects vmc_lock held on entry (per vmc_activate());
 * drops it before returning.
 */
static void
vmc_deactivate(vm_client_t *vmc)
{
	ASSERT(MUTEX_HELD(&vmc->vmc_lock));
	VERIFY(vmc->vmc_state & VCS_ACTIVE);

	vmc->vmc_state ^= VCS_ACTIVE;
	if ((vmc->vmc_state & VCS_HOLD) != 0) {
		cv_broadcast(&vmc->vmc_cv);
	}
	mutex_exit(&vmc->vmc_lock);
}
/*
 * Record that a vCPU is about to run with the vmspace tables loaded on-CPU.
 * Returns the page-table generation in effect at entry, allowing the caller
 * to detect subsequent invalidations.
 *
 * NOTE(review): state is updated here without taking vmc_lock (only an
 * atomic OR for VCS_ON_CPU); this presumably relies on enter/exit being
 * performed solely by the vCPU thread itself -- confirm against callers
 * before changing.
 */
uint64_t
vmc_table_enter(vm_client_t *vmc)
{
	vmspace_t *vms = vmc->vmc_space;
	uint64_t gen;

	ASSERT0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
	ASSERT3S(vmc->vmc_cpu_active, ==, -1);

	gen = vms->vms_pt_gen;
	vmc->vmc_cpu_active = CPU->cpu_id;
	vmc->vmc_cpu_gen = gen;
	atomic_or_uint(&vmc->vmc_state, VCS_ON_CPU);

	return (gen);
}
/*
 * Record that a vCPU is no longer running with the vmspace tables loaded,
 * waking any holder waiting for it to come off-CPU.
 */
void
vmc_table_exit(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);

	ASSERT(vmc->vmc_state & VCS_ON_CPU);
	vmc->vmc_state ^= VCS_ON_CPU;
	vmc->vmc_cpu_active = -1;
	if ((vmc->vmc_state & VCS_HOLD) != 0) {
		cv_broadcast(&vmc->vmc_cv);
	}

	mutex_exit(&vmc->vmc_lock);
}
/*
 * Assert a hold (VCS_HOLD) on a client and wait for any in-progress activity
 * (VCS_ACTIVE) to drain.  New activations block until vmc_space_release().
 * A client on-CPU is not waited for here; that is handled at release or
 * invalidation time.
 */
static void
vmc_space_hold(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY0(vmc->vmc_state & VCS_HOLD);

	atomic_or_uint(&vmc->vmc_state, VCS_HOLD);
	while ((vmc->vmc_state & VCS_ACTIVE) != 0) {
		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
	}
	mutex_exit(&vmc->vmc_lock);
}
/*
 * Release a hold on a client.  If `kick_on_cpu` is set and the client is
 * currently on-CPU, poke that CPU and wait for the client to exit the tables
 * before dropping the hold.
 */
static void
vmc_space_release(vm_client_t *vmc, bool kick_on_cpu)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY(vmc->vmc_state & VCS_HOLD);

	if (kick_on_cpu && (vmc->vmc_state & VCS_ON_CPU) != 0) {
		poke_cpu(vmc->vmc_cpu_active);

		while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
			cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
		}
	}

	atomic_and_uint(&vmc->vmc_state, ~VCS_HOLD);
	/* Wake anyone blocked in vmc_activate() on the hold */
	cv_broadcast(&vmc->vmc_cv);
	mutex_exit(&vmc->vmc_lock);
}
/*
 * Notify a held client of an invalidation over [addr, addr + size).  A
 * client on-CPU with a table generation older than `gen` is poked off-CPU
 * first; then any registered invalidation callback is run.
 */
static void
vmc_space_invalidate(vm_client_t *vmc, uintptr_t addr, size_t size,
    uint64_t gen)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY(vmc->vmc_state & VCS_HOLD);
	if ((vmc->vmc_state & VCS_ON_CPU) != 0) {
		/* Only a client on a stale generation needs to be kicked */
		if (vmc->vmc_cpu_gen < gen) {
			poke_cpu(vmc->vmc_cpu_active);

			while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
				cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
			}
		}
	}
	if (vmc->vmc_inval_func != NULL) {
		vmc_inval_cb_t func = vmc->vmc_inval_func;
		void *data = vmc->vmc_inval_data;

		/*
		 * Run the callback without vmc_lock held; the asserted
		 * VCS_HOLD keeps the client state stable meanwhile.
		 */
		mutex_exit(&vmc->vmc_lock);
		func(data, addr, size);
		mutex_enter(&vmc->vmc_lock);
	}
	mutex_exit(&vmc->vmc_lock);
}
/*
 * In preparation for a region being unmapped from the vmspace, convert any
 * of this client's held pages inside [addr, addr + size) from a direct PTE
 * reference over to a reference on the backing object, so the pages remain
 * valid for their holders after the mapping is gone.
 */
static void
vmc_space_unmap(vm_client_t *vmc, uintptr_t addr, size_t size,
    vm_object_t *vmo)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY(vmc->vmc_state & VCS_HOLD);
	VERIFY0(vmc->vmc_state & VCS_ON_CPU);

	/*
	 * Fixed: the iterator must advance via the page pointer (`vmp`), not
	 * the client pointer (`vmc`), which walked unrelated memory.
	 */
	for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
	    vmp != NULL;
	    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
		if (vmp->vmp_gpa < addr ||
		    vmp->vmp_gpa >= (addr + size)) {
			/* Page not in the region being unmapped */
			continue;
		}

		if (vmp->vmp_obj_ref == NULL) {
			/* Take an object reference in place of the PTE */
			vm_object_reference(vmo);
			vmp->vmp_obj_ref = vmo;
			vmp->vmp_ptep = NULL;
		} else {
			/* Page was already converted; PTE must be gone */
			VERIFY3P(vmp->vmp_ptep, ==, NULL);
		}
	}
	mutex_exit(&vmc->vmc_lock);
}
/*
 * Disassociate a client from a vmspace being destroyed.  A client already in
 * VCS_DESTROY is left alone (it removes itself and signals vms_cv when
 * done); otherwise it is marked VCS_ORPHANED and removed from the client
 * list.  Returns the next client on the list for the caller's iteration.
 */
static vm_client_t *
vmc_space_orphan(vm_client_t *vmc, vmspace_t *vms)
{
	vm_client_t *next;

	ASSERT(MUTEX_HELD(&vms->vms_lock));

	mutex_enter(&vmc->vmc_lock);
	VERIFY3P(vmc->vmc_space, ==, vms);
	VERIFY0(vmc->vmc_state & VCS_ORPHANED);
	if (vmc->vmc_state & VCS_DESTROY) {
		/* Client is destroying itself; let it finish on its own */
		next = list_next(&vms->vms_clients, vmc);
	} else {
		/*
		 * With all mappings gone, every held page must already have
		 * been converted to an object reference (no live PTEs).
		 */
		for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
		    vmp != NULL;
		    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
			ASSERT3P(vmp->vmp_ptep, ==, NULL);
			ASSERT3P(vmp->vmp_obj_ref, !=, NULL);
		}
		vmc->vmc_state |= VCS_ORPHANED;
		next = list_next(&vms->vms_clients, vmc);
		list_remove(&vms->vms_clients, vmc);
		vmc->vmc_space = NULL;
	}
	mutex_exit(&vmc->vmc_lock);
	return (next);
}
/*
 * Hold the page at page-aligned `gpa` with protection `prot` and VPF_*
 * `flags` on behalf of the client, faulting it in if needed.  The page stays
 * valid until released via vmp_release().  Returns NULL if the client is
 * orphaned or the lookup/fault fails.
 */
vm_page_t *
vmc_hold_ext(vm_client_t *vmc, uintptr_t gpa, int prot, int flags)
{
	vmspace_t *vms = vmc->vmc_space;
	vm_page_t *vmp;
	pfn_t pfn = PFN_INVALID;
	uint64_t *ptep = NULL;

	ASSERT0(gpa & PAGEOFFSET);
	ASSERT((prot & (PROT_READ | PROT_WRITE)) != PROT_NONE);
	ASSERT0(prot & ~PROT_ALL);
	ASSERT0(flags & ~VPF_ALL);

	/* Allocate up front, before the client is activated */
	vmp = kmem_alloc(sizeof (*vmp), KM_SLEEP);
	if (vmc_activate(vmc) != 0) {
		kmem_free(vmp, sizeof (*vmp));
		return (NULL);
	}

	if (vmspace_lookup_map(vms, gpa, prot, &pfn, &ptep) != 0) {
		vmc_deactivate(vmc);
		kmem_free(vmp, sizeof (*vmp));
		return (NULL);
	}
	ASSERT(pfn != PFN_INVALID && ptep != NULL);

	vmp->vmp_client = vmc;
	vmp->vmp_chain = NULL;
	vmp->vmp_gpa = gpa;
	vmp->vmp_pfn = pfn;
	vmp->vmp_ptep = ptep;
	vmp->vmp_obj_ref = NULL;
	vmp->vmp_prot = (uint8_t)prot;
	vmp->vmp_flags = (uint8_t)flags;
	list_insert_tail(&vmc->vmc_held_pages, vmp);
	vmc_deactivate(vmc);

	return (vmp);
}
/* Hold a page (see vmc_hold_ext()) using the default flags. */
vm_page_t *
vmc_hold(vm_client_t *vmc, uintptr_t gpa, int prot)
{
	vm_page_t *const vmp = vmc_hold_ext(vmc, gpa, prot, VPF_DEFAULT);

	return (vmp);
}
/*
 * Service a fault at `gpa` for the client: ensure the backing page is mapped
 * with at least `prot`.  Returns 0 on success, ENXIO for an orphaned client,
 * or FC_NOMAP/FC_PROT from the lookup.
 */
int
vmc_fault(vm_client_t *vmc, uintptr_t gpa, int prot)
{
	vmspace_t *vms = vmc->vmc_space;
	int err;

	err = vmc_activate(vmc);
	if (err == 0) {
		err = vmspace_lookup_map(vms, gpa & PAGEMASK, prot, NULL, NULL);
		vmc_deactivate(vmc);
	}

	return (err);
}
/* Allocate a new client attached to the same vmspace as `vmc`. */
vm_client_t *
vmc_clone(vm_client_t *vmc)
{
	return (vmspace_client_alloc(vmc->vmc_space));
}
/*
 * Register (or clear, by passing NULL) the invalidation callback for this
 * client.  Returns ENXIO if the client has been orphaned.
 */
int
vmc_set_inval_cb(vm_client_t *vmc, vmc_inval_cb_t func, void *data)
{
	int err;

	err = vmc_activate(vmc);
	if (err == 0) {
		vmc->vmc_inval_func = func;
		vmc->vmc_inval_data = data;
		vmc_deactivate(vmc);
	}

	return (err);
}
/*
 * Destroy a client.  All held pages must have been released and the client
 * must be inactive and off-CPU.  If still attached to a vmspace, the client
 * removes itself from the client list, signalling vms_cv for any waiter in
 * vmspace_destroy().
 */
void
vmc_destroy(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);

	VERIFY(list_is_empty(&vmc->vmc_held_pages));
	VERIFY0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));

	if ((vmc->vmc_state & VCS_ORPHANED) == 0) {
		vmspace_t *vms;

		/*
		 * Mark VCS_DESTROY (so a concurrent orphan leaves us to
		 * remove ourselves), then drop vmc_lock so the locks can be
		 * re-taken in the proper order: vms_lock before vmc_lock.
		 */
		vmc->vmc_state |= VCS_DESTROY;
		vms = vmc->vmc_space;
		mutex_exit(&vmc->vmc_lock);

		mutex_enter(&vms->vms_lock);
		mutex_enter(&vmc->vmc_lock);
		list_remove(&vms->vms_clients, vmc);
		/* Wake a vmspace_destroy() waiting for clients to detach */
		cv_signal(&vms->vms_cv);
		mutex_exit(&vmc->vmc_lock);
		mutex_exit(&vms->vms_lock);
	} else {
		/* An orphaned client has no vmspace left to detach from */
		VERIFY3P(vmc->vmc_space, ==, NULL);
		mutex_exit(&vmc->vmc_lock);
	}

	mutex_destroy(&vmc->vmc_lock);
	cv_destroy(&vmc->vmc_cv);
	list_destroy(&vmc->vmc_held_pages);

	kmem_free(vmc, sizeof (*vmc));
}
/*
 * Get a kernel-virtual pointer to a held page through the kpm mapping of its
 * physical address.
 */
static __inline void *
vmp_ptr(const vm_page_t *vmp)
{
	ASSERT3U(vmp->vmp_pfn, !=, PFN_INVALID);

	const uintptr_t paddr = (vmp->vmp_pfn << PAGESHIFT);
	return ((void *)((uintptr_t)kpm_vbase + paddr));
}
/*
 * Get a readable kernel-virtual pointer to the held page.  The page must
 * have been held with PROT_READ.
 */
const void *
vmp_get_readable(const vm_page_t *vmp)
{
	ASSERT(vmp->vmp_prot & PROT_READ);

	const void *datap = vmp_ptr(vmp);

	return (datap);
}
/*
 * Get a writable kernel-virtual pointer to the held page.  The page must
 * have been held with PROT_WRITE.
 */
void *
vmp_get_writable(const vm_page_t *vmp)
{
	ASSERT(vmp->vmp_prot & PROT_WRITE);

	void *datap = vmp_ptr(vmp);

	return (datap);
}
/* Get the host PFN backing the held page. */
pfn_t
vmp_get_pfn(const vm_page_t *vmp)
{
	const pfn_t pfn = vmp->vmp_pfn;

	return (pfn);
}
/*
 * Mark a held (writable) page as dirtied by clearing its VPF_DEFER_DIRTY
 * flag, so the dirty state is applied at release (see vmp_release_inner()).
 */
void
vmp_mark_dirty(vm_page_t *vmp)
{
	ASSERT((vmp->vmp_prot & PROT_WRITE) != 0);

	atomic_and_8(&vmp->vmp_flags, ~VPF_DEFER_DIRTY);
}
/* Link `to_chain` as the successor of `vmp` (for vmp_release_chain()). */
void
vmp_chain(vm_page_t *vmp, vm_page_t *to_chain)
{
	vm_page_t **const slot = &vmp->vmp_chain;

	ASSERT3P(*slot, ==, NULL);

	*slot = to_chain;
}
/* Get the next page chained after `vmp`, or NULL at the end of the chain. */
vm_page_t *
vmp_next(const vm_page_t *vmp)
{
	vm_page_t *const next = vmp->vmp_chain;

	return (next);
}
/*
 * Release a held page with vmc_lock already held.  If the page's mapping was
 * torn down while held (it carries an object reference), drop that reference
 * and report true.  Otherwise, propagate dirty state into the PTE when the
 * page was writable, dirtying was not deferred, and tracking is enabled.
 */
static __inline bool
vmp_release_inner(vm_page_t *vmp, vm_client_t *vmc)
{
	ASSERT(MUTEX_HELD(&vmc->vmc_lock));

	bool was_unmapped = false;

	list_remove(&vmc->vmc_held_pages, vmp);
	if (vmp->vmp_obj_ref != NULL) {
		/* The mapping disappeared while the page was held */
		ASSERT3P(vmp->vmp_ptep, ==, NULL);

		vm_object_release(vmp->vmp_obj_ref);
		was_unmapped = true;
	} else {
		ASSERT3P(vmp->vmp_ptep, !=, NULL);

		/*
		 * Apply dirty accounting unless it was deferred and never
		 * resolved via vmp_mark_dirty().
		 */
		if ((vmp->vmp_prot & PROT_WRITE) != 0 &&
		    (vmp->vmp_flags & VPF_DEFER_DIRTY) == 0 &&
		    vmc->vmc_track_dirty) {
			(void) vmm_gpte_reset_dirty(vmp->vmp_ptep, true);
		}
	}
	kmem_free(vmp, sizeof (*vmp));
	return (was_unmapped);
}
/*
 * Release a held page.  Returns true if the page's underlying mapping was
 * removed while it was held.
 */
bool
vmp_release(vm_page_t *vmp)
{
	vm_client_t *vmc = vmp->vmp_client;

	VERIFY(vmc != NULL);

	mutex_enter(&vmc->vmc_lock);
	const bool was_unmapped = vmp_release_inner(vmp, vmc);
	mutex_exit(&vmc->vmc_lock);
	return (was_unmapped);
}
/*
 * Release a chain of held pages (linked via vmp_chain()); all pages must
 * belong to the same client.  Returns true if any page in the chain had its
 * underlying mapping removed while held.
 */
bool
vmp_release_chain(vm_page_t *vmp)
{
	vm_client_t *vmc = vmp->vmp_client;
	bool any_unmapped = false;

	ASSERT(vmp != NULL);

	mutex_enter(&vmc->vmc_lock);
	while (vmp != NULL) {
		/* Save the link before the page is freed */
		vm_page_t *next = vmp->vmp_chain;

		/* Every page in the chain must share the same client */
		ASSERT3P(vmp->vmp_client, ==, vmc);

		if (vmp_release_inner(vmp, vmc)) {
			any_unmapped = true;
		}
		vmp = next;
	}
	mutex_exit(&vmc->vmc_lock);
	return (any_unmapped);
}
/*
 * Map `len` bytes (at `segoff`) of a VM memory segment into a userspace
 * address space via the segvmm segment driver.  Only reservoir-backed
 * (VMOT_MEM) segments and PROT_USER mappings are supported.
 */
int
vm_segmap_obj(struct vm *vm, int segid, off_t segoff, off_t len,
    struct as *as, caddr_t *addrp, uint_t prot, uint_t maxprot, uint_t flags)
{
	vm_object_t *vmo;
	int err;

	if (segoff < 0 || len <= 0 ||
	    (segoff & PAGEOFFSET) != 0 || (len & PAGEOFFSET) != 0) {
		return (EINVAL);
	}
	if ((prot & PROT_USER) == 0) {
		return (ENOTSUP);
	}
	err = vm_get_memseg(vm, segid, NULL, NULL, &vmo);
	if (err != 0) {
		return (err);
	}

	VERIFY(segoff >= 0);
	VERIFY(len <= vmo->vmo_size);
	VERIFY((len + segoff) <= vmo->vmo_size);

	if (vmo->vmo_type != VMOT_MEM) {
		/* Only guest-memory objects can be mapped this way */
		return (ENOTSUP);
	}

	as_rangelock(as);
	err = choose_addr(as, addrp, (size_t)len, 0, ADDR_VACALIGN, flags);
	if (err == 0) {
		segvmm_crargs_t svma;

		svma.prot = prot;
		svma.offset = segoff;
		svma.vmo = vmo;
		svma.vmc = NULL;

		err = as_map(as, *addrp, (size_t)len, segvmm_create, &svma);
	}
	as_rangeunlock(as);
	return (err);
}
/*
 * Map a portion of the guest-physical address space of `vm` into a userspace
 * address space via segvmm, backed by a freshly allocated vmspace client.
 *
 * NOTE(review): if as_map() fails, the client allocated for svma.vmc appears
 * to have no cleanup here -- confirm whether segvmm_create/as_map handle its
 * destruction on failure.
 */
int
vm_segmap_space(struct vm *vm, off_t off, struct as *as, caddr_t *addrp,
    off_t len, uint_t prot, uint_t maxprot, uint_t flags)
{
	const uintptr_t gpa = (uintptr_t)off;
	const size_t size = (uintptr_t)len;
	int err;

	if (off < 0 || len <= 0 ||
	    (gpa & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
		return (EINVAL);
	}
	if ((prot & PROT_USER) == 0) {
		return (ENOTSUP);
	}

	as_rangelock(as);
	err = choose_addr(as, addrp, size, off, ADDR_VACALIGN, flags);
	if (err == 0) {
		segvmm_crargs_t svma;

		svma.prot = prot;
		svma.offset = gpa;
		svma.vmo = NULL;
		svma.vmc = vmspace_client_alloc(vm_get_vmspace(vm));

		err = as_map(as, *addrp, len, segvmm_create, &svma);
	}
	as_rangeunlock(as);
	return (err);
}