#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/user.h>
#include <sys/mutex.h>
#include <uvm/uvm.h>
#include <machine/cpu.h>
#ifdef MULTIPROCESSOR
#include <machine/i82489reg.h>
#include <machine/i82489var.h>
#endif
#include "vmm.h"
#if NVMM > 0
#include <machine/vmmvar.h>
#endif
#include "acpi.h"
#ifdef PMAP_DEBUG
#define DPRINTF(x...) do { printf(x); } while(0)
#else
#define DPRINTF(x...)
#endif
long nkptp[] = NKPTP_INITIALIZER;
const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
const int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
const long nkptpmax[] = NKPTPMAX_INITIALIZER;
const long nbpd[] = NBPD_INITIALIZER;
pd_entry_t *const normal_pdes[] = PDES_INITIALIZER;
#define pmap_pte_set(p, n) atomic_swap_64(p, n)
#define pmap_pte_clearbits(p, b) x86_atomic_clearbits_u64(p, b)
#define pmap_pte_setbits(p, b) x86_atomic_setbits_u64(p, b)
struct pmap kernel_pmap_store;
pt_entry_t pg_nx = 0;
pt_entry_t pg_g_kern = 0;
pt_entry_t pg_xo;
pt_entry_t pg_crypt = 0;
pt_entry_t pg_frame = PG_FRAME;
pt_entry_t pg_lgframe = PG_LGFRAME;
int pmap_pg_wc = PG_UCMINUS;
#if PCID_KERN != 0
# error "pmap.c assumes PCID_KERN is zero"
#endif
int pmap_use_pcid;
static u_int cr3_pcid_proc;
static u_int cr3_pcid_temp;
paddr_t cr3_reuse_pcid;
paddr_t cr3_pcid_proc_intel;
pt_entry_t protection_codes[8];
int pmap_initialized = 0;
struct pool pmap_pv_pool;
struct pmap_head pmaps;
struct mutex pmaps_lock = MUTEX_INITIALIZER(IPL_VM);
struct pool pmap_pmap_pool;
TAILQ_HEAD(pg_to_free, vm_page);
struct pool pmap_pdp_pool;
void pmap_pdp_ctor(pd_entry_t *);
void pmap_pdp_ctor_intel(pd_entry_t *);
extern vaddr_t msgbuf_vaddr;
extern paddr_t msgbuf_paddr;
extern vaddr_t idt_vaddr;
extern paddr_t idt_paddr;
extern vaddr_t lo32_vaddr;
extern vaddr_t lo32_paddr;
vaddr_t virtual_avail;
extern int end;
void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *,
vaddr_t, struct vm_page *);
struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t);
struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
int pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs);
void pmap_free_ptp(struct pmap *, struct vm_page *,
vaddr_t, struct pg_to_free *);
void pmap_freepage(struct pmap *, struct vm_page *, int, struct pg_to_free *);
#ifdef MULTIPROCESSOR
static int pmap_is_active(struct pmap *, struct cpu_info *);
#endif
paddr_t pmap_map_ptes(struct pmap *);
struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
#if NVMM > 0
void pmap_remove_ept(struct pmap *, vaddr_t, vaddr_t);
void pmap_do_remove_ept(struct pmap *, vaddr_t);
int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t);
void pmap_shootept(struct pmap *, int);
#endif
int pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
vaddr_t, int, struct pv_entry **);
void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
vaddr_t, vaddr_t, int, struct pv_entry **);
#define PMAP_REMOVE_ALL 0
#define PMAP_REMOVE_SKIPWIRED 1
void pmap_unmap_ptes(struct pmap *, paddr_t);
int pmap_get_physpage(vaddr_t, int, paddr_t *);
int pmap_pdes_valid(vaddr_t, pd_entry_t *);
void pmap_alloc_level(vaddr_t, int, long *);
static inline void pmap_sync_flags_pte(struct vm_page *, u_long);
void pmap_tlb_shootpage(struct pmap *, vaddr_t, int);
void pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t, int);
void pmap_tlb_shoottlb(struct pmap *, int);
#ifdef MULTIPROCESSOR
void pmap_tlb_shootwait(void);
#else
#define pmap_tlb_shootwait() do { } while (0)
#endif
static inline int
pmap_is_curpmap(struct pmap *pmap)
{
return((pmap == pmap_kernel()) ||
(pmap->pm_pdirpa == (rcr3() & CR3_PADDR)));
}
#ifdef MULTIPROCESSOR
static inline int
pmap_is_active(struct pmap *pmap, struct cpu_info *ci)
{
return (pmap == pmap_kernel() || pmap == ci->ci_proc_pmap
#if NVMM > 0
|| (pmap_is_ept(pmap) && pmap == ci->ci_ept_pmap)
#endif
);
}
#endif
static inline u_int
pmap_pte2flags(u_long pte)
{
return (((pte & PG_U) ? PG_PMAP_REF : 0) |
((pte & PG_M) ? PG_PMAP_MOD : 0));
}
static inline void
pmap_sync_flags_pte(struct vm_page *pg, u_long pte)
{
if (pte & (PG_U|PG_M)) {
atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte));
}
}
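/*
 * pmap_map_ptes: lock pmap and, if its tables are not the ones currently
 * loaded, switch %cr3 to it using the temporary PCID.  Returns the previous
 * %cr3 value (or'ed with cr3_reuse_pcid) for pmap_unmap_ptes() to restore,
 * or 0 if no switch was needed.
 */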
paddr_t
pmap_map_ptes(struct pmap *pmap)
{
paddr_t cr3;
KASSERT(!pmap_is_ept(pmap));
if (pmap == pmap_kernel())
return 0;
mtx_enter(&pmap->pm_mtx);
cr3 = rcr3();
KASSERT((cr3 & CR3_PCID) == PCID_KERN ||
(cr3 & CR3_PCID) == PCID_PROC);
if (pmap->pm_pdirpa == (cr3 & CR3_PADDR))
cr3 = 0;
else {
cr3 |= cr3_reuse_pcid;
lcr3(pmap->pm_pdirpa | cr3_pcid_temp);
}
return cr3;
}
void
pmap_unmap_ptes(struct pmap *pmap, paddr_t save_cr3)
{
if (pmap != pmap_kernel())
mtx_leave(&pmap->pm_mtx);
if (save_cr3 != 0)
lcr3(save_cr3);
}
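/*
 * pmap_find_pte_direct: walk the page tables of pm for va through the
 * physical direct map.  Returns 0 with *pd/*offs naming the level 1 page
 * and slot of the PTE (which may itself be invalid); a non-zero return is
 * the level at which the walk stopped on a missing entry or a large page.
 */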
int
pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs)
{
u_long mask, shift;
pd_entry_t pde;
paddr_t pdpa;
int lev;
pdpa = pm->pm_pdirpa;
shift = L4_SHIFT;
mask = L4_MASK;
for (lev = PTP_LEVELS; lev > 0; lev--) {
*pd = (pd_entry_t *)PMAP_DIRECT_MAP(pdpa);
*offs = (VA_SIGN_POS(va) & mask) >> shift;
pde = (*pd)[*offs];
if ((pde & (PG_PS|PG_V)) != PG_V)
return (lev - 1);
pdpa = ((*pd)[*offs] & pg_frame);
shift -= 9;
mask >>= 9;
}
return (0);
}
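/*
 * pmap_kenter_pa: enter an unmanaged kernel mapping of pa at va.  No
 * pv_entry is allocated, so mod/ref information is not tracked; such
 * mappings are removed with pmap_kremove().
 */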
void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
{
pt_entry_t *pte, opte, npte;
pte = kvtopte(va);
npte = (pa & PMAP_PA_MASK) | ((prot & PROT_WRITE) ? PG_RW : PG_RO) |
((pa & PMAP_NOCACHE) ? PG_N : 0) |
((pa & PMAP_WC) ? pmap_pg_wc : 0) | PG_V |
((pa & PMAP_NOCRYPT) ? 0 : pg_crypt);
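	/* mark global (when enabled), except in the low 2MB 1:1 region */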
if (va >= (vaddr_t)NBPD_L2)
npte |= pg_g_kern;
if (!(prot & PROT_EXEC))
npte |= pg_nx;
opte = pmap_pte_set(pte, npte);
#ifdef LARGEPAGES
if (opte & PG_PS)
panic("%s: PG_PS", __func__);
#endif
if (pmap_valid_entry(opte)) {
if ((pa & PMAP_NOCACHE && (opte & PG_N) == 0) ||
(pa & PMAP_NOCRYPT))
wbinvd_on_all_cpus();
pmap_tlb_shootpage(pmap_kernel(), va, 1);
pmap_tlb_shootwait();
}
}
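/*
 * pmap_kremove: remove len bytes of pmap_kenter_pa() mappings starting at
 * sva and flush them from all TLBs.  Managed (PG_PVLIST) mappings must not
 * be removed this way.
 */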
void
pmap_kremove(vaddr_t sva, vsize_t len)
{
pt_entry_t *pte, opte;
vaddr_t va, eva;
eva = sva + len;
for (va = sva; va != eva; va += PAGE_SIZE) {
pte = kvtopte(va);
opte = pmap_pte_set(pte, 0);
#ifdef LARGEPAGES
KASSERT((opte & PG_PS) == 0);
#endif
KASSERT((opte & PG_PVLIST) == 0);
}
pmap_tlb_shootrange(pmap_kernel(), sva, eva, 1);
pmap_tlb_shootwait();
}
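/*
 * pmap_set_pml4_early: during early boot, map a 2GB window of physical
 * memory (2MB pages, two L2 pages) starting at the 2MB boundary containing
 * pa through a spare PML4 slot, so it can be reached before the permanent
 * mappings exist.  Returns the virtual address of pa within that window;
 * pmap_clear_pml4_early() tears the window down again.
 */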
vaddr_t
pmap_set_pml4_early(paddr_t pa)
{
extern paddr_t early_pte_pages;
pt_entry_t *pml4e, *pte;
int i, j, off;
paddr_t curpa;
vaddr_t va;
pml4e = (pt_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE);
pml4e[PDIR_SLOT_EARLY] = (pd_entry_t)early_pte_pages | PG_V | PG_RW |
pg_crypt;
off = pa & PAGE_MASK_L2;
curpa = pa & L2_FRAME;
pte = (pt_entry_t *)PMAP_DIRECT_MAP(early_pte_pages);
memset(pte, 0, 3 * NBPG);
pte[0] = (early_pte_pages + NBPG) | PG_V | PG_RW | pg_crypt;
pte[1] = (early_pte_pages + 2 * NBPG) | PG_V | PG_RW | pg_crypt;
pte = (pt_entry_t *)PMAP_DIRECT_MAP(early_pte_pages + NBPG);
for (i = 0; i < 2; i++) {
for (j = 0; j < 512; j++) {
pte[(i * 512) + j] = curpa | PG_V | PG_RW | PG_PS |
pg_crypt;
curpa += (2 * 1024 * 1024);
}
}
va = (vaddr_t)((PDIR_SLOT_EARLY * 512ULL) << L3_SHIFT) + off;
return VA_SIGN_NEG(va);
}
void
pmap_clear_pml4_early(void)
{
extern paddr_t early_pte_pages;
pt_entry_t *pml4e, *pte;
pte = (pt_entry_t *)PMAP_DIRECT_MAP(early_pte_pages);
memset(pte, 0, 3 * NBPG);
pml4e = (pd_entry_t *)pmap_kernel()->pm_pdir;
pml4e[PDIR_SLOT_EARLY] = 0;
tlbflush();
}
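/*
 * pmap_bootstrap: set up the kernel pmap during early boot: protection
 * codes, PCID/INVPCID use, global kernel PTEs, the physical direct map,
 * bootstrap allocations (msgbuf, idt, low 32-bit page) and the pv/pmap/pdp
 * pools.  first_avail is the first unused physical address; the advanced
 * value is returned.
 */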
paddr_t
pmap_bootstrap(paddr_t first_avail, paddr_t max_pa)
{
vaddr_t kva_start = VM_MIN_KERNEL_ADDRESS;
struct pmap *kpm;
int curslot, i, j, p;
long ndmpdp;
paddr_t dmpd, dmpdp, start_cur, cur_pa;
vaddr_t kva, kva_end;
pt_entry_t *pml3, *pml2;
KASSERT(((0x1000ULL | pg_crypt) & pg_frame) == 0x1000ULL);
virtual_avail = kva_start;
if (cpuid_level >= 0x7) {
uint32_t ecx, dummy;
CPUID_LEAF(0x7, 0, dummy, dummy, ecx, dummy);
if (ecx & SEFF0ECX_PKU) {
lcr4(rcr4() | CR4_PKE);
pg_xo = PG_XO;
}
}
protection_codes[PROT_NONE] = pg_nx;
protection_codes[PROT_EXEC] = pg_xo;
protection_codes[PROT_READ] = PG_RO | pg_nx;
protection_codes[PROT_READ | PROT_EXEC] = PG_RO;
protection_codes[PROT_WRITE] = PG_RW | pg_nx;
protection_codes[PROT_WRITE | PROT_EXEC] = PG_RW;
protection_codes[PROT_WRITE | PROT_READ] = PG_RW | pg_nx;
protection_codes[PROT_READ | PROT_WRITE | PROT_EXEC] = PG_RW;
kpm = pmap_kernel();
for (i = 0; i < PTP_LEVELS - 1; i++) {
uvm_obj_init(&kpm->pm_obj[i], &pmap_pager, 1);
kpm->pm_ptphint[i] = NULL;
}
memset(&kpm->pm_list, 0, sizeof(kpm->pm_list));
kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE);
kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3;
kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
atop(kva_start - VM_MIN_KERNEL_ADDRESS);
kpm->pm_type = PMAP_TYPE_NORMAL;
curpcb->pcb_pmap = kpm;
if ((cpu_ecxfeature & CPUIDECX_PCID) && cpuid_level >= 0x07) {
uint32_t ebx, dummy;
CPUID_LEAF(0x7, 0, dummy, ebx, dummy, dummy);
if (ebx & SEFF0EBX_INVPCID) {
pmap_use_pcid = 1;
pg_g_kern = 0;
			lcr4(rcr4() | CR4_PCIDE);
cr3_pcid_proc = PCID_PROC;
cr3_pcid_temp = PCID_TEMP;
cr3_reuse_pcid = CR3_REUSE_PCID;
cr3_pcid_proc_intel = PCID_PROC_INTEL;
}
}
#if KERNBASE == VM_MIN_KERNEL_ADDRESS
for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ;
#else
kva_end = roundup((vaddr_t)&end, PAGE_SIZE);
for (kva = KERNBASE; kva < kva_end ;
#endif
kva += PAGE_SIZE) {
unsigned long p1i = pl1_i(kva);
if (pmap_valid_entry(PTE_BASE[p1i]))
PTE_BASE[p1i] |= pg_g_kern;
}
ndmpdp = (max_pa + NBPD_L3 - 1) >> L3_SHIFT;
if (ndmpdp < NDML2_ENTRIES)
ndmpdp = NDML2_ENTRIES;
if (ndmpdp > 512)
ndmpdp = 512;
dmpdp = kpm->pm_pdir[PDIR_SLOT_DIRECT] & pg_frame;
dmpd = first_avail; first_avail += ndmpdp * PAGE_SIZE;
memset((void *)PMAP_DIRECT_MAP(dmpd), 0, ndmpdp * PAGE_SIZE);
for (i = NDML2_ENTRIES; i < NPDPG * ndmpdp; i++) {
paddr_t pdp;
vaddr_t va;
pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]);
va = PMAP_DIRECT_MAP(pdp);
*((pd_entry_t *)va) = ((paddr_t)i << L2_SHIFT);
*((pd_entry_t *)va) |= PG_RW | PG_V | PG_PS | pg_g_kern | PG_U |
PG_M | pg_nx | pg_crypt;
}
for (i = NDML2_ENTRIES; i < ndmpdp; i++) {
paddr_t pdp;
vaddr_t va;
pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]);
va = PMAP_DIRECT_MAP(pdp);
*((pd_entry_t *)va) = dmpd + (i << PAGE_SHIFT);
*((pd_entry_t *)va) |= PG_RW | PG_V | PG_U | PG_M | pg_nx |
pg_crypt;
}
kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_V | PG_KW | PG_U |
PG_M | pg_nx | pg_crypt;
for (curslot = 1 ; curslot < NUM_L4_SLOT_DIRECT ; curslot++) {
start_cur = (paddr_t)(curslot * NBPD_L4);
if (max_pa > start_cur) {
dmpd = first_avail; first_avail += PAGE_SIZE;
pml3 = (pt_entry_t *)PMAP_DIRECT_MAP(dmpd);
memset(pml3, 0, PAGE_SIZE);
kpm->pm_pdir[PDIR_SLOT_DIRECT + curslot] = dmpd |
PG_KW | PG_V | PG_U | PG_M | pg_nx | pg_crypt;
p = ((max_pa - start_cur) >> L3_SHIFT);
if (max_pa & L2_MASK)
p++;
if (p > NPDPG)
p = NPDPG;
for (i = 0; i < p; i++) {
dmpd = first_avail; first_avail += PAGE_SIZE;
pml2 = (pt_entry_t *)PMAP_DIRECT_MAP(dmpd);
memset(pml2, 0, PAGE_SIZE);
pml3[i] = dmpd |
PG_RW | PG_V | PG_U | PG_M | pg_nx |
pg_crypt;
cur_pa = start_cur + (i << L3_SHIFT);
j = 0;
while (cur_pa < max_pa && j < NPDPG) {
pml2[j] = curslot * NBPD_L4 +
(uint64_t)i * NBPD_L3 +
(uint64_t)j * NBPD_L2;
pml2[j] |= PG_RW | PG_V | pg_g_kern |
PG_U | PG_M | pg_nx | PG_PS |
pg_crypt;
cur_pa += NBPD_L2;
j++;
}
}
}
}
tlbflush();
msgbuf_vaddr = virtual_avail;
virtual_avail += round_page(MSGBUFSIZE);
idt_vaddr = virtual_avail;
virtual_avail += 2 * PAGE_SIZE;
idt_paddr = first_avail;
first_avail += 2 * PAGE_SIZE;
#if defined(MULTIPROCESSOR) || \
(NACPI > 0 && !defined(SMALL_KERNEL))
lo32_vaddr = virtual_avail;
virtual_avail += PAGE_SIZE;
lo32_paddr = first_avail;
first_avail += PAGE_SIZE;
#endif
LIST_INIT(&pmaps);
pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_VM, 0,
"pmappl", NULL);
pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0,
"pvpl", &pool_allocator_single);
pool_sethiwat(&pmap_pv_pool, 32 * 1024);
pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, IPL_VM, 0,
"pdppl", &pool_allocator_single);
kpm->pm_pdir_intel = NULL;
kpm->pm_pdirpa_intel = 0;
tlbflush();
return first_avail;
}
void
pmap_init_percpu(void)
{
pool_cache_init(&pmap_pv_pool);
}
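/*
 * pmap_randomize: move the kernel PML4 from its boot-time page to a freshly
 * allocated one and recursively relocate the lower directory levels via
 * pmap_randomize_level(), so kernel page table pages do not remain at
 * predictable locations.
 */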
void
pmap_randomize(void)
{
pd_entry_t *pml4va, *oldpml4va;
paddr_t pml4pa;
int i;
pml4va = km_alloc(PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
if (pml4va == NULL)
panic("%s: km_alloc failed", __func__);
oldpml4va = pmap_kernel()->pm_pdir;
memcpy(pml4va, oldpml4va, PAGE_SIZE);
pmap_extract(pmap_kernel(), (vaddr_t)pml4va, &pml4pa);
lcr3(pml4pa);
pmap_kernel()->pm_pdirpa = pml4pa;
pmap_kernel()->pm_pdir = pml4va;
proc0.p_addr->u_pcb.pcb_cr3 = pml4pa;
pml4va[PDIR_SLOT_PTE] = pml4pa | (pml4va[PDIR_SLOT_PTE] & ~pg_frame);
for (i = 0; i < NPDPG; i++) {
if (i == PDIR_SLOT_PTE)
continue;
if (pml4va[i] & pg_frame)
pmap_randomize_level(&pml4va[i], 3);
}
memset(oldpml4va, 0, PAGE_SIZE);
tlbflush();
}
void
pmap_randomize_level(pd_entry_t *pde, int level)
{
pd_entry_t *new_pd_va;
paddr_t old_pd_pa, new_pd_pa;
vaddr_t old_pd_va;
struct vm_page *pg;
int i;
if (level == 0)
return;
if (level < PTP_LEVELS - 1 && (*pde & PG_PS))
return;
new_pd_va = km_alloc(PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
if (new_pd_va == NULL)
panic("%s: cannot allocate page for L%d page directory",
__func__, level);
old_pd_pa = *pde & pg_frame;
old_pd_va = PMAP_DIRECT_MAP(old_pd_pa);
pmap_extract(pmap_kernel(), (vaddr_t)new_pd_va, &new_pd_pa);
memcpy(new_pd_va, (void *)old_pd_va, PAGE_SIZE);
*pde = new_pd_pa | (*pde & ~pg_frame);
tlbflush();
memset((void *)old_pd_va, 0, PAGE_SIZE);
pg = PHYS_TO_VM_PAGE(old_pd_pa);
if (pg != NULL) {
pg->wire_count--;
pmap_kernel()->pm_stats.resident_count--;
if (pg->wire_count <= 1)
uvm_pagefree(pg);
}
for (i = 0; i < NPDPG; i++)
if (new_pd_va[i] & pg_frame)
pmap_randomize_level(&new_pd_va[i], level - 1);
}
paddr_t
pmap_prealloc_lowmem_ptps(paddr_t first_avail)
{
pd_entry_t *pdes;
int level;
paddr_t newp;
pdes = pmap_kernel()->pm_pdir;
level = PTP_LEVELS;
for (;;) {
newp = first_avail; first_avail += PAGE_SIZE;
memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE);
pdes[pl_i(0, level)] =
(newp & pg_frame) | PG_V | PG_RW | pg_crypt;
level--;
if (level <= 1)
break;
pdes = normal_pdes[level - 2];
}
return first_avail;
}
void
pmap_init(void)
{
pmap_initialized = 1;
}
void
pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap,
vaddr_t va, struct vm_page *ptp)
{
pve->pv_pmap = pmap;
pve->pv_va = va;
pve->pv_ptp = ptp;
mtx_enter(&pg->mdpage.pv_mtx);
pve->pv_next = pg->mdpage.pv_list;
pg->mdpage.pv_list = pve;
mtx_leave(&pg->mdpage.pv_mtx);
}
struct pv_entry *
pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va)
{
struct pv_entry *pve, **prevptr;
mtx_enter(&pg->mdpage.pv_mtx);
prevptr = &pg->mdpage.pv_list;
while ((pve = *prevptr) != NULL) {
if (pve->pv_pmap == pmap && pve->pv_va == va) {
*prevptr = pve->pv_next;
break;
}
prevptr = &pve->pv_next;
}
mtx_leave(&pg->mdpage.pv_mtx);
return(pve);
}
struct vm_page *
pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level)
{
int lidx = level - 1;
struct vm_page *pg;
if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] &&
pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx]))
return (pmap->pm_ptphint[lidx]);
pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
return pg;
}
void
pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level,
struct pg_to_free *pagelist)
{
int lidx;
struct uvm_object *obj;
lidx = level - 1;
obj = &pmap->pm_obj[lidx];
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint[lidx] == ptp)
pmap->pm_ptphint[lidx] = RBT_ROOT(uvm_objtree, &obj->memt);
ptp->wire_count = 0;
uvm_pagerealloc(ptp, NULL, 0);
TAILQ_INSERT_TAIL(pagelist, ptp, pageq);
}
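/*
 * pmap_free_ptp: free the page table page ptp that mapped va and, walking
 * up the hierarchy, any parent directory pages that become empty as a
 * result.  The pages are queued on pagelist; the caller frees them once the
 * TLB shootdowns have completed.
 */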
void
pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
struct pg_to_free *pagelist)
{
unsigned long index;
int level;
vaddr_t invaladdr;
level = 1;
do {
pmap_freepage(pmap, ptp, level, pagelist);
index = pl_i(va, level + 1);
pmap_pte_set(&normal_pdes[level - 1][index], 0);
if (level == PTP_LEVELS - 1 && pmap->pm_pdir_intel != NULL) {
pmap_pte_set(&pmap->pm_pdir_intel[index], 0);
DPRINTF("%s: cleared meltdown PML4e @ index %lu "
"(va range start 0x%llx)\n", __func__, index,
(uint64_t)(index << L4_SHIFT));
}
invaladdr = level == 1 ? (vaddr_t)PTE_BASE :
(vaddr_t)normal_pdes[level - 2];
pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE,
pmap_is_curpmap(curpcb->pcb_pmap));
if (level < PTP_LEVELS - 1) {
ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
ptp->wire_count--;
if (ptp->wire_count > 1)
break;
}
} while (++level < PTP_LEVELS);
}
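/*
 * pmap_get_ptp: find or allocate the page table pages needed to map va,
 * working from the top level down and wiring each parent when a new child
 * is installed.  On Meltdown-affected pmaps a new user PML4e is mirrored
 * into the shadow directory.  Returns the level 1 PTP, or NULL if an
 * allocation failed.
 */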
struct vm_page *
pmap_get_ptp(struct pmap *pmap, vaddr_t va)
{
struct vm_page *ptp, *pptp;
int i;
unsigned long index;
pd_entry_t *pva, *pva_intel;
paddr_t ppa, pa;
struct uvm_object *obj;
ptp = NULL;
pa = (paddr_t)-1;
for (i = PTP_LEVELS; i > 1; i--) {
pptp = ptp;
ppa = pa;
index = pl_i(va, i);
pva = normal_pdes[i - 2];
if (pmap_valid_entry(pva[index])) {
ppa = pva[index] & pg_frame;
ptp = NULL;
continue;
}
obj = &pmap->pm_obj[i-2];
ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
if (ptp == NULL)
return NULL;
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1;
pmap->pm_ptphint[i - 2] = ptp;
pa = VM_PAGE_TO_PHYS(ptp);
pva[index] = (pd_entry_t) (pa | PG_u | PG_RW | PG_V | pg_crypt);
if (pmap->pm_pdir_intel != NULL && i == PTP_LEVELS &&
va < VM_MAXUSER_ADDRESS) {
pva_intel = pmap->pm_pdir_intel;
pva_intel[index] = pva[index];
DPRINTF("%s: copying usermode PML4e (content=0x%llx) "
"from 0x%llx -> 0x%llx\n", __func__, pva[index],
(uint64_t)&pva[index], (uint64_t)&pva_intel[index]);
}
pmap->pm_stats.resident_count++;
if (i < PTP_LEVELS) {
if (pptp == NULL)
pptp = pmap_find_ptp(pmap, va, ppa, i);
#ifdef DIAGNOSTIC
if (pptp == NULL)
panic("%s: pde page disappeared", __func__);
#endif
pptp->wire_count++;
}
}
if (ptp == NULL) {
ptp = pmap_find_ptp(pmap, va, ppa, 1);
#ifdef DIAGNOSTIC
if (ptp == NULL) {
printf("va %lx ppa %lx\n", (unsigned long)va,
(unsigned long)ppa);
panic("%s: unmanaged user PTP", __func__);
}
#endif
}
pmap->pm_ptphint[0] = ptp;
return(ptp);
}
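/*
 * pmap_pdp_ctor: initialize a new top-level page directory page: clear the
 * user slots, install the recursive PTE slot, and copy the kernel, direct
 * map and (when split) KERNBASE slots from the kernel pmap.
 */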
void
pmap_pdp_ctor(pd_entry_t *pdir)
{
paddr_t pdirpa;
int npde, i;
struct pmap *kpm = pmap_kernel();
(void) pmap_extract(kpm, (vaddr_t) pdir, &pdirpa);
memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t));
pdir[PDIR_SLOT_PTE] = pdirpa | PG_V | PG_KW | pg_nx | pg_crypt;
npde = nkptp[PTP_LEVELS - 1];
memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN],
npde * sizeof(pd_entry_t));
memset(&pdir[PDIR_SLOT_KERN + npde], 0,
(NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t));
for (i = 0; i < NUM_L4_SLOT_DIRECT; i++)
pdir[PDIR_SLOT_DIRECT + i] = kpm->pm_pdir[PDIR_SLOT_DIRECT + i];
#if VM_MIN_KERNEL_ADDRESS != KERNBASE
pdir[pl4_pi(KERNBASE)] = PDP_BASE[pl4_pi(KERNBASE)];
#endif
}
void
pmap_pdp_ctor_intel(pd_entry_t *pdir)
{
struct pmap *kpm = pmap_kernel();
memcpy(pdir, kpm->pm_pdir_intel, PAGE_SIZE);
}
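/*
 * pmap_create: allocate and initialize a new pmap, including its top-level
 * page directory and, on Meltdown-affected CPUs, the shadow (U-K) page
 * directory, and link it onto the global pmap list.
 */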
struct pmap *
pmap_create(void)
{
struct pmap *pmap;
int i;
pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
mtx_init(&pmap->pm_mtx, IPL_VM);
for (i = 0; i < PTP_LEVELS - 1; i++) {
uvm_obj_init(&pmap->pm_obj[i], &pmap_pager, 1);
pmap->pm_ptphint[i] = NULL;
}
pmap->pm_stats.wired_count = 0;
pmap->pm_stats.resident_count = 1;
pmap->pm_type = PMAP_TYPE_NORMAL;
pmap->eptp = 0;
pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK);
pmap_pdp_ctor(pmap->pm_pdir);
pmap->pm_pdirpa = pmap->pm_pdir[PDIR_SLOT_PTE] & pg_frame;
if (cpu_meltdown) {
pmap->pm_pdir_intel = pool_get(&pmap_pdp_pool, PR_WAITOK);
pmap_pdp_ctor_intel(pmap->pm_pdir_intel);
pmap->pm_stats.resident_count++;
if (!pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir_intel,
&pmap->pm_pdirpa_intel))
panic("%s: unknown PA mapping for meltdown PML4",
__func__);
} else {
pmap->pm_pdir_intel = NULL;
pmap->pm_pdirpa_intel = 0;
}
mtx_enter(&pmaps_lock);
LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
mtx_leave(&pmaps_lock);
return (pmap);
}
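/*
 * pmap_destroy: drop a reference to a pmap and, on the last reference,
 * free its remaining page table pages and page directories.
 */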
void
pmap_destroy(struct pmap *pmap)
{
struct vm_page *pg;
int refs;
int i;
refs = atomic_dec_int_nv(&pmap->pm_obj[0].uo_refs);
if (refs > 0) {
return;
}
mtx_enter(&pmaps_lock);
LIST_REMOVE(pmap, pm_list);
mtx_leave(&pmaps_lock);
for (i = 0; i < PTP_LEVELS - 1; i++) {
while ((pg = RBT_ROOT(uvm_objtree,
&pmap->pm_obj[i].memt)) != NULL) {
KASSERT((pg->pg_flags & PG_BUSY) == 0);
pg->wire_count = 0;
pmap->pm_stats.resident_count--;
uvm_pagefree(pg);
}
}
pool_put(&pmap_pdp_pool, pmap->pm_pdir);
if (pmap->pm_pdir_intel != NULL) {
pmap->pm_stats.resident_count--;
pool_put(&pmap_pdp_pool, pmap->pm_pdir_intel);
}
pool_put(&pmap_pmap_pool, pmap);
}
void
pmap_reference(struct pmap *pmap)
{
atomic_inc_int(&pmap->pm_obj[0].uo_refs);
}
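/*
 * pmap_activate: point the process' pcb at its pmap's page directory (with
 * the appropriate PCID) and, if it is curproc, load %cr3 and record the
 * per-CPU kernel/user %cr3 values used when the Meltdown workaround is
 * enabled.
 */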
void
pmap_activate(struct proc *p)
{
struct pcb *pcb = &p->p_addr->u_pcb;
struct pmap *pmap = p->p_vmspace->vm_map.pmap;
pcb->pcb_pmap = pmap;
pcb->pcb_cr3 = pmap->pm_pdirpa;
pcb->pcb_cr3 |= (pmap != pmap_kernel()) ? cr3_pcid_proc :
(PCID_KERN | cr3_reuse_pcid);
if (p != curproc)
return;
if ((p->p_flag & P_SYSTEM) == 0) {
struct cpu_info *self = curcpu();
self->ci_proc_pmap = pmap;
if (cpu_meltdown) {
self->ci_kern_cr3 = pcb->pcb_cr3 | cr3_reuse_pcid;
self->ci_user_cr3 = pmap->pm_pdirpa_intel |
cr3_pcid_proc_intel;
}
}
lcr3(pcb->pcb_cr3);
}
void
pmap_deactivate(struct proc *p)
{
if ((p->p_flag & P_SYSTEM) == 0) {
struct cpu_info *self = curcpu();
KASSERT(self->ci_proc_pmap == p->p_vmspace->vm_map.pmap);
self->ci_proc_pmap = NULL;
}
}
int
pmap_pdes_valid(vaddr_t va, pd_entry_t *lastpde)
{
int i;
unsigned long index;
pd_entry_t pde;
for (i = PTP_LEVELS; i > 1; i--) {
index = pl_i(va, i);
pde = normal_pdes[i - 2][index];
if (!pmap_valid_entry(pde))
return 0;
}
if (lastpde != NULL)
*lastpde = pde;
return 1;
}
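/*
 * pmap_extract: look up the physical address mapped at va, handling the
 * direct map region, normal 4KB PTEs and 2MB large pages.  Returns 1 and
 * fills *pap on success, 0 if va is not mapped.
 */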
int
pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
pt_entry_t *ptes, pte;
int level, offs;
if (pmap == pmap_kernel() && va >= PMAP_DIRECT_BASE &&
va < PMAP_DIRECT_END) {
*pap = va - PMAP_DIRECT_BASE;
return 1;
}
if (pmap != pmap_kernel())
mtx_enter(&pmap->pm_mtx);
level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
pte = ptes[offs];
if (pmap != pmap_kernel())
mtx_leave(&pmap->pm_mtx);
if (__predict_true(level == 0 && pmap_valid_entry(pte))) {
if (pap != NULL)
*pap = (pte & pg_frame) | (va & PAGE_MASK);
return 1;
}
if (level == 1 && (pte & (PG_PS|PG_V)) == (PG_PS|PG_V)) {
if (pap != NULL)
*pap = (pte & pg_lgframe) | (va & PAGE_MASK_L2);
return 1;
}
return 0;
}
void
pmap_zero_page(struct vm_page *pg)
{
pagezero(pmap_map_direct(pg));
}
void
pmap_flush_cache(vaddr_t addr, vsize_t len)
{
vaddr_t i;
if (curcpu()->ci_cflushsz == 0) {
wbinvd_on_all_cpus();
return;
}
mfence();
for (i = addr; i < addr + len; i += curcpu()->ci_cflushsz)
clflush(i);
mfence();
}
void
pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
{
vaddr_t srcva = pmap_map_direct(srcpg);
vaddr_t dstva = pmap_map_direct(dstpg);
memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
}
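/*
 * pmap_remove_ptes: remove the PTEs in [startva, endva) within a single
 * page table page, updating wired/resident counts, syncing mod/ref bits
 * and collecting released pv_entries on *free_pvs for the caller to free.
 */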
void
pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
vaddr_t startva, vaddr_t endva, int flags, struct pv_entry **free_pvs)
{
struct pv_entry *pve;
pt_entry_t *pte = (pt_entry_t *) ptpva;
struct vm_page *pg;
pt_entry_t opte;
for (; startva < endva && (ptp == NULL || ptp->wire_count > 1)
; pte++, startva += PAGE_SIZE) {
if (!pmap_valid_entry(*pte))
continue;
if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) {
continue;
}
opte = pmap_pte_set(pte, 0);
if (opte & PG_W)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
if (ptp != NULL)
ptp->wire_count--;
pg = PHYS_TO_VM_PAGE(opte & pg_frame);
if ((opte & PG_PVLIST) == 0) {
#ifdef DIAGNOSTIC
if (pg != NULL)
panic("%s: managed page without PG_PVLIST: "
"va 0x%lx, opte 0x%llx", __func__,
startva, opte);
#endif
continue;
}
#ifdef DIAGNOSTIC
if (pg == NULL)
panic("%s: unmanaged page marked PG_PVLIST: "
"va 0x%lx, opte 0x%llx", __func__,
startva, opte);
#endif
pmap_sync_flags_pte(pg, opte);
pve = pmap_remove_pv(pg, pmap, startva);
if (pve != NULL) {
pve->pv_next = *free_pvs;
*free_pvs = pve;
}
}
}
int
pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
vaddr_t va, int flags, struct pv_entry **free_pvs)
{
struct pv_entry *pve;
struct vm_page *pg;
pt_entry_t opte;
if (!pmap_valid_entry(*pte))
return 0;
if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) {
return 0;
}
opte = pmap_pte_set(pte, 0);
if (opte & PG_W)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
if (ptp != NULL)
ptp->wire_count--;
pg = PHYS_TO_VM_PAGE(opte & pg_frame);
if ((opte & PG_PVLIST) == 0) {
#ifdef DIAGNOSTIC
if (pg != NULL)
panic("%s: managed page without PG_PVLIST: "
"va 0x%lx, opte 0x%llx", __func__, va, opte);
#endif
return 1;
}
#ifdef DIAGNOSTIC
if (pg == NULL)
panic("%s: unmanaged page marked PG_PVLIST: "
"va 0x%lx, opte 0x%llx", __func__, va, opte);
#endif
pmap_sync_flags_pte(pg, opte);
pve = pmap_remove_pv(pg, pmap, va);
if (pve != NULL) {
pve->pv_next = *free_pvs;
*free_pvs = pve;
}
return 1;
}
void
pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
#if NVMM > 0
if (pmap_is_ept(pmap))
pmap_remove_ept(pmap, sva, eva);
else
#endif
pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL);
}
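/*
 * pmap_do_remove: remove the mappings in [sva, eva).  A single page takes
 * a shortcut path; larger ranges are walked one page table page at a time.
 * Empty PTPs and released pv_entries are freed only after the TLB
 * shootdowns have been issued.
 */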
void
pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
{
pd_entry_t pde;
int result;
paddr_t ptppa;
vaddr_t blkendva;
struct vm_page *ptp;
struct pv_entry *pve;
struct pv_entry *free_pvs = NULL;
vaddr_t va;
int shootall = 0, shootself;
struct pg_to_free empty_ptps;
paddr_t scr3;
TAILQ_INIT(&empty_ptps);
scr3 = pmap_map_ptes(pmap);
shootself = (scr3 == 0);
if (sva + PAGE_SIZE == eva) {
if (pmap_pdes_valid(sva, &pde)) {
ptppa = pde & pg_frame;
if (pmap == pmap_kernel()) {
ptp = NULL;
} else {
ptp = pmap_find_ptp(pmap, sva, ptppa, 1);
#ifdef DIAGNOSTIC
if (ptp == NULL)
panic("%s: unmanaged PTP detected "
"in shortcut path", __func__);
#endif
}
result = pmap_remove_pte(pmap, ptp,
&PTE_BASE[pl1_i(sva)], sva, flags, &free_pvs);
if (result && ptp && ptp->wire_count <= 1)
pmap_free_ptp(pmap, ptp, sva, &empty_ptps);
pmap_tlb_shootpage(pmap, sva, shootself);
pmap_unmap_ptes(pmap, scr3);
pmap_tlb_shootwait();
} else {
pmap_unmap_ptes(pmap, scr3);
}
goto cleanup;
}
if ((eva - sva > 32 * PAGE_SIZE) && sva < VM_MIN_KERNEL_ADDRESS)
shootall = 1;
for (va = sva; va < eva; va = blkendva) {
blkendva = x86_round_pdr(va + 1);
if (blkendva > eva)
blkendva = eva;
if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE)
continue;
if (!pmap_pdes_valid(va, &pde))
continue;
ptppa = pde & pg_frame;
if (pmap == pmap_kernel()) {
ptp = NULL;
} else {
ptp = pmap_find_ptp(pmap, va, ptppa, 1);
#ifdef DIAGNOSTIC
if (ptp == NULL)
panic("%s: unmanaged PTP detected", __func__);
#endif
}
pmap_remove_ptes(pmap, ptp, (vaddr_t)&PTE_BASE[pl1_i(va)],
va, blkendva, flags, &free_pvs);
if (ptp && ptp->wire_count <= 1) {
pmap_free_ptp(pmap, ptp, va, &empty_ptps);
}
}
if (shootall)
pmap_tlb_shoottlb(pmap, shootself);
else
pmap_tlb_shootrange(pmap, sva, eva, shootself);
pmap_unmap_ptes(pmap, scr3);
pmap_tlb_shootwait();
cleanup:
while ((pve = free_pvs) != NULL) {
free_pvs = pve->pv_next;
pool_put(&pmap_pv_pool, pve);
}
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
TAILQ_REMOVE(&empty_ptps, ptp, pageq);
uvm_pagefree(ptp);
}
}
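/*
 * pmap_page_remove: remove every mapping of the managed page pg from all
 * pmaps that map it, syncing mod/ref bits and freeing page table pages
 * that become empty in the process.
 */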
void
pmap_page_remove(struct vm_page *pg)
{
struct pv_entry *pve;
struct pmap *pm;
pt_entry_t opte;
#ifdef DIAGNOSTIC
pd_entry_t pde;
#endif
struct pg_to_free empty_ptps;
struct vm_page *ptp;
paddr_t scr3;
int shootself;
TAILQ_INIT(&empty_ptps);
mtx_enter(&pg->mdpage.pv_mtx);
while ((pve = pg->mdpage.pv_list) != NULL) {
pmap_reference(pve->pv_pmap);
pm = pve->pv_pmap;
mtx_leave(&pg->mdpage.pv_mtx);
scr3 = pmap_map_ptes(pm);
shootself = (scr3 == 0);
mtx_enter(&pg->mdpage.pv_mtx);
if ((pve = pg->mdpage.pv_list) == NULL ||
pve->pv_pmap != pm) {
mtx_leave(&pg->mdpage.pv_mtx);
pmap_unmap_ptes(pm, scr3);
pmap_destroy(pm);
mtx_enter(&pg->mdpage.pv_mtx);
continue;
}
pg->mdpage.pv_list = pve->pv_next;
mtx_leave(&pg->mdpage.pv_mtx);
#ifdef DIAGNOSTIC
if (pve->pv_ptp != NULL && pmap_pdes_valid(pve->pv_va, &pde) &&
(pde & pg_frame) != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
printf("%s: pg=%p: va=%lx, pv_ptp=%p\n", __func__,
pg, pve->pv_va, pve->pv_ptp);
printf("%s: PTP's phys addr: "
"actual=%lx, recorded=%lx\n", __func__,
(unsigned long)(pde & pg_frame),
VM_PAGE_TO_PHYS(pve->pv_ptp));
panic("%s: mapped managed page has "
"invalid pv_ptp field", __func__);
}
#endif
opte = pmap_pte_set(&PTE_BASE[pl1_i(pve->pv_va)], 0);
if (opte & PG_W)
pve->pv_pmap->pm_stats.wired_count--;
pve->pv_pmap->pm_stats.resident_count--;
pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va, shootself);
pmap_sync_flags_pte(pg, opte);
if (pve->pv_ptp != NULL) {
pve->pv_ptp->wire_count--;
if (pve->pv_ptp->wire_count <= 1) {
pmap_free_ptp(pve->pv_pmap, pve->pv_ptp,
pve->pv_va, &empty_ptps);
}
}
pmap_unmap_ptes(pve->pv_pmap, scr3);
pmap_destroy(pve->pv_pmap);
pool_put(&pmap_pv_pool, pve);
mtx_enter(&pg->mdpage.pv_mtx);
}
mtx_leave(&pg->mdpage.pv_mtx);
pmap_tlb_shootwait();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
TAILQ_REMOVE(&empty_ptps, ptp, pageq);
uvm_pagefree(ptp);
}
}
int
pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
{
struct pv_entry *pve;
pt_entry_t *ptes;
int level, offs;
u_long mybits, testflags;
testflags = pmap_pte2flags(testbits);
if (pg->pg_flags & testflags)
return 1;
mybits = 0;
mtx_enter(&pg->mdpage.pv_mtx);
for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0;
pve = pve->pv_next) {
level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
&offs);
mybits |= (ptes[offs] & testbits);
}
mtx_leave(&pg->mdpage.pv_mtx);
if (mybits == 0)
return 0;
atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits));
return 1;
}
int
pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
{
struct pv_entry *pve;
pt_entry_t *ptes, opte;
u_long clearflags;
int result, level, offs;
clearflags = pmap_pte2flags(clearbits);
result = pg->pg_flags & clearflags;
if (result)
atomic_clearbits_int(&pg->pg_flags, clearflags);
mtx_enter(&pg->mdpage.pv_mtx);
for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) {
level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
&offs);
opte = ptes[offs];
if (opte & clearbits) {
result = 1;
pmap_pte_clearbits(&ptes[offs], (opte & clearbits));
pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va,
pmap_is_curpmap(pve->pv_pmap));
}
}
mtx_leave(&pg->mdpage.pv_mtx);
pmap_tlb_shootwait();
return (result != 0);
}
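/*
 * pmap_write_protect: restrict the mappings in [sva, eva) to prot by
 * setting/clearing the RW, NX and XO bits in place, then flush the
 * affected TLB entries.
 */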
void
pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
pt_entry_t *spte, *epte;
pt_entry_t clear = 0, set = 0;
vaddr_t blkendva;
int shootall = 0, shootself;
vaddr_t va;
paddr_t scr3;
scr3 = pmap_map_ptes(pmap);
shootself = (scr3 == 0);
if (!(prot & PROT_READ))
set |= pg_xo;
if (!(prot & PROT_WRITE))
clear = PG_RW;
if (!(prot & PROT_EXEC))
set |= pg_nx;
if ((eva - sva > 32 * PAGE_SIZE) && sva < VM_MIN_KERNEL_ADDRESS)
shootall = 1;
for (va = sva; va < eva; va = blkendva) {
blkendva = x86_round_pdr(va + 1);
if (blkendva > eva)
blkendva = eva;
if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE)
continue;
if (!pmap_pdes_valid(va, NULL))
continue;
#ifdef DIAGNOSTIC
if (va >= VM_MAXUSER_ADDRESS && va < VM_MAX_ADDRESS)
panic("%s: PTE space", __func__);
#endif
spte = &PTE_BASE[pl1_i(va)];
epte = &PTE_BASE[pl1_i(blkendva)];
for (; spte < epte ; spte++) {
if (!pmap_valid_entry(*spte))
continue;
pmap_pte_clearbits(spte, clear);
pmap_pte_setbits(spte, set);
}
}
if (shootall)
pmap_tlb_shoottlb(pmap, shootself);
else
pmap_tlb_shootrange(pmap, sva, eva, shootself);
pmap_unmap_ptes(pmap, scr3);
pmap_tlb_shootwait();
}
void
pmap_unwire(struct pmap *pmap, vaddr_t va)
{
pt_entry_t *ptes;
int level, offs;
level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
if (level == 0) {
#ifdef DIAGNOSTIC
if (!pmap_valid_entry(ptes[offs]))
panic("%s: invalid (unmapped) va 0x%lx", __func__, va);
#endif
if (__predict_true((ptes[offs] & PG_W) != 0)) {
pmap_pte_clearbits(&ptes[offs], PG_W);
pmap->pm_stats.wired_count--;
}
#ifdef DIAGNOSTIC
else {
printf("%s: wiring for pmap %p va 0x%lx "
"didn't change!\n", __func__, pmap, va);
}
#endif
}
#ifdef DIAGNOSTIC
else {
panic("%s: invalid PDE", __func__);
}
#endif
}
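/*
 * pmap_enter_special: on Meltdown-affected CPUs, enter a kernel mapping
 * into the shadow (U-K) page tables so it stays visible while the user
 * %cr3 is loaded, building any missing intermediate levels and sharing the
 * PTE with the regular kernel mapping when the physical address matches.
 */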
void
pmap_enter_special(vaddr_t va, paddr_t pa, vm_prot_t prot)
{
uint64_t l4idx, l3idx, l2idx, l1idx;
pd_entry_t *pd, *ptp;
paddr_t npa;
struct pmap *pmap = pmap_kernel();
pt_entry_t *ptes;
int level, offs;
if (!cpu_meltdown)
return;
if (va < VM_MIN_KERNEL_ADDRESS)
panic("%s: invalid special mapping va 0x%lx requested",
__func__, va);
if (pmap->pm_pdir_intel == NULL)
pmap->pm_pdir_intel = pool_get(&pmap_pdp_pool,
PR_WAITOK | PR_ZERO);
l4idx = (va & L4_MASK) >> L4_SHIFT;
l3idx = (va & L3_MASK) >> L3_SHIFT;
l2idx = (va & L2_MASK) >> L2_SHIFT;
l1idx = (va & L1_MASK) >> L1_SHIFT;
DPRINTF("%s: va=0x%llx pa=0x%llx l4idx=%lld l3idx=%lld "
"l2idx=%lld l1idx=%lld\n", __func__, (uint64_t)va,
(uint64_t)pa, l4idx, l3idx, l2idx, l1idx);
pd = pmap->pm_pdir_intel;
if (pd == NULL)
panic("%s: PML4 not initialized for pmap @ %p", __func__,
pmap);
npa = pd[l4idx] & PMAP_PA_MASK;
if (!npa) {
ptp = pool_get(&pmap_pdp_pool, PR_WAITOK | PR_ZERO);
if (!pmap_extract(pmap, (vaddr_t)ptp, &npa))
panic("%s: can't locate PDPT page", __func__);
pd[l4idx] = (npa | PG_RW | PG_V | pg_crypt);
DPRINTF("%s: allocated new PDPT page at phys 0x%llx, "
"setting PML4e[%lld] = 0x%llx\n", __func__,
(uint64_t)npa, l4idx, pd[l4idx]);
}
pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
if (pd == NULL)
panic("%s: can't locate PDPT @ pa=0x%llx", __func__,
(uint64_t)npa);
npa = pd[l3idx] & PMAP_PA_MASK;
if (!npa) {
ptp = pool_get(&pmap_pdp_pool, PR_WAITOK | PR_ZERO);
if (!pmap_extract(pmap, (vaddr_t)ptp, &npa))
panic("%s: can't locate PD page", __func__);
pd[l3idx] = (npa | PG_RW | PG_V | pg_crypt);
DPRINTF("%s: allocated new PD page at phys 0x%llx, "
"setting PDPTe[%lld] = 0x%llx\n", __func__,
(uint64_t)npa, l3idx, pd[l3idx]);
}
pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
if (pd == NULL)
panic("%s: can't locate PD page @ pa=0x%llx", __func__,
(uint64_t)npa);
npa = pd[l2idx] & PMAP_PA_MASK;
if (!npa) {
ptp = pool_get(&pmap_pdp_pool, PR_WAITOK | PR_ZERO);
if (!pmap_extract(pmap, (vaddr_t)ptp, &npa))
panic("%s: can't locate PT page", __func__);
pd[l2idx] = (npa | PG_RW | PG_V | pg_crypt);
DPRINTF("%s: allocated new PT page at phys 0x%llx, "
"setting PDE[%lld] = 0x%llx\n", __func__,
(uint64_t)npa, l2idx, pd[l2idx]);
}
pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
if (pd == NULL)
panic("%s: can't locate PT page @ pa=0x%llx", __func__,
(uint64_t)npa);
DPRINTF("%s: setting PTE, PT page @ phys 0x%llx virt 0x%llx prot "
"0x%llx was 0x%llx\n", __func__, (uint64_t)npa, (uint64_t)pd,
(uint64_t)prot, (uint64_t)pd[l1idx]);
pd[l1idx] = pa | protection_codes[prot] | PG_V | PG_W | pg_crypt;
level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
if (__predict_true(level == 0 && pmap_valid_entry(ptes[offs]))) {
if (((pd[l1idx] ^ ptes[offs]) & pg_frame) == 0) {
pd[l1idx] |= PG_G | (ptes[offs] & (PG_N | PG_WT));
ptes[offs] |= PG_G;
} else {
DPRINTF("%s: special diffing mapping at %llx\n",
__func__, (long long)va);
}
} else
DPRINTF("%s: no U+K mapping for special mapping?\n", __func__);
DPRINTF("%s: setting PTE[%lld] = 0x%llx\n", __func__, l1idx, pd[l1idx]);
}
#if NVMM > 0
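/*
 * pmap_convert: switch a pmap to the given type.  When converting to an
 * EPT pmap the top-level page is cleared and any Meltdown shadow directory
 * is released.
 */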
void
pmap_convert(struct pmap *pmap, int mode)
{
pt_entry_t *pte;
mtx_enter(&pmap->pm_mtx);
pmap->pm_type = mode;
if (pmap_is_ept(pmap)) {
pte = (pt_entry_t *)pmap->pm_pdir;
memset(pte, 0, PAGE_SIZE);
if (pmap->pm_pdir_intel != NULL) {
pool_put(&pmap_pdp_pool, pmap->pm_pdir_intel);
pmap->pm_pdir_intel = NULL;
}
}
mtx_leave(&pmap->pm_mtx);
}
void
pmap_remove_ept(struct pmap *pmap, vaddr_t sgpa, vaddr_t egpa)
{
vaddr_t v;
mtx_enter(&pmap->pm_mtx);
DPRINTF("%s: sgpa=0x%llx egpa=0x%llx\n", __func__, (uint64_t)sgpa,
(uint64_t)egpa);
for (v = sgpa; v < egpa + PAGE_SIZE; v += PAGE_SIZE)
pmap_do_remove_ept(pmap, v);
pmap_shootept(pmap, 1);
mtx_leave(&pmap->pm_mtx);
pmap_tlb_shootwait();
}
void
pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
{
uint64_t l4idx, l3idx, l2idx, l1idx;
struct vm_page *pg3, *pg2, *pg1;
paddr_t npa3, npa2, npa1;
pd_entry_t *pd4, *pd3, *pd2, *pd1;
pd_entry_t *pptes;
MUTEX_ASSERT_LOCKED(&pmap->pm_mtx);
l4idx = (gpa & L4_MASK) >> L4_SHIFT;
l3idx = (gpa & L3_MASK) >> L3_SHIFT;
l2idx = (gpa & L2_MASK) >> L2_SHIFT;
l1idx = (gpa & L1_MASK) >> L1_SHIFT;
pd4 = (pd_entry_t *)pmap->pm_pdir;
if (pd4 == NULL)
return;
npa3 = pd4[l4idx] & PMAP_PA_MASK;
if (!npa3)
return;
pd3 = (pd_entry_t *)PMAP_DIRECT_MAP(npa3);
pg3 = PHYS_TO_VM_PAGE(npa3);
npa2 = pd3[l3idx] & PMAP_PA_MASK;
if (!npa2)
return;
pd2 = (pd_entry_t *)PMAP_DIRECT_MAP(npa2);
pg2 = PHYS_TO_VM_PAGE(npa2);
npa1 = pd2[l2idx] & PMAP_PA_MASK;
if (!npa1)
return;
pd1 = (pd_entry_t *)PMAP_DIRECT_MAP(npa1);
pg1 = PHYS_TO_VM_PAGE(npa1);
if (pd1[l1idx] == 0)
return;
pd1[l1idx] = 0;
pg1->wire_count--;
pmap->pm_stats.resident_count--;
if (pg1->wire_count > 1)
return;
pg1->wire_count = 0;
pptes = (pd_entry_t *)PMAP_DIRECT_MAP(npa2);
pptes[l2idx] = 0;
uvm_pagefree(pg1);
pmap->pm_stats.resident_count--;
pg2->wire_count--;
if (pg2->wire_count > 1)
return;
pg2->wire_count = 0;
pptes = (pd_entry_t *)PMAP_DIRECT_MAP(npa3);
pptes[l3idx] = 0;
uvm_pagefree(pg2);
pmap->pm_stats.resident_count--;
pg3->wire_count--;
if (pg3->wire_count > 1)
return;
pg3->wire_count = 0;
pptes = pd4;
pptes[l4idx] = 0;
uvm_pagefree(pg3);
pmap->pm_stats.resident_count--;
}
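/*
 * pmap_enter_ept: enter a guest-physical (gpa) to host-physical (hpa)
 * mapping into an EPT pmap, allocating intermediate paging-structure pages
 * as needed.  Returns 0 on success or ENOMEM if an allocation fails.
 */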
int
pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot)
{
uint64_t l4idx, l3idx, l2idx, l1idx;
pd_entry_t *pd, npte;
struct vm_page *ptp, *pptp;
paddr_t npa;
struct uvm_object *obj;
int ret = 0;
if (gpa > MAXDSIZ)
return ENOMEM;
l4idx = (gpa & L4_MASK) >> L4_SHIFT;
l3idx = (gpa & L3_MASK) >> L3_SHIFT;
l2idx = (gpa & L2_MASK) >> L2_SHIFT;
l1idx = (gpa & L1_MASK) >> L1_SHIFT;
mtx_enter(&pmap->pm_mtx);
pd = (pd_entry_t *)pmap->pm_pdir;
if (pd == NULL) {
ret = ENOMEM;
goto unlock;
}
npa = pd[l4idx] & PMAP_PA_MASK;
if (!npa) {
obj = &pmap->pm_obj[2];
ptp = uvm_pagealloc(obj, ptp_va2o(gpa, 3), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
if (ptp == NULL) {
ret = ENOMEM;
goto unlock;
}
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1;
npa = VM_PAGE_TO_PHYS(ptp);
pd[l4idx] = (npa | EPT_R | EPT_W | EPT_X);
pmap->pm_stats.resident_count++;
pptp = ptp;
} else {
pptp = PHYS_TO_VM_PAGE(npa);
}
pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
if (pd == NULL)
panic("%s: can't locate PDPT @ pa=0x%llx", __func__,
(uint64_t)npa);
npa = pd[l3idx] & PMAP_PA_MASK;
if (!npa) {
obj = &pmap->pm_obj[1];
ptp = uvm_pagealloc(obj, ptp_va2o(gpa, 2), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
if (ptp == NULL) {
ret = ENOMEM;
goto unlock;
}
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1;
pptp->wire_count++;
npa = VM_PAGE_TO_PHYS(ptp);
pd[l3idx] = (npa | EPT_R | EPT_W | EPT_X);
pmap->pm_stats.resident_count++;
pptp = ptp;
} else {
pptp = PHYS_TO_VM_PAGE(npa);
}
pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
if (pd == NULL)
panic("%s: can't locate PD page @ pa=0x%llx", __func__,
(uint64_t)npa);
npa = pd[l2idx] & PMAP_PA_MASK;
if (!npa) {
obj = &pmap->pm_obj[0];
ptp = uvm_pagealloc(obj, ptp_va2o(gpa, 1), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
if (ptp == NULL) {
ret = ENOMEM;
goto unlock;
}
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1;
pptp->wire_count++;
npa = VM_PAGE_TO_PHYS(ptp);
pd[l2idx] = (npa | EPT_R | EPT_W | EPT_X);
pmap->pm_stats.resident_count++;
} else {
ptp = PHYS_TO_VM_PAGE(npa);
if (ptp == NULL)
panic("%s: ptp page vanished?", __func__);
}
pd = (pd_entry_t *)PMAP_DIRECT_MAP(npa);
if (pd == NULL)
panic("%s: can't locate PT page @ pa=0x%llx", __func__,
(uint64_t)npa);
npte = hpa | EPT_WB;
if (prot & PROT_READ)
npte |= EPT_R;
if (prot & PROT_WRITE)
npte |= EPT_W;
if (prot & PROT_EXEC)
npte |= EPT_X;
if (pd[l1idx] == 0) {
ptp->wire_count++;
pmap->pm_stats.resident_count++;
	}
pd[l1idx] = npte;
unlock:
mtx_leave(&pmap->pm_mtx);
return ret;
}
#endif
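/*
 * pmap_enter: enter a mapping of pa at va with the given protection,
 * allocating a pv_entry and page table pages as needed and replacing any
 * existing mapping at va.  Returns 0 on success, or ENOMEM when
 * PMAP_CANFAIL is set and resources are exhausted.
 */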
int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
{
pt_entry_t opte, npte;
struct vm_page *ptp, *pg = NULL;
struct pv_entry *pve, *opve = NULL;
int ptpdelta, wireddelta, resdelta;
int wired = (flags & PMAP_WIRED) != 0;
int crypt = (flags & PMAP_NOCRYPT) == 0;
int nocache = (pa & PMAP_NOCACHE) != 0;
int wc = (pa & PMAP_WC) != 0;
int error, shootself;
paddr_t scr3;
#if NVMM > 0
if (pmap_is_ept(pmap))
return pmap_enter_ept(pmap, va, pa, prot);
#endif
KASSERT(!(wc && nocache));
pa &= PMAP_PA_MASK;
#ifdef DIAGNOSTIC
if (va == (vaddr_t) PDP_BASE)
panic("%s: trying to map over PDP!", __func__);
if (va >= VM_MIN_KERNEL_ADDRESS &&
!pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]))
panic("%s: missing kernel PTP for va %lx!", __func__, va);
#endif
pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
if (pve == NULL) {
if (flags & PMAP_CANFAIL) {
error = ENOMEM;
goto out;
}
panic("%s: no pv entries available", __func__);
}
scr3 = pmap_map_ptes(pmap);
shootself = (scr3 == 0);
if (pmap == pmap_kernel()) {
ptp = NULL;
} else {
ptp = pmap_get_ptp(pmap, va);
if (ptp == NULL) {
if (flags & PMAP_CANFAIL) {
pmap_unmap_ptes(pmap, scr3);
error = ENOMEM;
goto out;
}
panic("%s: get ptp failed", __func__);
}
}
opte = PTE_BASE[pl1_i(va)];
if (pmap_valid_entry(opte)) {
resdelta = 0;
if (wired && (opte & PG_W) == 0)
wireddelta = 1;
else if (!wired && (opte & PG_W) != 0)
wireddelta = -1;
else
wireddelta = 0;
ptpdelta = 0;
if ((opte & pg_frame) == pa) {
if (opte & PG_PVLIST) {
pg = PHYS_TO_VM_PAGE(pa);
#ifdef DIAGNOSTIC
if (pg == NULL)
panic("%s: same pa, PG_PVLIST "
"mapping with unmanaged page: "
"va 0x%lx, opte 0x%llx, pa 0x%lx",
__func__, va, opte, pa);
#endif
pmap_sync_flags_pte(pg, opte);
} else {
#ifdef DIAGNOSTIC
if (PHYS_TO_VM_PAGE(pa) != NULL)
panic("%s: same pa, no PG_PVLIST "
"mapping with managed page: "
"va 0x%lx, opte 0x%llx, pa 0x%lx",
__func__, va, opte, pa);
#endif
}
goto enter_now;
}
if (opte & PG_PVLIST) {
pg = PHYS_TO_VM_PAGE(opte & pg_frame);
#ifdef DIAGNOSTIC
if (pg == NULL)
panic("%s: PG_PVLIST mapping with unmanaged "
"page: va 0x%lx, opte 0x%llx, pa 0x%lx",
__func__, va, opte, pa);
#endif
pmap_sync_flags_pte(pg, opte);
opve = pmap_remove_pv(pg, pmap, va);
pg = NULL;
}
} else {
resdelta = 1;
if (wired)
wireddelta = 1;
else
wireddelta = 0;
if (ptp != NULL)
ptpdelta = 1;
else
ptpdelta = 0;
}
if (pmap_initialized)
pg = PHYS_TO_VM_PAGE(pa);
if (pg != NULL) {
pmap_enter_pv(pg, pve, pmap, va, ptp);
pve = NULL;
}
enter_now:
pmap->pm_stats.resident_count += resdelta;
pmap->pm_stats.wired_count += wireddelta;
if (ptp != NULL)
ptp->wire_count += ptpdelta;
KASSERT(pg == PHYS_TO_VM_PAGE(pa));
npte = pa | protection_codes[prot] | PG_V;
if (pg != NULL) {
npte |= PG_PVLIST;
if (pg->pg_flags & PG_PMAP_WC) {
KASSERT(nocache == 0);
wc = 1;
}
}
if (wc)
npte |= pmap_pg_wc;
if (wired)
npte |= PG_W;
if (nocache)
npte |= PG_N;
if (va < VM_MAXUSER_ADDRESS)
npte |= ((flags & PMAP_EFI) ? 0 : PG_u);
else if (va < VM_MAX_ADDRESS)
npte |= (PG_u | PG_RW);
if (pmap == pmap_kernel())
npte |= pg_g_kern;
if (crypt)
npte |= pg_crypt;
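	/*
	 * Install the new PTE.  If a valid read-only entry is being made
	 * writable, write it read-only first, flush the stale entry from
	 * all TLBs, and only then set the final (writable) PTE.
	 */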
if (! pmap_valid_entry(opte)) {
PTE_BASE[pl1_i(va)] = npte;
} else if ((opte | (npte ^ PG_RW)) & PG_RW) {
PTE_BASE[pl1_i(va)] = npte;
if (nocache && (opte & PG_N) == 0)
wbinvd_on_all_cpus();
pmap_tlb_shootpage(pmap, va, shootself);
} else {
PTE_BASE[pl1_i(va)] = npte ^ PG_RW;
if (nocache && (opte & PG_N) == 0)
wbinvd_on_all_cpus();
pmap_tlb_shootpage(pmap, va, shootself);
pmap_tlb_shootwait();
PTE_BASE[pl1_i(va)] = npte;
}
pmap_unmap_ptes(pmap, scr3);
pmap_tlb_shootwait();
error = 0;
out:
if (pve != NULL)
pool_put(&pmap_pv_pool, pve);
if (opve != NULL)
pool_put(&pmap_pv_pool, opve);
return error;
}
int
pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp)
{
struct vm_page *ptp;
struct pmap *kpm = pmap_kernel();
if (uvm.page_init_done == 0) {
vaddr_t va;
va = pmap_steal_memory(PAGE_SIZE, NULL, NULL);
*paddrp = PMAP_DIRECT_UNMAP(va);
} else {
ptp = uvm_pagealloc(&kpm->pm_obj[level - 1],
ptp_va2o(va, level), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
if (ptp == NULL)
panic("%s: out of memory", __func__);
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1;
*paddrp = VM_PAGE_TO_PHYS(ptp);
}
kpm->pm_stats.resident_count++;
return 1;
}
void
pmap_alloc_level(vaddr_t kva, int lvl, long *needed_ptps)
{
unsigned long i;
vaddr_t va;
paddr_t pa;
unsigned long index, endindex;
int level;
pd_entry_t *pdep;
for (level = lvl; level > 1; level--) {
if (level == PTP_LEVELS)
pdep = pmap_kernel()->pm_pdir;
else
pdep = normal_pdes[level - 2];
va = kva;
index = pl_i(kva, level);
endindex = index + needed_ptps[level - 1];
if (nkptp[level - 1] != 0)
index++;
else
endindex--;
for (i = index; i <= endindex; i++) {
pmap_get_physpage(va, level - 1, &pa);
pdep[i] = pa | PG_RW | PG_V | pg_nx | pg_crypt;
nkptp[level - 1]++;
va += nbpd[level - 1];
}
}
}
static vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS;
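/*
 * pmap_growkernel: grow the kernel address space to maxkvaddr, allocating
 * page table pages at each level and copying any new top-level kernel
 * entries into every existing pmap.
 */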
vaddr_t
pmap_growkernel(vaddr_t maxkvaddr)
{
struct pmap *kpm = pmap_kernel(), *pm;
int s, i;
unsigned newpdes;
long needed_kptp[PTP_LEVELS], target_nptp, old;
if (maxkvaddr <= pmap_maxkvaddr)
return pmap_maxkvaddr;
maxkvaddr = x86_round_pdr(maxkvaddr);
old = nkptp[PTP_LEVELS - 1];
for (i = PTP_LEVELS - 1; i >= 1; i--) {
target_nptp = pl_i(maxkvaddr, i + 1) -
pl_i(VM_MIN_KERNEL_ADDRESS, i + 1);
if (target_nptp > nkptpmax[i])
panic("%s: out of KVA space", __func__);
needed_kptp[i] = target_nptp - nkptp[i] + 1;
}
s = splhigh();
pmap_alloc_level(pmap_maxkvaddr, PTP_LEVELS, needed_kptp);
if (needed_kptp[PTP_LEVELS - 1] != 0) {
newpdes = nkptp[PTP_LEVELS - 1] - old;
mtx_enter(&pmaps_lock);
LIST_FOREACH(pm, &pmaps, pm_list) {
memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old],
&kpm->pm_pdir[PDIR_SLOT_KERN + old],
newpdes * sizeof (pd_entry_t));
}
mtx_leave(&pmaps_lock);
}
pmap_maxkvaddr = maxkvaddr;
splx(s);
return maxkvaddr;
}
vaddr_t
pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end)
{
int segno;
u_int npg;
vaddr_t va;
paddr_t pa;
struct vm_physseg *seg;
size = round_page(size);
npg = atop(size);
for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) {
if (seg->avail_end - seg->avail_start < npg)
continue;
if (seg->avail_start == seg->start ||
seg->avail_end == seg->end)
break;
}
if (segno == vm_nphysseg) {
panic("%s: out of memory", __func__);
} else {
if (seg->avail_start == seg->start) {
pa = ptoa(seg->avail_start);
seg->avail_start += npg;
seg->start += npg;
} else {
pa = ptoa(seg->avail_end) - size;
seg->avail_end -= npg;
seg->end -= npg;
}
if (seg->start == seg->end) {
if (vm_nphysseg-- == 1)
panic("%s: out of memory", __func__);
while (segno < vm_nphysseg) {
seg[0] = seg[1];
seg++;
segno++;
}
}
va = PMAP_DIRECT_MAP(pa);
memset((void *)va, 0, size);
}
if (start != NULL)
*start = virtual_avail;
if (end != NULL)
*end = VM_MAX_KERNEL_ADDRESS;
return (va);
}
#ifdef MULTIPROCESSOR
#ifdef MP_LOCKDEBUG
#include <ddb/db_output.h>
#endif
struct {
volatile int lock __attribute__((aligned(64)));
} tlb_shoot_lock __attribute__((section(".kudata")));
struct {
volatile int cpu __attribute__((aligned(64)));
} tlb_shoot_cpu __attribute__((section(".kudata")));
volatile u_int tlb_shoot_counts[MAXCPUS] __attribute__((section(".kudata")));
volatile vaddr_t tlb_shoot_addr1 __attribute__((section(".kudata")));
volatile vaddr_t tlb_shoot_addr2 __attribute__((section(".kudata")));
volatile int tlb_shoot_first_pcid __attribute__((section(".kudata")));
#if NVMM > 0
volatile uint64_t ept_shoot_mode __attribute__((section(".kudata")));
volatile struct vmx_invept_descriptor ept_shoot_vid
__attribute__((section(".kudata")));
#endif
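/*
 * pmap_start_tlb_shoot: spin for the global TLB shootdown lock, then record
 * this CPU as the initiator and set its count of outstanding target CPUs,
 * which pmap_tlb_shootwait() later spins on.
 */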
static inline void
pmap_start_tlb_shoot(u_int targets, const char *func)
{
u_int cpuid = curcpu()->ci_cpuid;
while (atomic_cas_uint(&tlb_shoot_lock.lock, 0, 1) != 0) {
#ifdef MP_LOCKDEBUG
long nticks = __mp_lock_spinout;
#endif
while (tlb_shoot_lock.lock != 0) {
CPU_BUSY_CYCLE();
#ifdef MP_LOCKDEBUG
if (--nticks <= 0) {
db_printf("%s: spun out", func);
db_enter();
nticks = __mp_lock_spinout;
}
#endif
}
}
tlb_shoot_cpu.cpu = cpuid;
atomic_swap_uint(&tlb_shoot_counts[cpuid], targets);
}
void
pmap_tlb_shootwait(void)
{
u_int cpuid = curcpu()->ci_cpuid;
#ifdef MP_LOCKDEBUG
long nticks = __mp_lock_spinout;
#endif
while (tlb_shoot_counts[cpuid] > 0) {
CPU_BUSY_CYCLE();
#ifdef MP_LOCKDEBUG
if (--nticks <= 0) {
db_printf("%s: spun out", __func__);
db_enter();
nticks = __mp_lock_spinout;
}
#endif
}
}
static inline void
pmap_tlb_shootfail(void)
{
u_int cpuid = curcpu()->ci_cpuid;
if (atomic_dec_int_nv(&tlb_shoot_counts[cpuid]) == 0)
tlb_shoot_lock.lock = 0;
}
#endif
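/*
 * pmap_tlb_shootpage: invalidate the mapping of va in pm locally (when
 * shootself) and IPI every other CPU where the pmap is active.  With PCID,
 * kernel addresses are flushed from the kernel, proc and temp PCIDs, and
 * user addresses also from the Meltdown user PCID when needed.
 */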
void
pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
{
int is_kva = va >= VM_MIN_KERNEL_ADDRESS;
#ifdef MULTIPROCESSOR
struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
int targets = 0;
u_int8_t mask[howmany(MAXCPUS, 8)] = { 0 };
CPU_INFO_FOREACH(cii, ci) {
if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
continue;
if (!is_kva && !pmap_is_active(pm, ci))
continue;
setbit(mask, ci->ci_cpuid);
targets++;
}
if (targets) {
int s = splvm();
pmap_start_tlb_shoot(targets, __func__);
tlb_shoot_first_pcid = is_kva ? PCID_KERN : PCID_PROC;
tlb_shoot_addr1 = va;
CPU_INFO_FOREACH(cii, ci) {
if (isclr(mask, ci->ci_cpuid))
continue;
if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
pmap_tlb_shootfail();
}
splx(s);
}
#endif
if (!pmap_use_pcid) {
if (shootself)
pmap_update_pg(va);
} else if (is_kva) {
invpcid(INVPCID_ADDR, PCID_PROC, va);
invpcid(INVPCID_ADDR, PCID_KERN, va);
invpcid(INVPCID_ADDR, PCID_TEMP, va);
} else if (shootself) {
invpcid(INVPCID_ADDR, PCID_PROC, va);
if (cpu_meltdown)
invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
}
}
void
pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
{
int is_kva = sva >= VM_MIN_KERNEL_ADDRESS;
vaddr_t va;
#ifdef MULTIPROCESSOR
struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
int targets = 0;
u_int8_t mask[howmany(MAXCPUS, 8)] = { 0 };
CPU_INFO_FOREACH(cii, ci) {
if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
continue;
if (!is_kva && !pmap_is_active(pm, ci))
continue;
setbit(mask, ci->ci_cpuid);
targets++;
}
if (targets) {
int s = splvm();
pmap_start_tlb_shoot(targets, __func__);
tlb_shoot_first_pcid = is_kva ? PCID_KERN : PCID_PROC;
tlb_shoot_addr1 = sva;
tlb_shoot_addr2 = eva;
CPU_INFO_FOREACH(cii, ci) {
if (isclr(mask, ci->ci_cpuid))
continue;
if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
pmap_tlb_shootfail();
}
splx(s);
}
#endif
if (!pmap_use_pcid) {
if (shootself) {
for (va = sva; va < eva; va += PAGE_SIZE)
pmap_update_pg(va);
}
} else if (is_kva) {
for (va = sva; va < eva; va += PAGE_SIZE) {
invpcid(INVPCID_ADDR, PCID_PROC, va);
invpcid(INVPCID_ADDR, PCID_KERN, va);
invpcid(INVPCID_ADDR, PCID_TEMP, va);
}
} else if (shootself) {
if (cpu_meltdown) {
for (va = sva; va < eva; va += PAGE_SIZE) {
invpcid(INVPCID_ADDR, PCID_PROC, va);
invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
}
} else {
for (va = sva; va < eva; va += PAGE_SIZE)
invpcid(INVPCID_ADDR, PCID_PROC, va);
}
}
}
void
pmap_tlb_shoottlb(struct pmap *pm, int shootself)
{
#ifdef MULTIPROCESSOR
struct cpu_info *ci, *self = curcpu();
CPU_INFO_ITERATOR cii;
int targets = 0;
u_int8_t mask[howmany(MAXCPUS, 8)] = { 0 };
KASSERT(pm != pmap_kernel());
CPU_INFO_FOREACH(cii, ci) {
if (ci == self || !pmap_is_active(pm, ci) ||
!(ci->ci_flags & CPUF_RUNNING))
continue;
setbit(mask, ci->ci_cpuid);
targets++;
}
if (targets) {
int s = splvm();
pmap_start_tlb_shoot(targets, __func__);
CPU_INFO_FOREACH(cii, ci) {
if (isclr(mask, ci->ci_cpuid))
continue;
if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
pmap_tlb_shootfail();
}
splx(s);
}
#endif
if (shootself) {
if (!pmap_use_pcid)
tlbflush();
else {
invpcid(INVPCID_PCID, PCID_PROC, 0);
if (cpu_meltdown)
invpcid(INVPCID_PCID, PCID_PROC_INTEL, 0);
}
}
}
#if NVMM > 0
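/*
 * pmap_shootept: invalidate the cached EPT translations for a guest pmap
 * on this CPU (when shootself and in VMM mode) and on every other CPU
 * where it is active and VMM mode is enabled.
 */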
void
pmap_shootept(struct pmap *pm, int shootself)
{
struct cpu_info *self = curcpu();
#ifdef MULTIPROCESSOR
struct cpu_info *ci;
CPU_INFO_ITERATOR cii;
int targets = 0;
u_int8_t mask[howmany(MAXCPUS, 8)] = { 0 };
KASSERT(pmap_is_ept(pm));
CPU_INFO_FOREACH(cii, ci) {
if (ci == self || !pmap_is_active(pm, ci) ||
!(ci->ci_flags & CPUF_RUNNING) ||
!(ci->ci_flags & CPUF_VMM))
continue;
setbit(mask, ci->ci_cpuid);
targets++;
}
if (targets) {
int s = splvm();
pmap_start_tlb_shoot(targets, __func__);
ept_shoot_mode = self->ci_vmm_cap.vcc_vmx.vmx_invept_mode;
ept_shoot_vid.vid_eptp = pm->eptp;
ept_shoot_vid.vid_reserved = 0;
CPU_INFO_FOREACH(cii, ci) {
if (isclr(mask, ci->ci_cpuid))
continue;
if (x86_fast_ipi(ci, LAPIC_IPI_INVEPT) != 0)
pmap_tlb_shootfail();
}
splx(s);
}
#endif
if (shootself && (self->ci_flags & CPUF_VMM)) {
struct vmx_invept_descriptor vid;
vid.vid_eptp = pm->eptp;
vid.vid_reserved = 0;
invept(self->ci_vmm_cap.vcc_vmx.vmx_invept_mode, &vid);
}
}
#endif