#include <sys/vm.h>
#include <sys/exec.h>
#include <sys/exechdr.h>
#include <vm/seg_kmem.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kdi.h>
#include <sys/cpu_module.h>
#include <sys/secflags.h>
#include <vm/hat_sfmmu.h>
#include <sys/memnode.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <vm/vm_dep.h>
#include <vm/page.h>
#include <sys/platform_module.h>
/*
 * Master switch for physical page coloring.  When this is 0,
 * page_coloring_init() sets page_colors to 1 and gives every page size a
 * single color.
 */
int do_pg_coloring = 0;
/*
 * NOTE(review): not referenced in this part of the file; presumably
 * consulted by the code that decides whether to set do_pg_coloring --
 * confirm against the cpu module configuration routines.
 */
int use_page_coloring = 1;
/*
 * Defined elsewhere; assigned by page_coloring_init() in this file.
 */
extern uint_t page_colors;
extern uint_t page_colors_mask;
extern uint_t page_coloring_shift;
/*
 * Set by page_coloring_init() to cpu_setsize / MMU_PAGESIZE, but only when
 * it is still 0 and cpu_setsize is positive and smaller than ecache_setsize.
 */
int cpu_page_colors;
/*
 * Set by page_coloring_init() to vac_size / MMU_PAGESIZE and that value
 * minus one, respectively.
 */
uint_t vac_colors = 0;
uint_t vac_colors_mask = 0;
/*
 * Optional hook: declared weak, so page_coloring_init() checks its address
 * before calling it.
 */
extern void page_coloring_init_cpu();
#pragma weak page_coloring_init_cpu
/*
 * ecache_setsize recorded in the cpunodes[] entry of the current CPU.
 */
#define CPUSETSIZE() (cpunodes[CPU->cpu_id].ecache_setsize)
/*
 * NOTE(review): presumably the page list counters; not used in this part
 * of the file.
 */
plcnt_t plcnt;
/*
 * Only present when SF_ERRATA_57 is defined.  pagefault() refuses S_EXEC
 * faults and valid_usr_range() refuses PROT_EXEC ranges that lie below this
 * address in a 64-bit address space.
 */
#if defined(SF_ERRATA_57)
caddr_t errata57_limit;
#endif
/*
 * Routines defined outside this file.
 */
extern void page_relocate_hash(page_t *, page_t *);
extern void map_addr_proc(caddr_t *, size_t, offset_t, int, caddr_t,
struct proc *, uint_t);
extern page_t *page_get_freelist(struct vnode *, u_offset_t, struct seg *,
caddr_t, size_t, uint_t, struct lgrp *);
/*
 * Convert a page frame number to the form returned to callers of this
 * interface.  This implementation is the identity: pf is returned unchanged.
 */
pfn_t
impl_obmem_pfnum(pfn_t pf)
{
return (pf);
}
/*
 * Classify a page frame number using physmax as the only boundary.
 *
 * Returns 1 when pf is at or below physmax, and 0 when it is above it.
 * No lower bound is checked.
 */
int
pf_is_memory(pfn_t pf)
{
	return (pf > physmax ? 0 : 1);
}
/*
 * Resolve a page fault at addr.
 *
 * addr      faulting virtual address
 * type, rw  fault type and access type, passed through to as_fault()
 * iskernel  non-zero to fault against the kernel address space (kas),
 *           zero to fault against the current process' address space
 *
 * Returns the faultcode_t from as_fault(), FC_NOMAP for an invalid or
 * unmappable address, or FC_MAKE_ERR(err) when as_map() fails.
 *
 * For a user fault that as_fault() reports as FC_NOMAP, the address is
 * checked against the process' break area and stack area.  If it falls in
 * one of them, a zero-fill mapping is created over the unmapped gap around
 * addr and the fault is retried with F_INVAL.
 */
faultcode_t
pagefault(caddr_t addr, enum fault_type type, enum seg_rw rw, int iskernel)
{
	struct as *as;
	struct proc *p;
	faultcode_t res;
	caddr_t base;
	size_t len;
	uintptr_t pg_start, pg_end;
	int err = 0;

	if (INVALID_VADDR(addr))
		return (FC_NOMAP);

	if (iskernel) {
		as = &kas;
	} else {
		p = curproc;
		as = p->p_as;
#if defined(SF_ERRATA_57)
		/* Exec faults below the errata limit are never satisfied. */
		if (rw == S_EXEC && AS_TYPE_64BIT(as) &&
		    addr < errata57_limit)
			return (FC_NOMAP);
#endif
	}

	/* First attempt: let the address space handle it. */
	res = as_fault(as->a_hat, as, addr, 1, type, rw);

	/* Only an unmapped user address gets the retry treatment below. */
	if (res != FC_NOMAP || iskernel != 0)
		return (res);

	/* Is addr inside the break area?  If not, try the stack area. */
	base = p->p_brkbase;
	len = p->p_brksize;
	if (addr < base || addr >= base + len) {
		base = (caddr_t)(p->p_usrstack - p->p_stksize);
		len = p->p_stksize;
		if (addr < base || addr >= p->p_usrstack)
			return (FC_NOMAP);
	}

	/* Widen [base, base + len) outward to page boundaries. */
	pg_end = ((uintptr_t)base + len + PAGEOFFSET) & PAGEMASK;
	pg_start = (uintptr_t)base & PAGEMASK;
	len = pg_end - pg_start;
	base = (caddr_t)pg_start;

	as_rangelock(as);
	as_purge(as);
	/*
	 * as_gap() returning 0 means a gap containing addr was found, so map
	 * it zero-fill.  Otherwise nothing is mapped here and we simply retry.
	 */
	if (as_gap(as, PAGESIZE, &base, &len, AH_CONTAIN, addr) == 0)
		err = as_map(as, base, len, segvn_create, zfod_argsp);
	as_rangeunlock(as);

	if (err != 0)
		return (FC_MAKE_ERR(err));

	return (as_fault(as->a_hat, as, addr, 1, F_INVAL, rw));
}
/*
 * Choose a mapping address for the current process by calling
 * map_addr_proc() with an upper address limit derived from flags:
 * USERLIMIT32 when _MAP_LOW32 is set, otherwise the a_userlimit of the
 * process' address space.
 */
void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
	struct proc *p = curproc;
	caddr_t userlimit;

	if (flags & _MAP_LOW32)
		userlimit = (caddr_t)USERLIMIT32;
	else
		userlimit = p->p_as->a_userlimit;

	map_addr_proc(addrp, len, off, vacalign, userlimit, p, flags);
}
/*
 * Bounds of a virtual address range that must not be mapped:
 * valid_va_range_aligned() trims candidate ranges around it and
 * valid_usr_range() rejects ranges that intersect it.
 */
caddr_t hole_start, hole_end;
/*
 * NOTE(review): kpm_* are not referenced in this part of the file;
 * presumably they describe the kpm mapping window -- confirm at the users.
 */
caddr_t kpm_vbase;
size_t kpm_size;
uchar_t kpm_size_shift;
/*
 * Incremented by valid_va_range_aligned() each time *basep + *lenp wraps
 * past the top of the address space.
 */
int valid_va_range_aligned_wraparound;
/*
 * Decide whether [*basep, *basep + *lenp) contains a usable range of at
 * least minlen bytes with redzone bytes of room on each side, whose start
 * can be placed at phase "off" from an "align" boundary.
 *
 * The range is first clipped so that it does not wrap past the top of the
 * address space, then trimmed so that it does not include any of
 * [hole_start, hole_end).  When the range spans the hole, dir == AH_LO
 * prefers the part below the hole and any other dir prefers the part above.
 *
 * Returns 1 and rewrites *basep and *lenp to the accepted range (redzones
 * included) on success.  Returns 0 on failure; *lenp may already have been
 * clipped by the wraparound handling in that case.
 */
int
valid_va_range_aligned(caddr_t *basep, size_t *lenp, size_t minlen, int dir,
    size_t align, size_t redzone, size_t off)
{
	caddr_t lo, hi;
	size_t need;

	ASSERT(align == 0 ? off == 0 : off < align);
	ASSERT(ISP2(align));
	ASSERT(align == 0 || align >= PAGESIZE);

	lo = *basep;
	hi = lo + *lenp;
	need = minlen + 2 * redzone;

	/* hi wrapped past the top: cut the length back and count the event. */
	if (hi < lo) {
		*lenp = 0UL - (uintptr_t)lo - 1UL;
		valid_va_range_aligned_wraparound++;
		hi = lo + *lenp;
	}
	if (*lenp < need)
		return (0);

	if (lo >= hole_start) {
		/* Starts in or above the hole. */
		if (hi < hole_end)
			return (0);
		if (lo < hole_end)
			lo = hole_end;
	} else if (hi > hole_start) {
		/* Starts below the hole and reaches into it. */
		if (hi < hole_end) {
			/* Ends inside the hole: keep only the lower part. */
			hi = hole_start;
		} else {
			/* Spans the whole hole: pick one side. */
			int below_fits = (hole_start - lo >= need);
			int above_fits = (hi - hole_end >= need);

			if (!below_fits && !above_fits)
				return (0);

			if (dir == AH_LO) {
				if (below_fits)
					hi = hole_start;
				else
					lo = hole_end;
			} else {
				if (above_fits)
					lo = hole_end;
				else
					hi = hole_start;
			}
		}
	}

	if (hi - lo < need)
		return (0);

	if (align > 1) {
		caddr_t usable_lo = lo + redzone;
		caddr_t usable_hi = hi - redzone;
		caddr_t aligned_lo =
		    (caddr_t)P2PHASEUP((uintptr_t)usable_lo, align, off);

		/*
		 * Fail if rounding up wrapped around, or if what is left
		 * between the aligned start and the upper redzone is too short.
		 */
		if (aligned_lo < usable_lo || usable_hi < aligned_lo ||
		    usable_hi - aligned_lo < minlen)
			return (0);
	}

	*basep = lo;
	*lenp = hi - lo;
	return (1);
}
/*
 * Convenience form of valid_va_range_aligned() with no alignment, no
 * redzone and no phase offset.  Returns what that function returns and
 * adjusts *basep and *lenp in the same way.
 */
int
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
{
	int usable;

	usable = valid_va_range_aligned(basep, lenp, minlen, dir, 0, 0, 0);
	return (usable);
}
/*
 * valid_usr_range() rejects ranges that start at or below this address
 * when the owning process has PROC_SEC_FORBIDNULLMAP enabled.
 * Default: 0x10000 (64K).
 */
uintptr_t forbidden_null_mapping_sz = 0x10000;
/*
 * Check whether [addr, addr + len) with protections prot is acceptable for
 * the user address space as, given the upper bound userlimit.
 *
 * Returns RANGE_BADADDR when the range is empty or wraps, extends past
 * userlimit, starts at or below forbidden_null_mapping_sz in a process
 * with PROC_SEC_FORBIDNULLMAP enabled, or intersects
 * [hole_start, hole_end).  With SF_ERRATA_57 defined, returns
 * RANGE_BADPROT for a PROT_EXEC range below errata57_limit in a 64-bit
 * address space.  Otherwise returns RANGE_OKAY.
 */
int
valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
    caddr_t userlimit)
{
	caddr_t eaddr = addr + len;

	/* Empty or wrapped range. */
	if (eaddr <= addr)
		return (RANGE_BADADDR);

	/* Any part at or beyond the user limit. */
	if (addr >= userlimit || eaddr > userlimit)
		return (RANGE_BADADDR);

	/* Low mappings are refused only when the secflag asks for it. */
	if (addr <= (caddr_t)forbidden_null_mapping_sz) {
		if (as->a_proc != NULL &&
		    secflag_enabled(as->a_proc, PROC_SEC_FORBIDNULLMAP))
			return (RANGE_BADADDR);
	}

	/* Intersects the unmappable hole. */
	if (addr < hole_end && eaddr > hole_start)
		return (RANGE_BADADDR);

#if defined(SF_ERRATA_57)
	ASSERT64(addr <= (caddr_t)0xffffffff80000000ul ||
	    errata57_limit == 0);

	if ((prot & PROT_EXEC) && (addr < errata57_limit) &&
	    AS_TYPE_64BIT(as))
		return (RANGE_BADPROT);
#endif
	return (RANGE_OKAY);
}
/*
 * Check the machine type recorded in an a.out header description.
 * Returns 0 when exp->ux_mach is M_SPARC, ENOEXEC otherwise.
 */
int
chkaout(struct exdata *exp)
{
	return (exp->ux_mach == M_SPARC ? 0 : ENOEXEC);
}
/*
 * Report whether addr and off disagree in the bits below shm_alignment.
 *
 * Returns 0 when vac is not set.  Otherwise returns
 * (addr ^ off) & (shm_alignment - 1), which is non-zero exactly when the
 * two values differ modulo shm_alignment.
 */
int
map_addr_vacalign_check(caddr_t addr, u_offset_t off)
{
	if (!vac)
		return (0);

	return (((uintptr_t)addr ^ off) & (shm_alignment - 1));
}
/*
 * Minimum physmem (a page count) below which large pages are not suggested:
 * map_pgsz() compares physmem against privm_lpg_min_physmem, and
 * map_pgszcvec() passes one of these two thresholds to map_szcvec().
 * NOTE(review): 131072 pages is 1GB only if the base page is 8K -- confirm.
 */
pgcnt_t shm_lpg_min_physmem = 131072;
pgcnt_t privm_lpg_min_physmem = 131072;
/*
 * Suggest a page size for the heap of process p.
 *
 * A len of 0 means "use the current size", computed as
 * p_brkbase + p_brksize - p_bssbase.  The length is raised to at least
 * default_uheap_lpsize, then page sizes are scanned from largest to
 * smallest for the first one that is enabled in
 * disable_auto_data_large_pages, no larger than max_uheap_lpsize, and no
 * larger than the length.  If none qualifies, the size of code 0 is used.
 *
 * When addr is 0 the result is never smaller than the page size for the
 * process' current p_brkpageszc.
 */
static size_t
map_pgszheap(struct proc *p, caddr_t addr, size_t len)
{
	size_t pgsz = MMU_PAGESIZE;
	size_t cur_pgsz;
	int szc;

	if (len == 0)
		len = p->p_brkbase + p->p_brksize - p->p_bssbase;
	len = MAX(len, default_uheap_lpsize);

	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if (!(disable_auto_data_large_pages & (1 << szc)) &&
		    pgsz <= max_uheap_lpsize && len >= pgsz)
			break;
	}

	/* Do not shrink below the page size the heap already uses. */
	if (addr == 0) {
		cur_pgsz = hw_page_array[p->p_brkpageszc].hp_size;
		if (pgsz < cur_pgsz)
			pgsz = cur_pgsz;
	}

	return (pgsz);
}
/*
 * Suggest a page size for the stack of process p.
 *
 * A len of 0 means "use the current size", p->p_stksize.  The length is
 * raised to at least default_ustack_lpsize, then page sizes are scanned
 * from largest to smallest for the first one that is enabled in
 * disable_auto_data_large_pages, no larger than max_ustack_lpsize, and no
 * larger than the length.  If none qualifies, the size of code 0 is used.
 *
 * When addr is 0 the result is never smaller than the page size for the
 * process' current p_stkpageszc.
 */
static size_t
map_pgszstk(struct proc *p, caddr_t addr, size_t len)
{
	size_t pgsz = MMU_PAGESIZE;
	size_t cur_pgsz;
	int szc;

	if (len == 0)
		len = p->p_stksize;
	len = MAX(len, default_ustack_lpsize);

	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if (!(disable_auto_data_large_pages & (1 << szc)) &&
		    pgsz <= max_ustack_lpsize && len >= pgsz)
			break;
	}

	/* Do not shrink below the page size the stack already uses. */
	if (addr == 0) {
		cur_pgsz = hw_page_array[p->p_stkpageszc].hp_size;
		if (pgsz < cur_pgsz)
			pgsz = cur_pgsz;
	}

	return (pgsz);
}
/*
 * Suggest a page size for an ISM mapping of len bytes at addr.
 *
 * Size codes from the largest down to TTE4M are tried.  The first one that
 * is not disabled in disable_ism_large_pages, is no larger than len, and
 * to which addr is aligned is returned.  DEFAULT_ISM_PAGESIZE is returned
 * when none qualifies.
 */
static size_t
map_pgszism(caddr_t addr, size_t len)
{
	uint_t szc;
	size_t pgsz;

	for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) {
		if ((disable_ism_large_pages & (1 << szc)) == 0) {
			pgsz = hw_page_array[szc].hp_size;
			if (len >= pgsz && IS_P2ALIGNED(addr, pgsz))
				return (pgsz);
		}
	}

	return (DEFAULT_ISM_PAGESIZE);
}
/*
 * Suggest a page size (in bytes, not a size code) for a mapping of kind
 * maptype and length len.
 *
 * MAPPGSZ_ISM is delegated to map_pgszism() regardless of physmem.  For
 * every other type MMU_PAGESIZE is returned when physmem is below
 * privm_lpg_min_physmem.  MAPPGSZ_STK and MAPPGSZ_HEAP are delegated to
 * map_pgszstk() / map_pgszheap() when the matching max_*_lpsize exceeds
 * MMU_PAGESIZE.  Anything else yields MMU_PAGESIZE.  memcntl is unused.
 */
size_t
map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
{
	ASSERT(maptype != MAPPGSZ_VA);

	if (maptype == MAPPGSZ_ISM)
		return (map_pgszism(addr, len));

	if (physmem < privm_lpg_min_physmem)
		return (MMU_PAGESIZE);

	if (maptype == MAPPGSZ_STK && max_ustack_lpsize > MMU_PAGESIZE)
		return (map_pgszstk(p, addr, len));

	if (maptype == MAPPGSZ_HEAP && max_uheap_lpsize > MMU_PAGESIZE)
		return (map_pgszheap(p, addr, len));

	return (MMU_PAGESIZE);
}
/*
 * Build a bit vector of page size codes usable for [addr, addr + size).
 *
 * Returns 0 when physmem is below min_physmem or max_lpsize does not
 * exceed MMU_PAGESIZE.  Otherwise size codes are scanned from the largest
 * down to 1 for the first code that
 *   - is not set in disable_lpgs,
 *   - has a page size no larger than max_lpsize,
 *   - leaves at least one fully aligned page inside the range, and
 *   - for which addr and off agree modulo the page size.
 * The result has that code's bit set plus the bits of every smaller code
 * not set in disable_lpgs, with bit 0 always cleared.  Returns 0 when no
 * code qualifies.
 */
static uint_t
map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs,
    size_t max_lpsize, size_t min_physmem)
{
	caddr_t eaddr = addr + size;
	caddr_t first_aligned;
	caddr_t last_aligned;
	size_t pgsz;
	uint_t szcvec;
	int szc;

	if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE)
		return (0);

	for (szc = mmu_page_sizes - 1; szc > 0; szc--) {
		if ((disable_lpgs & (1 << szc)) != 0)
			continue;

		pgsz = page_get_pagesize(szc);
		if (pgsz > max_lpsize)
			continue;

		/* Round the start up and the end down to this page size. */
		first_aligned = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		last_aligned = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		if (first_aligned < addr || first_aligned >= last_aligned)
			continue;

		if (P2PHASE((uintptr_t)addr ^ off, pgsz) != 0)
			continue;

		/* This code, plus all enabled smaller ones, minus code 0. */
		szcvec = (1 << szc);
		szcvec |= P2PHASE(~disable_lpgs, (1 << szc));
		return (szcvec & ~1);
	}

	return (0);
}
/*
 * Return the size-code bit vector from map_szcvec() for
 * [addr, addr + size), choosing the disable mask, maximum large page size
 * and minimum physmem by the kind of mapping.
 *
 * Precedence: MAP_TEXT in flags, then MAP_INITDATA in flags, then type
 * MAPPGSZC_SHM, MAPPGSZC_HEAP, MAPPGSZC_STACK, and finally the private
 * mapping defaults.  memcntl is unused.
 */
uint_t
map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
    int memcntl)
{
	int disable_lpgs;
	size_t max_lpsize;
	size_t min_physmem;

	if (flags & MAP_TEXT) {
		disable_lpgs = disable_auto_text_large_pages;
		max_lpsize = max_utext_lpsize;
		min_physmem = shm_lpg_min_physmem;
	} else if (flags & MAP_INITDATA) {
		disable_lpgs = disable_auto_data_large_pages;
		max_lpsize = max_uidata_lpsize;
		min_physmem = privm_lpg_min_physmem;
	} else if (type == MAPPGSZC_SHM) {
		disable_lpgs = disable_auto_data_large_pages;
		max_lpsize = max_shm_lpsize;
		min_physmem = shm_lpg_min_physmem;
	} else if (type == MAPPGSZC_HEAP) {
		disable_lpgs = disable_auto_data_large_pages;
		max_lpsize = max_uheap_lpsize;
		min_physmem = privm_lpg_min_physmem;
	} else if (type == MAPPGSZC_STACK) {
		disable_lpgs = disable_auto_data_large_pages;
		max_lpsize = max_ustack_lpsize;
		min_physmem = privm_lpg_min_physmem;
	} else {
		disable_lpgs = disable_auto_data_large_pages;
		max_lpsize = max_privmap_lpsize;
		min_physmem = privm_lpg_min_physmem;
	}

	return (map_szcvec(addr, size, off, disable_lpgs, max_lpsize,
	    min_physmem));
}
/*
 * Free list heads, indexed [size code][memory type][memnode][color].
 * The per-type, per-node and per-color arrays are carved out by
 * alloc_page_freelists().
 */
page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
/*
 * Cache list heads, indexed [memory type][memnode][color]; set up by
 * alloc_page_freelists() using the color count of size code 0.
 */
page_t ***page_cachelists[MAX_MEM_TYPES];
/*
 * Per-bin mutex arrays; each entry points at max_mem_nodes mutexes carved
 * out by ndata_alloc_page_mutexs().
 */
kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];
/*
 * Compute the number of bytes alloc_page_freelists() will consume for the
 * page cachelists, the page freelists and the page counters.
 *
 * The result must be at least what alloc_page_freelists() carves out:
 *   cachelists: for each memory type, max_mem_nodes (page_t **) slots plus
 *               max_mem_nodes * colors(0) (page_t *) slots;
 *   freelists:  for each size code and memory type, max_mem_nodes
 *               (page_t **) slots plus max_mem_nodes * colors(szc)
 *               (page_t *) slots;
 *   counters:   page_ctrs_sz().
 *
 * The per-node (page_t **) array exists once per size code, so it is
 * accounted for inside the size-code loop rather than once overall.
 */
size_t
calc_free_pagelist_sz(void)
{
	int szc;
	size_t alloc_sz, cache_sz, free_sz;

	/* One cachelist per color, node and type (size code 0 only). */
	cache_sz = (page_get_pagecolors(0) * sizeof (page_t *)) +
	    sizeof (page_t **);
	cache_sz *= max_mem_nodes * MAX_MEM_TYPES;

	/* One freelist per size, color, node and type. */
	free_sz = 0;
	for (szc = 0; szc < mmu_page_sizes; szc++) {
		free_sz += sizeof (page_t **) +
		    sizeof (page_t *) * page_get_pagecolors(szc);
	}
	free_sz *= max_mem_nodes * MAX_MEM_TYPES;

	alloc_sz = cache_sz + free_sz + page_ctrs_sz();
	return (alloc_sz);
}
/*
 * Carve the page cachelist and freelist head arrays out of the memory
 * starting at alloc_base and record them in page_cachelists[] and
 * page_freelists[][].
 *
 * Layout, in order:
 *   for each memory type: an array of max_mem_nodes (page_t **) followed,
 *   for each memnode, by page_get_pagecolors(0) list heads;
 *   for each size code and memory type: an array of max_mem_nodes
 *   (page_t **) followed, for each memnode, by page_get_pagecolors(szc)
 *   list heads.
 * The remainder is handed to page_ctrs_alloc().
 *
 * Returns the first address past everything that was consumed.
 */
caddr_t
alloc_page_freelists(caddr_t alloc_base)
{
	caddr_t cursor = alloc_base;
	int mtype, mnode;
	int szc, ncolors;

	/* Cachelists: only the smallest page size is kept there. */
	for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
		page_cachelists[mtype] = (page_t ***)cursor;
		cursor += (max_mem_nodes * sizeof (page_t **));

		for (mnode = 0; mnode < max_mem_nodes; mnode++) {
			page_cachelists[mtype][mnode] = (page_t **)cursor;
			cursor += (page_get_pagecolors(0) * sizeof (page_t *));
		}
	}

	/* Freelists: one set of bins for every supported page size. */
	for (szc = 0; szc < mmu_page_sizes; szc++) {
		ncolors = page_get_pagecolors(szc);

		for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
			page_freelists[szc][mtype] = (page_t ***)cursor;
			cursor += (max_mem_nodes * sizeof (page_t **));

			for (mnode = 0; mnode < max_mem_nodes; mnode++) {
				page_freelists[szc][mtype][mnode] =
				    (page_t **)cursor;
				cursor += (ncolors * (sizeof (page_t *)));
			}
		}
	}

	return (page_ctrs_alloc(cursor));
}
int
ndata_alloc_page_mutexs(struct memlist *ndata)
{
size_t alloc_sz;
caddr_t alloc_base;
int i;
void page_coloring_init();
page_coloring_init();
if (&mmu_init_mmu_page_sizes) {
if (!mmu_init_mmu_page_sizes(0)) {
cmn_err(CE_PANIC, "mmu_page_sizes %d not initialized",
mmu_page_sizes);
}
}
ASSERT(mmu_page_sizes >= DEFAULT_MMU_PAGE_SIZES);
alloc_sz = 2 * NPC_MUTEX * max_mem_nodes * sizeof (kmutex_t);
alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
if (alloc_base == NULL)
return (-1);
ASSERT(((uintptr_t)alloc_base & (ecache_alignsize - 1)) == 0);
for (i = 0; i < NPC_MUTEX; i++) {
fpc_mutex[i] = (kmutex_t *)alloc_base;
alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
cpc_mutex[i] = (kmutex_t *)alloc_base;
alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
}
return (0);
}
/*
 * State and tunables for get_color_start():
 *   color_start_current  running value advanced atomically on each call
 *   color_start_stride   amount added per call (before shifting)
 *   color_start_random   non-zero selects the gettick()-based start instead
 */
uint32_t color_start_current = 0;
uint32_t color_start_stride = 337;
int color_start_random = 0;
/*
 * Pick a starting color.  The as argument is unused.
 *
 * When consistent_coloring is 2 or color_start_random is set, the value is
 * gettick() shifted left by (vac_shift - MMU_PAGESHIFT) and masked with
 * hw_page_array[0].hp_colors - 1.
 *
 * Otherwise color_start_current is advanced by
 * color_start_stride << (vac_shift - MMU_PAGESHIFT) with a
 * compare-and-swap retry loop, and the new (unmasked) value is returned.
 */
uint_t
get_color_start(struct as *as)
{
	uint32_t seen, next;

	if (consistent_coloring == 2 || color_start_random) {
		return ((uint_t)(((gettick()) << (vac_shift - MMU_PAGESHIFT)) &
		    (hw_page_array[0].hp_colors - 1)));
	}

	for (;;) {
		seen = color_start_current;
		next = seen +
		    (color_start_stride << (vac_shift - MMU_PAGESHIFT));
		/* Retry if another thread changed the counter meanwhile. */
		if (atomic_cas_32(&color_start_current, seen, next) == seen)
			break;
	}

	return ((uint_t)(next));
}
void
page_coloring_init()
{
int a, i;
uint_t colors;
if (do_pg_coloring == 0) {
page_colors = 1;
for (i = 0; i < mmu_page_sizes; i++) {
colorequivszc[i] = 0;
hw_page_array[i].hp_colors = 1;
}
return;
}
page_colors = ecache_setsize / MMU_PAGESIZE;
page_colors_mask = page_colors - 1;
vac_colors = vac_size / MMU_PAGESIZE;
vac_colors_mask = vac_colors -1;
page_coloring_shift = 0;
a = ecache_setsize;
while (a >>= 1) {
page_coloring_shift++;
}
for (i = 0; i < mmu_page_sizes; i++) {
hw_page_array[i].hp_colors = (page_colors_mask >>
(hw_page_array[i].hp_shift - hw_page_array[0].hp_shift))
+ 1;
colorequivszc[i] = 0;
}
if (cpu_setsize > 0 && cpu_page_colors == 0 &&
cpu_setsize < ecache_setsize) {
cpu_page_colors = cpu_setsize / MMU_PAGESIZE;
a = lowbit(page_colors) - lowbit(cpu_page_colors);
ASSERT(a > 0);
ASSERT(a < 16);
for (i = 0; i < mmu_page_sizes; i++) {
if ((colors = hw_page_array[i].hp_colors) <= 1) {
continue;
}
while ((colors >> a) == 0)
a--;
ASSERT(a >= 0);
colorequivszc[i] = (a << 4);
}
}
if (&page_coloring_init_cpu) {
page_coloring_init_cpu();
}
}
/*
 * Return ptob() of the virtual color associated with a buf, or 0.
 *
 * 0 is returned when vac is not set, when the buf is neither B_PAGEIO nor
 * has a non-NULL b_un.b_addr, or when the color lookup yields a negative
 * value.  For B_PAGEIO bufs the color comes from
 * sfmmu_get_ppvcolor(bp->b_pages); otherwise from
 * sfmmu_get_addrvcolor(bp->b_un.b_addr).
 */
int
bp_color(struct buf *bp)
{
	int color;

	if (!vac)
		return (0);

	if ((bp->b_flags & B_PAGEIO) != 0)
		color = sfmmu_get_ppvcolor(bp->b_pages);
	else if (bp->b_un.b_addr != NULL)
		color = sfmmu_get_addrvcolor(bp->b_un.b_addr);
	else
		return (0);

	if (color < 0)
		return (0);

	return (ptob(color));
}
/*
 * Flush the cache by delegating to sfmmu_cache_flushall().
 * NOTE(review): the exact scope of the flush is defined by
 * sfmmu_cache_flushall(), which is not visible here.
 */
void
dcache_flushall()
{
sfmmu_cache_flushall();
}
/*
 * Report whether [va1, va1 + sz1) and [va2, va2 + sz2) overlap.
 *
 * Returns 0 when the lower range ends at or before the start of the higher
 * one, 1 otherwise.  Ranges with the same start address are always
 * reported as overlapping.
 *
 * The size is compared against the gap between the two starts instead of
 * forming va + sz.  That sum can wrap around in unsigned arithmetic for a
 * range reaching the top of the address space, which would make such a
 * range look like it ends below the other one.
 */
static int
kdi_range_overlap(uintptr_t va1, size_t sz1, uintptr_t va2, size_t sz2)
{
	if (va1 < va2 && sz1 <= va2 - va1)
		return (0);

	if (va2 < va1 && sz2 <= va1 - va2)
		return (0);

	return (1);
}
/*
 * Return how many bytes at the start of [va, va + sz) may be accessed.
 *
 * For writes, a range that overlaps [OFW_START_ADDR, OFW_END_ADDR] is cut
 * off at OFW_START_ADDR.  For both reads and writes, a range that overlaps
 * the PIOMAPSIZE bytes at PIOMAPBASE is cut off at PIOMAPBASE.  In either
 * case the result is 0 when va is not below the region's start.  A range
 * that overlaps neither region yields sz.
 */
size_t
kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
{
	if (write) {
		size_t ofw_len = OFW_END_ADDR - OFW_START_ADDR + 1;

		if (kdi_range_overlap(va, sz, OFW_START_ADDR, ofw_len)) {
			if (va < OFW_START_ADDR)
				return (OFW_START_ADDR - va);
			return (0);
		}
	}

	if (kdi_range_overlap(va, sz, PIOMAPBASE, PIOMAPSIZE)) {
		if (va < PIOMAPBASE)
			return (PIOMAPBASE - va);
		return (0);
	}

	return (sz);
}
/*
 * Minimum amount of physical memory, in bytes, for get_segkmem_lpsize() to
 * return anything larger than PAGESIZE.  It is compared against
 * physmem * PAGESIZE.  Default: 0x40000000 (1GB).
 */
size_t segkmem_lpminphysmem = 0x40000000;
/*
 * Choose a large page size for the kernel heap.
 *
 * Returns PAGESIZE when physmem * PAGESIZE is below segkmem_lpminphysmem,
 * or when plat_lpkmem_is_supported is present and returns 0.  Otherwise
 * starts from the size code of mmu_get_kernel_lpsize(lpsize) and walks
 * down towards code 1, returning the page size of the first code not set
 * in disable_large_pages.  Returns PAGESIZE if every such code is disabled.
 */
size_t
get_segkmem_lpsize(size_t lpsize)
{
	size_t memtotal = physmem * PAGESIZE;
	size_t kernel_lpsz;
	uint_t szc;

	if (memtotal < segkmem_lpminphysmem)
		return (PAGESIZE);

	/* Optional platform veto; the hook may be absent. */
	if (plat_lpkmem_is_supported != NULL &&
	    plat_lpkmem_is_supported() == 0)
		return (PAGESIZE);

	kernel_lpsz = mmu_get_kernel_lpsize(lpsize);

	for (szc = page_szc(kernel_lpsz); szc != 0; szc--) {
		if ((disable_large_pages & (1 << szc)) == 0)
			return (page_get_pagesize(szc));
	}

	return (PAGESIZE);
}