#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vmsystm.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/fs/swapnode.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_spt.h>
#include <vm/seg_vn.h>
#include <vm/anon.h>
/*
 * Named kstat counters for MADV_FREE hits and misses.  seg_init() publishes
 * this storage through a virtual "segadvstat" kstat.
 */
segadvstat_t segadvstat = {
{ "MADV_FREE_hit", KSTAT_DATA_ULONG },
{ "MADV_FREE_miss", KSTAT_DATA_ULONG },
};
/*
 * Flat kstat_named_t view of the counters above and the number of counters;
 * both are handed to the kstat framework by seg_init().
 */
kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);
/*
 * One pagelock cache (pcache) entry: a cached shadow list of pages for an
 * address range, keyed by (p_htag0, p_addr).  The first two members must stay
 * first: hash bucket headers (struct seg_phash) are cast to struct seg_pcache
 * so that a bucket can act as the sentinel of its own doubly linked chain.
 */
struct seg_pcache {
struct seg_pcache *p_hnext;	/* next entry on the hash chain */
struct seg_pcache *p_hprev;	/* prev entry; reused as "next" on delete lists */
pcache_link_t p_plink;		/* link on the per seg/amp list (non wired only) */
void *p_htag0;			/* owning seg, or amp when SEGP_AMP is set */
caddr_t p_addr;			/* start address of the cached range */
size_t p_len;			/* length of the cached range in bytes */
size_t p_wlen;			/* length usable for S_WRITE lookups */
struct page **p_pp;		/* the cached shadow list itself */
seg_preclaim_cbfunc_t p_callback; /* invoked when the entry is reclaimed */
clock_t p_lbolt;		/* lbolt when the entry last became inactive */
struct seg_phash *p_hashp;	/* bucket this entry is hashed on */
uint_t p_active;		/* number of current users of the entry */
uchar_t p_write;		/* non zero once used or inserted for S_WRITE */
uchar_t p_ref;			/* recently used hint, cleared by the reaper */
ushort_t p_flags;		/* low 16 bits of insert flags, plus SEGP_AMP */
};
/*
 * Hash bucket for non wired entries.  p_hnext/p_hprev point back at the
 * bucket itself (cast to struct seg_pcache *) when the chain is empty.
 * p_halink[] links the bucket on one of the two active-bucket lists headed
 * by seg_pahhead[]; a NULL p_lnext means "not on that list".
 */
struct seg_phash {
struct seg_pcache *p_hnext;
struct seg_pcache *p_hprev;
kmutex_t p_hmutex;		/* protects the chain hanging off this bucket */
pcache_link_t p_halink[2];
};
/*
 * Hash bucket for wired entries.  It has the same leading layout as
 * struct seg_phash (the code casts between the two) but carries no
 * active-bucket links.
 */
struct seg_phash_wired {
struct seg_pcache *p_hnext;
struct seg_pcache *p_hprev;
kmutex_t p_hmutex;
};
/* Default for segpcache_maxapurge_bytes (1 GB). */
#define P_MAX_APURGE_BYTES (1024 * 1024 * 1024)
/* Default for segpcache_shrink_shift. */
#define P_SHRINK_SHFT (5)
/*
 * Tunables consumed by seg_pinit() and seg_pasync_thread().  A value of 0 for
 * the sizes, the window and the *_ticks variables means "pick a default".
 */
int segpcache_enabled = 1;		/* 0 turns the pcache off entirely */
pgcnt_t segpcache_maxwindow = 0;	/* max pages cached by non wired entries */
ulong_t segpcache_hashsize_win = 0;	/* buckets in the non wired hash table */
ulong_t segpcache_hashsize_wired = 0;	/* buckets in the wired hash table */
int segpcache_reap_sec = 1;		/* reaper period in seconds */
clock_t segpcache_reap_ticks = 0;	/* reaper period in ticks */
int segpcache_pcp_maxage_sec = 1;	/* entry age limit in seconds */
clock_t segpcache_pcp_maxage_ticks = 0;	/* entry age limit in ticks */
int segpcache_shrink_shift = P_SHRINK_SHFT; /* purge window >> this per pass */
pgcnt_t segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES; /* per pass purge cap */
/* Serializes changes to seg_pdisabled in seg_p_enable()/seg_p_disable(). */
static kmutex_t seg_pcache_mtx;
/* Mutex and condition variable the async reaper thread sleeps on. */
static kmutex_t seg_pasync_mtx;
static kcondvar_t seg_pasync_cv;
/*
 * pcache control state, split over three 64 byte aligned structures.
 * NOTE(review): the split and the pad[] members look intended to keep
 * read-mostly, lock-protected and reaper-private fields on separate cache
 * lines - confirm before changing the layout.
 */
#pragma align 64(pctrl1)
#pragma align 64(pctrl2)
#pragma align 64(pctrl3)
/* Configuration written by seg_pinit() plus the disable count. */
static struct p_ctrl1 {
uint_t p_disabled;		/* non zero while caching is disabled */
pgcnt_t p_maxwin;		/* limit for p_locked_win */
size_t p_hashwin_sz;		/* number of non wired buckets (power of 2) */
struct seg_phash *p_htabwin;	/* non wired hash table */
size_t p_hashwired_sz;		/* number of wired buckets (power of 2) */
struct seg_phash_wired *p_htabwired; /* wired hash table */
kmem_cache_t *p_kmcache;	/* kmem cache for struct seg_pcache */
#ifdef _LP64
ulong_t pad[1];
#endif
} pctrl1;
/* Accounting and active-bucket list state; updated under p_mem_mtx. */
static struct p_ctrl2 {
kmutex_t p_mem_mtx;
pgcnt_t p_locked_win;		/* pages held by non wired entries */
pgcnt_t p_locked;		/* pages held by all entries */
uchar_t p_ahcur;		/* index of the list new buckets are added to */
uchar_t p_athr_on;		/* non zero while an async purge is scanning */
pcache_link_t p_ahhead[2];	/* heads of the two active-bucket lists */
} pctrl2;
/* Reaper parameters and statistics. */
static struct p_ctrl3 {
clock_t p_pcp_maxage;		/* age limit (ticks) for referenced entries */
ulong_t p_athr_empty_ahb;	/* empty buckets seen by the async purge */
ulong_t p_athr_full_ahb;	/* non empty buckets seen by the async purge */
pgcnt_t p_maxapurge_npages;	/* cap on pages purged per async pass */
int p_shrink_shft;		/* window >> shift = pages to purge per pass */
#ifdef _LP64
ulong_t pad[3];
#endif
} pctrl3;
/* Short names for the members of pctrl1, pctrl2 and pctrl3. */
#define seg_pdisabled pctrl1.p_disabled
#define seg_pmaxwindow pctrl1.p_maxwin
#define seg_phashsize_win pctrl1.p_hashwin_sz
#define seg_phashtab_win pctrl1.p_htabwin
#define seg_phashsize_wired pctrl1.p_hashwired_sz
#define seg_phashtab_wired pctrl1.p_htabwired
#define seg_pkmcache pctrl1.p_kmcache
#define seg_pmem_mtx pctrl2.p_mem_mtx
#define seg_plocked_window pctrl2.p_locked_win
#define seg_plocked pctrl2.p_locked
#define seg_pahcur pctrl2.p_ahcur
#define seg_pathr_on pctrl2.p_athr_on
#define seg_pahhead pctrl2.p_ahhead
#define seg_pmax_pcpage pctrl3.p_pcp_maxage
#define seg_pathr_empty_ahb pctrl3.p_athr_empty_ahb
#define seg_pathr_full_ahb pctrl3.p_athr_full_ahb
#define seg_pshrink_shift pctrl3.p_shrink_shft
#define seg_pmaxapurge_npages pctrl3.p_maxapurge_npages
/* Index masks; valid because both table sizes are powers of two. */
#define P_HASHWIN_MASK (seg_phashsize_win - 1)
#define P_HASHWIRED_MASK (seg_phashsize_wired - 1)
/* Low bits of htag0 dropped before hashing into the wired table. */
#define P_BASESHIFT (6)
/* The thread running seg_pasync_thread(), recorded when it starts. */
kthread_t *seg_pasync_thr;
extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;
/* Wired entries are those inserted with SEGP_FORCE_WIRED. */
#define IS_PFLAGS_WIRED(flags) ((flags) & SEGP_FORCE_WIRED)
#define IS_PCP_WIRED(pcp) IS_PFLAGS_WIRED((pcp)->p_flags)
/* Ticks elapsed since t, and since an entry last became inactive. */
#define LBOLT_DELTA(t) ((ulong_t)(ddi_get_lbolt() - (t)))
#define PCP_AGE(pcp) LBOLT_DELTA((pcp)->p_lbolt)
/*
 * Select the hash bucket for (htag0, addr).  Wired requests hash on htag0
 * only and use the wired table.  Non wired requests mix htag0 with the
 * address shifted right by either the shift encoded in the upper 16 bits of
 * flags (when SEGP_PSHIFT is set) or the page shift of the segment's page
 * size code.  Note that flags is evaluated more than once.
 */
#define P_HASHBP(seg, htag0, addr, flags) \
(IS_PFLAGS_WIRED((flags)) ? \
((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK & \
((uintptr_t)(htag0) >> P_BASESHIFT)]) : \
(&seg_phashtab_win[P_HASHWIN_MASK & \
(((uintptr_t)(htag0) >> 3) ^ \
((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ? \
(flags >> 16) : page_get_shift((seg)->s_szc))))]))
/* Entry has the same tag and start address and covers at least len bytes. */
#define P_MATCH(pcp, htag0, addr, len) \
((pcp)->p_htag0 == (htag0) && \
(pcp)->p_addr == (addr) && \
(pcp)->p_len >= (len))
/* As P_MATCH, and additionally the entry holds exactly shadow list pp. */
#define P_MATCH_PP(pcp, htag0, addr, len, pp) \
((pcp)->p_pp == (pp) && \
(pcp)->p_htag0 == (htag0) && \
(pcp)->p_addr == (addr) && \
(pcp)->p_len >= (len))
/* Recover the containing structure from an embedded list link. */
#define plink2pcache(pl) ((struct seg_pcache *)((uintptr_t)(pl) - \
offsetof(struct seg_pcache, p_plink)))
#define hlink2phash(hl, l) ((struct seg_phash *)((uintptr_t)(hl) - \
offsetof(struct seg_phash, p_halink[l])))
/*
 * Put non wired hash bucket hp on the tail of the current active-bucket list
 * (seg_pahhead[seg_pahcur]) unless it is already linked on either list.
 *
 * The caller holds hp->p_hmutex and, per the assertions, has just made the
 * bucket non empty (it contains exactly one entry).  seg_pmem_mtx is taken
 * here to protect seg_pahcur and the list links.
 */
static void
seg_padd_abuck(struct seg_phash *hp)
{
int lix;
ASSERT(MUTEX_HELD(&hp->p_hmutex));
ASSERT((struct seg_phash *)hp->p_hnext != hp);
ASSERT((struct seg_phash *)hp->p_hprev != hp);
ASSERT(hp->p_hnext == hp->p_hprev);
ASSERT(!IS_PCP_WIRED(hp->p_hnext));
ASSERT(hp->p_hnext->p_hnext == (struct seg_pcache *)hp);
ASSERT(hp->p_hprev->p_hprev == (struct seg_pcache *)hp);
ASSERT(hp >= seg_phashtab_win &&
hp < &seg_phashtab_win[seg_phashsize_win]);
mutex_enter(&seg_pmem_mtx);
lix = seg_pahcur;
ASSERT(lix >= 0 && lix <= 1);
/* Already on the current list: nothing to do. */
if (hp->p_halink[lix].p_lnext != NULL) {
ASSERT(hp->p_halink[lix].p_lprev != NULL);
ASSERT(hp->p_halink[!lix].p_lnext == NULL);
ASSERT(hp->p_halink[!lix].p_lprev == NULL);
mutex_exit(&seg_pmem_mtx);
return;
}
ASSERT(hp->p_halink[lix].p_lprev == NULL);
/* Still on the other list: leave it there. */
if (hp->p_halink[!lix].p_lnext != NULL) {
ASSERT(hp->p_halink[!lix].p_lprev != NULL);
mutex_exit(&seg_pmem_mtx);
return;
}
ASSERT(hp->p_halink[!lix].p_lprev == NULL);
/* Not linked anywhere: append to the tail of the current list. */
hp->p_halink[lix].p_lnext = &seg_pahhead[lix];
hp->p_halink[lix].p_lprev = seg_pahhead[lix].p_lprev;
seg_pahhead[lix].p_lprev->p_lnext = &hp->p_halink[lix];
seg_pahhead[lix].p_lprev = &hp->p_halink[lix];
mutex_exit(&seg_pmem_mtx);
}
/*
 * Take the now empty non wired bucket hp off the active-bucket list it is on.
 *
 * athr is non zero when the caller is the async purge (seg_ppurge_async()).
 * In that case the bucket is on the list the purge is scanning, which is the
 * one that is NOT current (!seg_pahcur), and it is unlinked without taking
 * seg_pmem_mtx.  Other callers take seg_pmem_mtx; if the bucket turns out to
 * be on the non current list it is only unlinked when no async purge is
 * running (seg_pathr_on == 0), otherwise it is left for the purge to find.
 *
 * The caller holds hp->p_hmutex.
 */
static void
seg_premove_abuck(struct seg_phash *hp, int athr)
{
int lix;
ASSERT(MUTEX_HELD(&hp->p_hmutex));
ASSERT((struct seg_phash *)hp->p_hnext == hp);
ASSERT((struct seg_phash *)hp->p_hprev == hp);
ASSERT(hp >= seg_phashtab_win &&
hp < &seg_phashtab_win[seg_phashsize_win]);
if (athr) {
ASSERT(seg_pathr_on);
ASSERT(seg_pahcur <= 1);
lix = !seg_pahcur;
ASSERT(hp->p_halink[lix].p_lnext != NULL);
ASSERT(hp->p_halink[lix].p_lprev != NULL);
ASSERT(hp->p_halink[!lix].p_lnext == NULL);
ASSERT(hp->p_halink[!lix].p_lprev == NULL);
hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
hp->p_halink[lix].p_lnext = NULL;
hp->p_halink[lix].p_lprev = NULL;
return;
}
mutex_enter(&seg_pmem_mtx);
lix = seg_pahcur;
ASSERT(lix >= 0 && lix <= 1);
/* On the current list: unlink it. */
if (hp->p_halink[lix].p_lnext != NULL) {
ASSERT(hp->p_halink[lix].p_lprev != NULL);
ASSERT(hp->p_halink[!lix].p_lnext == NULL);
ASSERT(hp->p_halink[!lix].p_lprev == NULL);
hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
hp->p_halink[lix].p_lnext = NULL;
hp->p_halink[lix].p_lprev = NULL;
mutex_exit(&seg_pmem_mtx);
return;
}
ASSERT(hp->p_halink[lix].p_lprev == NULL);
/* Otherwise it must be on the other list. */
lix = !lix;
ASSERT(hp->p_halink[lix].p_lnext != NULL);
ASSERT(hp->p_halink[lix].p_lprev != NULL);
/* Only touch that list when the async purge is not walking it. */
if (!seg_pathr_on) {
hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
hp->p_halink[lix].p_lnext = NULL;
hp->p_halink[lix].p_lprev = NULL;
}
mutex_exit(&seg_pmem_mtx);
}
/*
 * Scan bucket hp for entries with tag htag0 and start address addr.
 *
 * If an entry covering at least len bytes exists, *found is set to 1 and the
 * scan stops.  Shorter entries that are inactive are unlinked from both the
 * hash chain and their per seg/amp list and collected on a singly linked
 * list (chained through p_hprev) which is returned so the caller can run
 * their callbacks and free them after dropping the locks.  Shorter entries
 * that are still active are skipped.
 *
 * The caller holds hp->p_hmutex.  NOTE(review): the per seg/amp list is also
 * modified here, so the caller presumably holds the matching s_pmtx/a_pmtx as
 * seg_pinsert() does - confirm for any new caller.
 */
static struct seg_pcache *
seg_plookup_checkdup(struct seg_phash *hp, void *htag0,
caddr_t addr, size_t len, int *found)
{
struct seg_pcache *pcp;
struct seg_pcache *delcallb_list = NULL;
ASSERT(MUTEX_HELD(&hp->p_hmutex));
*found = 0;
for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
pcp = pcp->p_hnext) {
ASSERT(pcp->p_hashp == hp);
if (pcp->p_htag0 == htag0 && pcp->p_addr == addr) {
ASSERT(!IS_PCP_WIRED(pcp));
if (pcp->p_len < len) {
pcache_link_t *plinkp;
if (pcp->p_active) {
continue;
}
plinkp = &pcp->p_plink;
plinkp->p_lprev->p_lnext = plinkp->p_lnext;
plinkp->p_lnext->p_lprev = plinkp->p_lprev;
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
/* p_hnext is left intact so the loop can keep walking the chain. */
pcp->p_hprev = delcallb_list;
delcallb_list = pcp;
} else {
*found = 1;
break;
}
}
}
return (delcallb_list);
}
/*
 * Look up a cached shadow list for [addr, addr + len) and take an active
 * reference on it.
 *
 * The lookup key is amp when it is not NULL and seg otherwise.  An entry
 * qualifies when it has the same key and start address and is at least len
 * bytes long; for S_WRITE its writable length must also cover len.  The first
 * qualifying candidate decides the outcome: if it cannot be used (too short
 * for writing, or its reference count is saturated) the lookup fails without
 * examining later entries.
 *
 * Returns the entry's shadow list on success, or NULL if caching is disabled
 * or no usable entry exists.
 */
struct page **
seg_plookup(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
    enum seg_rw rw, uint_t flags)
{
	struct seg_phash *bucket;
	struct seg_pcache *ent;
	struct page **pplist = NULL;
	void *htag0;

	ASSERT(seg != NULL);
	ASSERT(rw == S_READ || rw == S_WRITE);

	if (seg_pdisabled) {
		return (NULL);
	}
	ASSERT(seg_phashsize_win != 0);

	htag0 = (amp != NULL) ? (void *)amp : (void *)seg;
	bucket = P_HASHBP(seg, htag0, addr, flags);

	mutex_enter(&bucket->p_hmutex);
	ent = bucket->p_hnext;
	while (ent != (struct seg_pcache *)bucket) {
		ASSERT(ent->p_hashp == bucket);
		if (!P_MATCH(ent, htag0, addr, len)) {
			ent = ent->p_hnext;
			continue;
		}
		ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(ent));

		/* The first candidate settles the lookup one way or the other. */
		if ((rw == S_WRITE && ent->p_wlen < len) ||
		    ent->p_active == UINT_MAX) {
			break;
		}
		ent->p_active++;
		if (rw == S_WRITE && !ent->p_write) {
			ent->p_write = 1;
		}
		pplist = ent->p_pp;
		break;
	}
	mutex_exit(&bucket->p_hmutex);

	return (pplist);
}
/*
 * Drop one active reference on the cached shadow list pp for the range
 * [addr, addr + len) keyed by amp (when not NULL) or seg.
 *
 * - If the pcache is not configured, or no entry holding exactly pp is found,
 *   callback is invoked directly with the caller's arguments.
 * - If the entry is found and either it remains in use, or caching is
 *   enabled, the entry stays cached.  When it becomes inactive (non wired
 *   only) its timestamp and reference hint are refreshed for the reaper.
 * - If the entry becomes inactive while the per seg/amp lock is held, or it
 *   is wired and caching is disabled, it is unlinked and freed, callback is
 *   invoked with the entry's full length, and the page accounting is reduced.
 *
 * Lock order is per seg/amp mutex first, then hash bucket mutex.  The per
 * seg/amp mutex is taken up front when caching is disabled; it is also taken
 * on demand when a duplicate entry (same range, different shadow list) is
 * seen, restarting the scan if it could not be taken without blocking.
 */
void
seg_pinactive(struct seg *seg, struct anon_map *amp, caddr_t addr,
    size_t len, struct page **pp, enum seg_rw rw, uint_t flags,
    seg_preclaim_cbfunc_t callback)
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	kmutex_t *pmtx = NULL;
	pcache_link_t *pheadp;
	void *htag0;
	pgcnt_t npages = 0;
	int keep = 0;

	ASSERT(seg != NULL);
	ASSERT(rw == S_READ || rw == S_WRITE);

	htag0 = (amp == NULL ? (void *)seg : (void *)amp);

	/* Nothing can be cached if the hash table was never set up. */
	if (seg_phashsize_win == 0) {
		goto out;
	}

	/*
	 * With caching disabled an inactive entry will be removed below, so
	 * take the per seg/amp lock before the bucket lock.
	 */
	if (!IS_PFLAGS_WIRED(flags) && seg_pdisabled) {
		if (amp == NULL) {
			pheadp = &seg->s_phead;
			pmtx = &seg->s_pmtx;
		} else {
			pheadp = &(amp->a_phead);
			pmtx = &(amp->a_pmtx);
		}
		mutex_enter(pmtx);
	}

	hp = P_HASHBP(seg, htag0, addr, flags);
	mutex_enter(&hp->p_hmutex);
again:
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		ASSERT(pcp->p_hashp == hp);
		if (P_MATCH_PP(pcp, htag0, addr, len, pp)) {
			ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
			ASSERT(pcp->p_active);
			if (keep) {
				/*
				 * The per seg/amp lock was taken by blocking
				 * after a restart; release it so that this
				 * entry is kept rather than removed.
				 */
				ASSERT(pmtx != NULL);
				ASSERT(!IS_PFLAGS_WIRED(flags));
				mutex_exit(pmtx);
				pmtx = NULL;
			}
			pcp->p_active--;
			if (pcp->p_active == 0 && (pmtx != NULL ||
			    (seg_pdisabled && IS_PFLAGS_WIRED(flags)))) {
				/* Last user is gone: remove and free it. */
				ASSERT(callback == pcp->p_callback);
				if (pmtx != NULL) {
					pcache_link_t *plinkp = &pcp->p_plink;
					ASSERT(!IS_PCP_WIRED(pcp));
					ASSERT(pheadp->p_lnext != pheadp);
					ASSERT(pheadp->p_lprev != pheadp);
					plinkp->p_lprev->p_lnext =
					    plinkp->p_lnext;
					plinkp->p_lnext->p_lprev =
					    plinkp->p_lprev;
				}
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;
				if (!IS_PCP_WIRED(pcp) &&
				    hp->p_hnext == (struct seg_pcache *)hp) {
					seg_premove_abuck(hp, 0);
				}
				mutex_exit(&hp->p_hmutex);
				if (pmtx != NULL) {
					mutex_exit(pmtx);
				}
				/*
				 * Release the whole cached range, and as a
				 * write if any user wrote through it.
				 */
				len = pcp->p_len;
				npages = btop(len);
				if (rw != S_WRITE && pcp->p_write) {
					rw = S_WRITE;
				}
				kmem_cache_free(seg_pkmcache, pcp);
				goto out;
			} else {
				/* Entry stays cached. */
				if (!pcp->p_active && !IS_PCP_WIRED(pcp)) {
					pcp->p_lbolt = ddi_get_lbolt();
					pcp->p_ref = 1;
				}
				mutex_exit(&hp->p_hmutex);
				if (pmtx != NULL) {
					mutex_exit(pmtx);
				}
				return;
			}
		} else if (!IS_PFLAGS_WIRED(flags) &&
		    P_MATCH(pcp, htag0, addr, len)) {
			/*
			 * Duplicate entry: same range but a different shadow
			 * list.  Mark it recently used if idle, and make sure
			 * the per seg/amp lock is held from here on.
			 */
			if (!pcp->p_active) {
				pcp->p_lbolt = ddi_get_lbolt();
				pcp->p_ref = 1;
			}
			if (pmtx != NULL) {
				keep = 0;
				continue;
			}
			ASSERT(keep == 0);
			if (amp == NULL) {
				pheadp = &seg->s_phead;
				pmtx = &seg->s_pmtx;
			} else {
				pheadp = &(amp->a_phead);
				pmtx = &(amp->a_pmtx);
			}
			if (!mutex_tryenter(pmtx)) {
				/*
				 * Respect lock order: drop the bucket lock,
				 * block on the per seg/amp lock, then rescan.
				 */
				mutex_exit(&hp->p_hmutex);
				mutex_enter(pmtx);
				mutex_enter(&hp->p_hmutex);
				keep = 1;
				goto again;
			}
		}
	}
	mutex_exit(&hp->p_hmutex);
	if (pmtx != NULL) {
		mutex_exit(pmtx);
	}
out:
	(*callback)(htag0, addr, len, pp, rw, 0);
	if (npages) {
		mutex_enter(&seg_pmem_mtx);
		ASSERT(seg_plocked >= npages);
		seg_plocked -= npages;
		if (!IS_PFLAGS_WIRED(flags)) {
			ASSERT(seg_plocked_window >= npages);
			seg_plocked_window -= npages;
		}
		mutex_exit(&seg_pmem_mtx);
	}
}
#ifdef DEBUG
/* DEBUG failure injection for seg_pinsert_check(); 0 turns it off. */
static uint32_t p_insert_chk_mtbf = 0;
#endif

/*
 * Cheap pre-check for callers that want to know whether a later seg_pinsert()
 * of len bytes is likely to be accepted.  No locks are taken, so the answer
 * is only a hint.
 *
 * Returns SEGP_FAIL when caching is disabled, when a non wired request would
 * push the cached window past its limit, or when free memory is below
 * desfree.  Wired requests are accepted whenever caching is enabled.
 * amp and addr are not consulted.
 */
int
seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
    size_t len, uint_t flags)
{
	ASSERT(seg != NULL);

#ifdef DEBUG
	if (p_insert_chk_mtbf != 0 &&
	    (gethrtime() % p_insert_chk_mtbf) == 0) {
		return (SEGP_FAIL);
	}
#endif

	if (seg_pdisabled) {
		return (SEGP_FAIL);
	}
	ASSERT(seg_phashsize_win != 0);

	if (IS_PFLAGS_WIRED(flags)) {
		return (SEGP_SUCCESS);
	}

	if (seg_plocked_window + btop(len) > seg_pmaxwindow ||
	    freemem < desfree) {
		return (SEGP_FAIL);
	}

	return (SEGP_SUCCESS);
}
#ifdef DEBUG
/* DEBUG failure injection for seg_pinsert(); 0 turns it off. */
static uint32_t p_insert_mtbf = 0;
#endif

/*
 * Insert shadow list pp for the range [addr, addr + len) into the pcache
 * with one active reference held by the caller.
 *
 * The entry is keyed by amp when it is not NULL and by seg otherwise.  wlen
 * is the prefix of the range that may be handed out for S_WRITE lookups.
 * callback is stored in the entry and is invoked when the entry is
 * reclaimed.
 *
 * Returns SEGP_FAIL if caching is disabled or, for non wired requests, if the
 * cached window limit would be exceeded.  Otherwise returns SEGP_SUCCESS;
 * this includes the case where an equal or longer non wired entry for the
 * same start address already exists, in which case nothing is inserted and
 * the accounting taken for this request is given back.
 *
 * For non wired entries both the per seg/amp mutex and the hash bucket mutex
 * are held while linking, taken in that order.  Shorter inactive entries for
 * the same start address are removed as a side effect; their callbacks run
 * after all locks are dropped.
 */
int
seg_pinsert(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
    size_t wlen, struct page **pp, enum seg_rw rw, uint_t flags,
    seg_preclaim_cbfunc_t callback)
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages;
	pcache_link_t *pheadp;
	kmutex_t *pmtx;
	struct seg_pcache *delcallb_list = NULL;

	ASSERT(seg != NULL);
	ASSERT(rw == S_READ || rw == S_WRITE);
	ASSERT(rw == S_READ || wlen == len);
	ASSERT(rw == S_WRITE || wlen <= len);
	ASSERT(amp == NULL || wlen == len);

#ifdef DEBUG
	if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
		return (SEGP_FAIL);
	}
#endif

	if (seg_pdisabled) {
		return (SEGP_FAIL);
	}
	ASSERT(seg_phashsize_win != 0);
	ASSERT((len & PAGEOFFSET) == 0);

	/* Reserve the pages in the accounting before allocating the entry. */
	npages = btop(len);
	mutex_enter(&seg_pmem_mtx);
	if (!IS_PFLAGS_WIRED(flags)) {
		if (seg_plocked_window + npages > seg_pmaxwindow) {
			mutex_exit(&seg_pmem_mtx);
			return (SEGP_FAIL);
		}
		seg_plocked_window += npages;
	}
	seg_plocked += npages;
	mutex_exit(&seg_pmem_mtx);

	pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
	if (amp == NULL) {
		pcp->p_htag0 = (void *)seg;
		pcp->p_flags = flags & 0xffff;
	} else {
		pcp->p_htag0 = (void *)amp;
		pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
	}
	pcp->p_addr = addr;
	pcp->p_len = len;
	pcp->p_wlen = wlen;
	pcp->p_pp = pp;
	pcp->p_write = (rw == S_WRITE);
	pcp->p_callback = callback;
	pcp->p_active = 1;

	hp = P_HASHBP(seg, pcp->p_htag0, addr, flags);
	if (!IS_PFLAGS_WIRED(flags)) {
		int found;
		void *htag0;

		if (amp == NULL) {
			pheadp = &seg->s_phead;
			pmtx = &seg->s_pmtx;
			htag0 = (void *)seg;
		} else {
			pheadp = &(amp->a_phead);
			pmtx = &(amp->a_pmtx);
			htag0 = (void *)amp;
		}
		mutex_enter(pmtx);
		mutex_enter(&hp->p_hmutex);
		delcallb_list = seg_plookup_checkdup(hp, htag0, addr,
		    len, &found);
		if (found) {
			/* An adequate entry exists: undo and discard ours. */
			mutex_exit(&hp->p_hmutex);
			mutex_exit(pmtx);
			mutex_enter(&seg_pmem_mtx);
			seg_plocked -= npages;
			seg_plocked_window -= npages;
			mutex_exit(&seg_pmem_mtx);
			kmem_cache_free(seg_pkmcache, pcp);
			goto out;
		}
		/* Link at the head of the per seg/amp list. */
		pcp->p_plink.p_lnext = pheadp->p_lnext;
		pcp->p_plink.p_lprev = pheadp;
		pheadp->p_lnext->p_lprev = &pcp->p_plink;
		pheadp->p_lnext = &pcp->p_plink;
	} else {
		mutex_enter(&hp->p_hmutex);
	}

	/* Link at the head of the hash chain. */
	pcp->p_hashp = hp;
	pcp->p_hnext = hp->p_hnext;
	pcp->p_hprev = (struct seg_pcache *)hp;
	hp->p_hnext->p_hprev = pcp;
	hp->p_hnext = pcp;

	/* First entry in a non wired bucket: make the bucket visible to purge. */
	if (!IS_PFLAGS_WIRED(flags) &&
	    hp->p_hprev == pcp) {
		seg_padd_abuck(hp);
	}
	mutex_exit(&hp->p_hmutex);
	if (!IS_PFLAGS_WIRED(flags)) {
		mutex_exit(pmtx);
	}

out:
	/* Reclaim the shorter entries that were displaced, lock free. */
	npages = 0;
	while (delcallb_list != NULL) {
		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;
		ASSERT(!IS_PCP_WIRED(pcp) && !pcp->p_active);
		(void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
		    pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
		npages += btop(pcp->p_len);
		kmem_cache_free(seg_pkmcache, pcp);
	}
	if (npages) {
		ASSERT(!IS_PFLAGS_WIRED(flags));
		mutex_enter(&seg_pmem_mtx);
		ASSERT(seg_plocked >= npages);
		ASSERT(seg_plocked_window >= npages);
		seg_plocked -= npages;
		seg_plocked_window -= npages;
		mutex_exit(&seg_pmem_mtx);
	}

	return (SEGP_SUCCESS);
}
/*
 * Purge inactive entries from the pcache.
 *
 * force == 0 (periodic reaper): only runs when memory is low or the cached
 * window is at least 7/8 of its limit, purges non wired entries only, and
 * stops once roughly (window >> seg_pshrink_shift) pages, capped by
 * seg_pmaxapurge_npages, have been collected.  Entries whose p_ref hint is
 * set and that are younger than seg_pmax_pcpage get a second chance.
 *
 * force != 0 (seg_p_disable()): first unhooks every inactive wired entry,
 * then purges inactive non wired entries regardless of age.
 *
 * Non wired buckets are found through the two active-bucket lists.  Only one
 * scan runs at a time (seg_pathr_on); it walks the list that is not current
 * so that seg_padd_abuck() can keep appending to the other one.
 *
 * Callbacks are run and entries freed only after all locks are dropped.
 */
static void
seg_ppurge_async(int force)
{
struct seg_pcache *delcallb_list = NULL;
struct seg_pcache *pcp;
struct seg_phash *hp;
pgcnt_t npages = 0;
pgcnt_t npages_window = 0;
pgcnt_t npgs_to_purge;
pgcnt_t npgs_purged = 0;
int hlinks = 0;
int hlix;
pcache_link_t *hlinkp;
pcache_link_t *hlnextp = NULL;
int lowmem;
int trim;
ASSERT(seg_phashsize_win != 0);
/* Nothing cached, or nothing this kind of pass is allowed to purge. */
if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
return;
}
if (!force) {
lowmem = 0;
trim = 0;
/*
 * Decide whether memory is tight enough to purge.  The lower free
 * memory is, the smaller the cached window that triggers a purge.
 */
if (freemem < lotsfree + needfree) {
spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
if (fmem <= 5 * (desfree >> 2)) {
lowmem = 1;
} else if (fmem <= 7 * (lotsfree >> 3)) {
if (seg_plocked_window >=
(availrmem_initial >> 1)) {
lowmem = 1;
}
} else if (fmem < lotsfree) {
if (seg_plocked_window >=
3 * (availrmem_initial >> 2)) {
lowmem = 1;
}
}
}
/* Independently, trim when the window is nearly full. */
if (seg_plocked_window >= 7 * (seg_pmaxwindow >> 3)) {
trim = 1;
}
if (!lowmem && !trim) {
return;
}
/* Target a fraction of the window, capped per pass. */
npgs_to_purge = seg_plocked_window >>
seg_pshrink_shift;
if (lowmem) {
npgs_to_purge = MIN(npgs_to_purge,
MAX(seg_pmaxapurge_npages, desfree));
} else {
npgs_to_purge = MIN(npgs_to_purge,
seg_pmaxapurge_npages);
}
if (npgs_to_purge == 0) {
return;
}
} else {
struct seg_phash_wired *hpw;
ASSERT(seg_phashsize_wired != 0);
/* Forced pass: collect every inactive wired entry first. */
for (hpw = seg_phashtab_wired;
hpw < &seg_phashtab_wired[seg_phashsize_wired]; hpw++) {
/* Unlocked peek; empty buckets are skipped without locking. */
if (hpw->p_hnext == (struct seg_pcache *)hpw) {
continue;
}
mutex_enter(&hpw->p_hmutex);
for (pcp = hpw->p_hnext;
pcp != (struct seg_pcache *)hpw;
pcp = pcp->p_hnext) {
ASSERT(IS_PCP_WIRED(pcp));
ASSERT(pcp->p_hashp ==
(struct seg_phash *)hpw);
if (pcp->p_active) {
continue;
}
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
/* p_hprev now chains the delete list; p_hnext still walks the bucket. */
pcp->p_hprev = delcallb_list;
delcallb_list = pcp;
}
mutex_exit(&hpw->p_hmutex);
}
}
/* Only one scan of the active-bucket lists may run at a time. */
mutex_enter(&seg_pmem_mtx);
if (seg_pathr_on) {
mutex_exit(&seg_pmem_mtx);
goto runcb;
}
seg_pathr_on = 1;
mutex_exit(&seg_pmem_mtx);
ASSERT(seg_pahcur <= 1);
/* Scan the list that new buckets are NOT being added to. */
hlix = !seg_pahcur;
again:
for (hlinkp = seg_pahhead[hlix].p_lnext; hlinkp != &seg_pahhead[hlix];
hlinkp = hlnextp) {
/* Remember the successor: this bucket may be unlinked below. */
hlnextp = hlinkp->p_lnext;
ASSERT(hlnextp != NULL);
hp = hlink2phash(hlinkp, hlix);
if (hp->p_hnext == (struct seg_pcache *)hp) {
seg_pathr_empty_ahb++;
continue;
}
seg_pathr_full_ahb++;
mutex_enter(&hp->p_hmutex);
for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
pcp = pcp->p_hnext) {
pcache_link_t *pheadp;
pcache_link_t *plinkp;
void *htag0;
kmutex_t *pmtx;
ASSERT(!IS_PCP_WIRED(pcp));
ASSERT(pcp->p_hashp == hp);
if (pcp->p_active) {
continue;
}
/* Recently used and still young: clear the hint and spare it once. */
if (!force && pcp->p_ref &&
PCP_AGE(pcp) < seg_pmax_pcpage) {
pcp->p_ref = 0;
continue;
}
plinkp = &pcp->p_plink;
htag0 = pcp->p_htag0;
if (pcp->p_flags & SEGP_AMP) {
pheadp = &((amp_t *)htag0)->a_phead;
pmtx = &((amp_t *)htag0)->a_pmtx;
} else {
pheadp = &((seg_t *)htag0)->s_phead;
pmtx = &((seg_t *)htag0)->s_pmtx;
}
/*
 * The bucket lock is already held, which is the reverse of the usual
 * order, so the per seg/amp lock may only be tried, never waited for.
 */
if (!mutex_tryenter(pmtx)) {
continue;
}
ASSERT(pheadp->p_lnext != pheadp);
ASSERT(pheadp->p_lprev != pheadp);
plinkp->p_lprev->p_lnext =
plinkp->p_lnext;
plinkp->p_lnext->p_lprev =
plinkp->p_lprev;
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
mutex_exit(pmtx);
pcp->p_hprev = delcallb_list;
delcallb_list = pcp;
npgs_purged += btop(pcp->p_len);
}
if (hp->p_hnext == (struct seg_pcache *)hp) {
seg_premove_abuck(hp, 1);
}
mutex_exit(&hp->p_hmutex);
/* Stop once everything in the window has been collected ... */
if (npgs_purged >= seg_plocked_window) {
break;
}
if (!force) {
/* ... or the target for this pass has been reached ... */
if (npgs_purged >= npgs_to_purge) {
break;
}
/* ... or, every 16 non empty buckets, if memory has recovered. */
if (!trim && !(seg_pathr_full_ahb & 15)) {
ASSERT(lowmem);
if (freemem >= lotsfree + needfree) {
break;
}
}
}
}
if (hlinkp == &seg_pahhead[hlix]) {
/*
 * The whole list was walked.  Make it the current list so that the
 * other one can be scanned, and scan that one too on the first pass.
 */
mutex_enter(&seg_pmem_mtx);
ASSERT(seg_pathr_on);
ASSERT(seg_pahcur == !hlix);
seg_pahcur = hlix;
mutex_exit(&seg_pmem_mtx);
if (++hlinks < 2) {
hlix = !hlix;
goto again;
}
} else if ((hlinkp = hlnextp) != &seg_pahhead[hlix] &&
seg_pahhead[hlix].p_lnext != hlinkp) {
/*
 * The scan stopped early.  Move the list head so that the first
 * bucket not yet examined becomes the first one on the list, which
 * lets the next scan start where this one left off.
 */
ASSERT(hlinkp != NULL);
ASSERT(hlinkp->p_lprev != &seg_pahhead[hlix]);
ASSERT(seg_pahhead[hlix].p_lnext != &seg_pahhead[hlix]);
ASSERT(seg_pahhead[hlix].p_lprev != &seg_pahhead[hlix]);
seg_pahhead[hlix].p_lnext->p_lprev = seg_pahhead[hlix].p_lprev;
seg_pahhead[hlix].p_lprev->p_lnext = seg_pahhead[hlix].p_lnext;
seg_pahhead[hlix].p_lnext = hlinkp;
seg_pahhead[hlix].p_lprev = hlinkp->p_lprev;
hlinkp->p_lprev->p_lnext = &seg_pahhead[hlix];
hlinkp->p_lprev = &seg_pahhead[hlix];
}
mutex_enter(&seg_pmem_mtx);
ASSERT(seg_pathr_on);
seg_pathr_on = 0;
mutex_exit(&seg_pmem_mtx);
runcb:
/* Run the callbacks and free the collected entries without any locks. */
while (delcallb_list != NULL) {
pcp = delcallb_list;
delcallb_list = pcp->p_hprev;
ASSERT(!pcp->p_active);
(void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 1);
npages += btop(pcp->p_len);
if (!IS_PCP_WIRED(pcp)) {
npages_window += btop(pcp->p_len);
}
kmem_cache_free(seg_pkmcache, pcp);
}
if (npages) {
mutex_enter(&seg_pmem_mtx);
ASSERT(seg_plocked >= npages);
ASSERT(seg_plocked_window >= npages_window);
seg_plocked -= npages;
seg_plocked_window -= npages_window;
mutex_exit(&seg_pmem_mtx);
}
}
/*
 * Purge every inactive wired entry whose shadow list is pp.
 *
 * The key (seg/amp) is not known here, so every wired bucket is visited.
 * Matching entries are unhooked under the bucket lock and their callbacks
 * are run after the lock is dropped, bucket by bucket.  A non zero return
 * from a callback ends the whole walk early.
 */
void
seg_ppurge_wiredpp(struct page **pp)
{
struct seg_pcache *pcp;
struct seg_phash_wired *hp;
pgcnt_t npages = 0;
struct seg_pcache *delcallb_list = NULL;
if (seg_plocked == 0) {
return;
}
ASSERT(seg_phashsize_wired != 0);
for (hp = seg_phashtab_wired;
hp < &seg_phashtab_wired[seg_phashsize_wired]; hp++) {
/* Unlocked peek; empty buckets are skipped without locking. */
if (hp->p_hnext == (struct seg_pcache *)hp) {
continue;
}
mutex_enter(&hp->p_hmutex);
pcp = hp->p_hnext;
while (pcp != (struct seg_pcache *)hp) {
ASSERT(pcp->p_hashp == (struct seg_phash *)hp);
ASSERT(IS_PCP_WIRED(pcp));
if (!pcp->p_active && pcp->p_pp == pp) {
ASSERT(pcp->p_htag0 != NULL);
pcp->p_hprev->p_hnext = pcp->p_hnext;
pcp->p_hnext->p_hprev = pcp->p_hprev;
/* p_hprev now chains the delete list; p_hnext still walks the bucket. */
pcp->p_hprev = delcallb_list;
delcallb_list = pcp;
}
pcp = pcp->p_hnext;
}
mutex_exit(&hp->p_hmutex);
while (delcallb_list != NULL) {
int done;
pcp = delcallb_list;
delcallb_list = pcp->p_hprev;
ASSERT(!pcp->p_active);
done = (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
pcp->p_len, pcp->p_pp,
pcp->p_write ? S_WRITE : S_READ, 1);
npages += btop(pcp->p_len);
ASSERT(IS_PCP_WIRED(pcp));
kmem_cache_free(seg_pkmcache, pcp);
/* The callback reports that no further entries need purging. */
if (done) {
ASSERT(delcallb_list == NULL);
goto out;
}
}
}
out:
/* Wired entries only count against seg_plocked, not the window. */
mutex_enter(&seg_pmem_mtx);
ASSERT(seg_plocked >= npages);
seg_plocked -= npages;
mutex_exit(&seg_pmem_mtx);
}
/*
 * Purge the cached entries that belong to one segment or anon map.
 *
 * The key is amp when it is not NULL and seg otherwise.
 *
 * Wired (SEGP_FORCE_WIRED in flags): the single bucket selected by the key is
 * scanned and inactive entries with that key are removed; the scan stops at
 * the first active entry with that key.
 *
 * Non wired: entries are taken from the head of the per seg/amp list until
 * the list is empty or an active entry is found at its head.  The per seg/amp
 * mutex is held across the walk and each entry's bucket mutex is taken
 * inside it.
 *
 * Callbacks are run and entries freed after all locks are dropped, and the
 * page accounting is then reduced by the number of pages released.
 */
void
seg_ppurge(struct seg *seg, struct anon_map *amp, uint_t flags)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	void *htag0;

	if (seg_plocked == 0) {
		return;
	}
	ASSERT(seg_phashsize_win != 0);

	htag0 = (amp == NULL ? (void *)seg : (void *)amp);
	ASSERT(htag0 != NULL);

	if (IS_PFLAGS_WIRED(flags)) {
		/* Wired hashing ignores the address, so 0 is passed. */
		hp = P_HASHBP(seg, htag0, 0, flags);
		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;
		while (pcp != (struct seg_pcache *)hp) {
			ASSERT(pcp->p_hashp == hp);
			ASSERT(IS_PCP_WIRED(pcp));
			if (pcp->p_htag0 == htag0) {
				if (pcp->p_active) {
					break;
				}
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;
				/*
				 * p_hprev now chains the delete list while
				 * p_hnext still leads on through the bucket.
				 */
				pcp->p_hprev = delcallb_list;
				delcallb_list = pcp;
			}
			pcp = pcp->p_hnext;
		}
		mutex_exit(&hp->p_hmutex);
	} else {
		pcache_link_t *plinkp;
		pcache_link_t *pheadp;
		kmutex_t *pmtx;

		if (amp == NULL) {
			ASSERT(seg != NULL);
			pheadp = &seg->s_phead;
			pmtx = &seg->s_pmtx;
		} else {
			pheadp = &(amp->a_phead);
			pmtx = &(amp->a_pmtx);
		}
		mutex_enter(pmtx);
		while ((plinkp = pheadp->p_lnext) != pheadp) {
			pcp = plink2pcache(plinkp);
			ASSERT(!IS_PCP_WIRED(pcp));
			ASSERT(pcp->p_htag0 == htag0);
			hp = pcp->p_hashp;
			mutex_enter(&hp->p_hmutex);
			if (pcp->p_active) {
				mutex_exit(&hp->p_hmutex);
				break;
			}
			/* Unlink from the per seg/amp list and the bucket. */
			ASSERT(plinkp->p_lprev == pheadp);
			pheadp->p_lnext = plinkp->p_lnext;
			plinkp->p_lnext->p_lprev = pheadp;
			pcp->p_hprev->p_hnext = pcp->p_hnext;
			pcp->p_hnext->p_hprev = pcp->p_hprev;
			pcp->p_hprev = delcallb_list;
			delcallb_list = pcp;
			if (hp->p_hnext == (struct seg_pcache *)hp) {
				seg_premove_abuck(hp, 0);
			}
			mutex_exit(&hp->p_hmutex);
		}
		mutex_exit(pmtx);
	}

	while (delcallb_list != NULL) {
		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;
		ASSERT(!pcp->p_active);
		(void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr, pcp->p_len,
		    pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
		npages += btop(pcp->p_len);
		kmem_cache_free(seg_pkmcache, pcp);
	}

	mutex_enter(&seg_pmem_mtx);
	ASSERT(seg_plocked >= npages);
	seg_plocked -= npages;
	if (!IS_PFLAGS_WIRED(flags)) {
		ASSERT(seg_plocked_window >= npages);
		seg_plocked_window -= npages;
	}
	mutex_exit(&seg_pmem_mtx);
}
static void seg_pinit_mem_config(void);

/*
 * Set up the pagelock cache: control variables, locks, both hash tables,
 * the active-bucket lists and the window limit, then register for memory
 * delete notifications.  If segpcache_enabled is 0 the cache is marked
 * disabled with zero sized tables and nothing else is initialized.
 *
 * Tunables left at 0 (or set out of range) are replaced by defaults scaled
 * from physical memory, and the chosen values are written back to the
 * tunables.  Both hash table sizes are forced to powers of two because the
 * hash macros mask with (size - 1).
 */
static void
seg_pinit(void)
{
	struct seg_phash *hp;
	ulong_t i;
	pgcnt_t physmegs;

	seg_plocked = 0;
	seg_plocked_window = 0;

	if (segpcache_enabled == 0) {
		seg_phashsize_win = 0;
		seg_phashsize_wired = 0;
		seg_pdisabled = 1;
		return;
	}

	seg_pdisabled = 0;
	seg_pkmcache = kmem_cache_create("seg_pcache",
	    sizeof (struct seg_pcache), 0, NULL, NULL, NULL, NULL, NULL, 0);

	if (segpcache_pcp_maxage_ticks <= 0) {
		segpcache_pcp_maxage_ticks = segpcache_pcp_maxage_sec * hz;
	}
	seg_pmax_pcpage = segpcache_pcp_maxage_ticks;
	seg_pathr_empty_ahb = 0;
	seg_pathr_full_ahb = 0;
	seg_pshrink_shift = segpcache_shrink_shift;
	seg_pmaxapurge_npages = btop(segpcache_maxapurge_bytes);

	mutex_init(&seg_pcache_mtx, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&seg_pmem_mtx, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&seg_pasync_mtx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&seg_pasync_cv, NULL, CV_DEFAULT, NULL);

	/* Physical memory in megabytes. */
	physmegs = physmem >> (20 - PAGESHIFT);

	/*
	 * Non wired table: by default one bucket per 32K of physical memory
	 * (at least 8 pages per bucket), with a floor of 1024 buckets.
	 */
	if (segpcache_hashsize_win == 0 || segpcache_hashsize_win > physmem) {
		pgcnt_t pages_per_bucket = MAX(btop(32 * 1024), 8);
		segpcache_hashsize_win = MAX(1024, physmem / pages_per_bucket);
	}
	if (!ISP2(segpcache_hashsize_win)) {
		/*
		 * Round to the nearest power of two: add the bits below the
		 * top bit back in, which carries into the next bit when they
		 * amount to at least half of the top bit, then keep only the
		 * top bit.  highbit() is taken to return the 1-based position
		 * of the most significant set bit.  The shift is done in
		 * unsigned long so that large sizes do not overflow int.
		 */
		ulong_t rndfac = ~(1UL <<
		    (highbit(segpcache_hashsize_win) - 1));
		rndfac &= segpcache_hashsize_win;
		segpcache_hashsize_win += rndfac;
		segpcache_hashsize_win = 1UL <<
		    (highbit(segpcache_hashsize_win) - 1);
	}
	seg_phashsize_win = segpcache_hashsize_win;
	seg_phashtab_win = kmem_zalloc(
	    seg_phashsize_win * sizeof (struct seg_phash),
	    KM_SLEEP);
	for (i = 0; i < seg_phashsize_win; i++) {
		/* An empty chain points back at its own bucket. */
		hp = &seg_phashtab_win[i];
		hp->p_hnext = (struct seg_pcache *)hp;
		hp->p_hprev = (struct seg_pcache *)hp;
		mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
	}

	/* Both active-bucket lists start out empty. */
	seg_pahcur = 0;
	seg_pathr_on = 0;
	seg_pahhead[0].p_lnext = &seg_pahhead[0];
	seg_pahhead[0].p_lprev = &seg_pahhead[0];
	seg_pahhead[1].p_lnext = &seg_pahhead[1];
	seg_pahhead[1].p_lprev = &seg_pahhead[1];

	/*
	 * Wired table: by default 8 buckets per megabyte below 20G of
	 * memory (floor 1024), and 256K buckets above that.
	 */
	if (segpcache_hashsize_wired == 0 ||
	    segpcache_hashsize_wired > physmem / 4) {
		if (physmegs < 20 * 1024) {
			segpcache_hashsize_wired = MAX(1024, physmegs << 3);
		} else {
			segpcache_hashsize_wired = 256 * 1024;
		}
	}
	if (!ISP2(segpcache_hashsize_wired)) {
		/* Round up to the next power of two, in unsigned long. */
		segpcache_hashsize_wired = 1UL <<
		    highbit(segpcache_hashsize_wired);
	}
	seg_phashsize_wired = segpcache_hashsize_wired;
	seg_phashtab_wired = kmem_zalloc(
	    seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
	for (i = 0; i < seg_phashsize_wired; i++) {
		/* Wired buckets share the leading layout of seg_phash. */
		hp = (struct seg_phash *)&seg_phashtab_wired[i];
		hp->p_hnext = (struct seg_pcache *)hp;
		hp->p_hprev = (struct seg_pcache *)hp;
		mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Default window: a growing share of available memory as the machine
	 * gets bigger, and unlimited from 2G of physical memory upwards.
	 */
	if (segpcache_maxwindow == 0) {
		if (physmegs < 64) {
			segpcache_maxwindow = availrmem >> 5;
		} else if (physmegs < 512) {
			segpcache_maxwindow = availrmem >> 3;
		} else if (physmegs < 1024) {
			segpcache_maxwindow = availrmem >> 2;
		} else if (physmegs < 2048) {
			segpcache_maxwindow = availrmem >> 1;
		} else {
			segpcache_maxwindow = (pgcnt_t)-1;
		}
	}
	seg_pmaxwindow = segpcache_maxwindow;

	seg_pinit_mem_config();
}
/*
 * Nudge the async reaper thread.  Nothing is signalled when no non wired
 * pages are cached or when caching is currently disabled.
 */
void
seg_preap(void)
{
	if (seg_plocked_window != 0) {
		ASSERT(seg_phashsize_win != 0);
		if (!seg_pdisabled) {
			cv_signal(&seg_pasync_cv);
		}
	}
}
/*
 * Body of the pcache reaper thread.  It exits immediately when the pcache is
 * not configured.  Otherwise it loops forever: sleep until signalled through
 * seg_pasync_cv or until segpcache_reap_ticks have passed, then run a non
 * forced purge unless caching is disabled.  The sleep is bracketed by the
 * CPR safe macros.
 */
void
seg_pasync_thread(void)
{
	callb_cpr_t cpr_info;

	if (seg_phashsize_win == 0) {
		thread_exit();
	}

	seg_pasync_thr = curthread;

	CALLB_CPR_INIT(&cpr_info, &seg_pasync_mtx,
	    callb_generic_cpr, "seg_pasync");

	/* Derive the period from the seconds tunable unless ticks were set. */
	if (!(segpcache_reap_ticks > 0)) {
		segpcache_reap_ticks = segpcache_reap_sec * hz;
	}

	mutex_enter(&seg_pasync_mtx);
	for (;;) {
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		(void) cv_reltimedwait(&seg_pasync_cv, &seg_pasync_mtx,
		    segpcache_reap_ticks, TR_CLOCK_TICK);
		CALLB_CPR_SAFE_END(&cpr_info, &seg_pasync_mtx);

		if (!seg_pdisabled) {
			seg_ppurge_async(0);
		}
	}
}
/* kmem cache that struct seg is allocated from. */
static struct kmem_cache *seg_cache;

/*
 * Initialize the segment layer: create the seg kmem cache, publish the
 * segadvstat counters as a virtual kstat (silently skipped if the kstat
 * cannot be created) and set up the pagelock cache.
 */
void
seg_init(void)
{
	kstat_t *advksp;

	seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
	    0, NULL, NULL, NULL, NULL, NULL, 0);

	advksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
	    segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
	if (advksp != NULL) {
		advksp->ks_data = (void *)segadvstat_ptr;
		kstat_install(advksp);
	}

	seg_pinit();
}
/*
 * Allocate a segment covering [base, base + size), with the range widened to
 * page boundaries, and attach it to address space as.
 *
 * Returns the new segment, or NULL if the range is not a valid virtual
 * address range, is not acceptable for a user address space, or cannot be
 * added to as.  The segment's ops and private data are left NULL for the
 * caller to fill in.
 */
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
	struct seg *new;
	caddr_t segbase;
	size_t segsize;

	/* Round the start down and the end up to page boundaries. */
	segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
	segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
	    (uintptr_t)segbase;

	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
		return ((struct seg *)NULL);

	if (as != &kas &&
	    valid_usr_range(segbase, segsize, 0, as,
	    as->a_userlimit) != RANGE_OKAY)
		return ((struct seg *)NULL);

	new = kmem_cache_alloc(seg_cache, KM_SLEEP);
	new->s_ops = NULL;
	new->s_data = NULL;
	new->s_szc = 0;
	new->s_flags = 0;
	/* Lock and empty list head for this segment's pcache entries. */
	mutex_init(&new->s_pmtx, NULL, MUTEX_DEFAULT, NULL);
	new->s_phead.p_lnext = &new->s_phead;
	new->s_phead.p_lprev = &new->s_phead;
	if (seg_attach(as, segbase, segsize, new) < 0) {
		/* Undo mutex_init() before the memory goes back to the cache. */
		mutex_destroy(&new->s_pmtx);
		kmem_cache_free(seg_cache, new);
		return ((struct seg *)NULL);
	}
	return (new);
}
/*
 * Record the address space, base and size in seg and add it to as.
 * Returns whatever as_addseg() returns.
 */
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
	int rv;

	seg->s_as = as;
	seg->s_base = base;
	seg->s_size = size;

	rv = as_addseg(as, seg);
	return (rv);
}
/*
 * Unmap the whole of seg through its SEGOP_UNMAP operation.  The caller
 * holds the address space lock as writer and the segment has private data.
 * On DEBUG kernels the unmap is asserted to succeed; otherwise its result is
 * not examined.
 */
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
	int ret;
#endif

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
	ASSERT(seg->s_data != NULL);

#ifndef DEBUG
	SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
#else
	ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
	ASSERT(ret == 0);
#endif
}
/*
 * Remove seg from its address space and release it.  Driver private data,
 * if any, is released through SEGOP_FREE first.  The segment must have no
 * pcache entries left on its list.
 */
void
seg_free(struct seg *seg)
{
	struct as *owner = seg->s_as;
	struct seg *removed;

	removed = as_removeseg(owner, seg);
	ASSERT(removed == seg);

	if (seg->s_data != NULL) {
		SEGOP_FREE(seg);
	}

	mutex_destroy(&seg->s_pmtx);
	ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
	ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
	kmem_cache_free(seg_cache, seg);
}
/*
 * Memory add notification.  The pcache takes no action when memory is
 * added; this function only fills its slot in seg_p_mem_config_vec.
 */
static void
seg_p_mem_config_post_add(void *arg, pgcnt_t delta_pages)
{
	/* Intentionally empty. */
}
/*
 * Undo one seg_p_disable() by decrementing the disable count.  Caching
 * resumes once the count is back to zero.
 */
void
seg_p_enable(void)
{
	mutex_enter(&seg_pcache_mtx);
	ASSERT(seg_pdisabled != 0);
	seg_pdisabled -= 1;
	mutex_exit(&seg_pcache_mtx);
}
int
seg_p_disable(void)
{
pgcnt_t old_plocked;
int stall_count = 0;
mutex_enter(&seg_pcache_mtx);
seg_pdisabled++;
ASSERT(seg_pdisabled != 0);
mutex_exit(&seg_pcache_mtx);
while (seg_plocked != 0) {
ASSERT(seg_phashsize_win != 0);
old_plocked = seg_plocked;
seg_ppurge_async(1);
if (seg_plocked == old_plocked) {
if (stall_count++ > SEGP_STALL_THRESHOLD) {
return (SEGP_FAIL);
}
} else
stall_count = 0;
if (seg_plocked != 0)
delay(hz/SEGP_PREDEL_DELAY_FACTOR);
}
return (SEGP_SUCCESS);
}
/*
 * Memory pre-delete notification: disable and drain the pcache.  Failure to
 * drain is only logged; 0 is always returned.  Nothing is done when the
 * pcache is not configured.
 */
static int
seg_p_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
{
	if (seg_phashsize_win != 0) {
		if (seg_p_disable() != SEGP_SUCCESS) {
			cmn_err(CE_NOTE,
			    "!Pre-delete couldn't purge"" pagelock cache - continuing");
		}
	}
	return (0);
}
/*
 * Memory post-delete notification: re-enable the pcache, whether or not the
 * delete was cancelled.  Nothing is done when the pcache is not configured.
 */
static void
seg_p_mem_config_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
{
	if (seg_phashsize_win != 0) {
		seg_p_enable();
	}
}
/*
 * Callback vector handed to kphysm_setup_func_register() by
 * seg_pinit_mem_config(): interface version followed by the post-add,
 * pre-delete and post-delete handlers defined above.
 */
static kphysm_setup_vector_t seg_p_mem_config_vec = {
KPHYSM_SETUP_VECTOR_VERSION,
seg_p_mem_config_post_add,
seg_p_mem_config_pre_del,
seg_p_mem_config_post_del,
};
/*
 * Register the pcache memory configuration callbacks.  Registration is
 * asserted to succeed on DEBUG kernels.
 */
static void
seg_pinit_mem_config(void)
{
	int err;

	err = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
	ASSERT(err == 0);
}
/*
 * Report whether seg permits its process to change zones.
 *
 * Returns B_FALSE for segspt shared memory segments and for segvn segments
 * that are MAP_SHARED with an anon map holding a swap reservation.  Returns
 * B_TRUE for everything else.
 */
boolean_t
seg_can_change_zones(struct seg *seg)
{
	if (seg->s_ops == &segspt_shmops) {
		return (B_FALSE);
	}

	if (seg->s_ops == &segvn_ops) {
		struct segvn_data *vnd = (struct segvn_data *)seg->s_data;

		if (vnd->type == MAP_SHARED && vnd->amp != NULL &&
		    vnd->amp->swresv > 0) {
			return (B_FALSE);
		}
	}

	return (B_TRUE);
}
/*
 * Return the swap reserved by seg: the segvn reservation for a MAP_PRIVATE
 * segvn segment, and 0 for any other segment.
 */
size_t
seg_swresv(struct seg *seg)
{
	struct segvn_data *vnd;

	if (seg->s_ops != &segvn_ops) {
		return (0);
	}

	vnd = (struct segvn_data *)seg->s_data;
	if (vnd->type != MAP_PRIVATE || !(vnd->swresv > 0)) {
		return (0);
	}

	return (vnd->swresv);
}
/*
 * Inherit operation for segment drivers that do not support it.  All
 * arguments are ignored and ENOTSUP is always returned.
 */
int
seg_inherit_notsup(struct seg *seg, caddr_t addr, size_t len, uint_t op)
{
	const int unsupported = ENOTSUP;

	return (unsupported);
}