#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/memlist.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/vmsystm.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/mem_cage.h>
#include <vm/seg_kmem.h>
#include <vm/page.h>
#include <vm/hat.h>
#include <vm/vm_dep.h>
#include <sys/mem_config.h>
#include <sys/lgrp.h>
#include <sys/rwlock.h>
#include <sys/cpupart.h>
extern pri_t maxclsyspri;
/*
 * Cage statistics.  KCAGE_STATS is switched on automatically for DEBUG
 * builds; when it is off every KCAGE_STAT_* macro below expands to nothing,
 * so call sites must always be written as complete statements
 * ("KCAGE_STAT_xxx(...);").
 */
#ifdef DEBUG
#define KCAGE_STATS
#endif
#ifdef KCAGE_STATS
#define KCAGE_STATS_VERSION 9
#define KCAGE_STATS_NSCANS 256
/*
 * Per-scan counters: one slot of the kcage_stats.scans[] ring.
 *   kt_*  are updated from kcage_cageout(),
 *   kip_* from kcage_invalidate_page(),
 *   ke_*  from kcage_expand().
 */
struct kcage_stats_scan {
	clock_t scan_lbolt;	/* lbolt when the slot was closed */
	uint_t scan_id;		/* ring index of the slot */
	uint_t kt_passes;
	clock_t kt_ticks;
	pgcnt_t kt_kcage_freemem_start;
	pgcnt_t kt_kcage_freemem_end;
	pgcnt_t kt_freemem_start;
	pgcnt_t kt_freemem_end;
	uint_t kt_examined;
	uint_t kt_cantlock;
	uint_t kt_gotone;
	uint_t kt_gotonefree;
	uint_t kt_skipshared;
	uint_t kt_skiprefd;
	uint_t kt_destroy;
	uint_t kip_reloclocked;
	uint_t kip_relocmod;
	uint_t kip_destroy;
	uint_t kip_nomem;
	uint_t kip_demotefailed;
	uint_t ke_wanted;
	uint_t ke_examined;
	uint_t ke_lefthole;
	uint_t ke_gotone;
	uint_t ke_gotonefree;
};
/*
 * Global counters plus the ring of per-scan records.
 *   kct_* are updated from kcage_create_throttle(),
 *   kfa_* from kcage_freemem_add(), kfs_* from kcage_freemem_sub(),
 *   kcw_* from kcage_cageout_wakeup().
 */
struct kcage_stats {
	uint_t version;		/* KCAGE_STATS_VERSION */
	uint_t size;		/* sizeof (kcage_stats) */
	uint_t kt_wakeups;
	uint_t kt_scans;
	uint_t kt_cageout_break;
	uint_t ke_calls;
	uint_t ke_nopfn;
	uint_t ke_nopaget;
	uint_t ke_isnoreloc;
	uint_t ke_deleting;
	uint_t ke_lowfreemem;
	uint_t ke_terminate;
	uint_t kfa_trottlewake;
	uint_t kfs_cagewake;
	uint_t kct_calls;
	uint_t kct_cageout;
	uint_t kct_critical;
	uint_t kct_exempt;
	uint_t kct_cagewake;
	uint_t kct_wait;
	uint_t kct_progress;
	uint_t kct_noprogress;
	uint_t kct_timeout;
	uint_t kcw_expandearly;
	uint_t scan_array_size;	/* KCAGE_STATS_NSCANS */
	uint_t scan_index;	/* slot currently being filled */
	struct kcage_stats_scan scans[KCAGE_STATS_NSCANS];
};
static struct kcage_stats kcage_stats;
/* All-zero template used to clear a ring slot before it is reused. */
static struct kcage_stats_scan kcage_stats_scan_zero;
#define KCAGE_STAT_INCR(m) kcage_stats.m++
#define KCAGE_STAT_NINCR(m, v) kcage_stats.m += (v)
#define KCAGE_STAT_INCR_SCAN(m) \
	KCAGE_STAT_INCR(scans[kcage_stats.scan_index].m)
#define KCAGE_STAT_NINCR_SCAN(m, v) \
	KCAGE_STAT_NINCR(scans[kcage_stats.scan_index].m, v)
#define KCAGE_STAT_SET(m, v) kcage_stats.m = (v)
/*
 * Set only if still zero.  Wrapped in do/while so the embedded "if" can
 * never capture an "else" belonging to the caller.
 */
#define KCAGE_STAT_SETZ(m, v) \
	do { \
		if (kcage_stats.m == 0) \
			kcage_stats.m = (v); \
	} while (0)
#define KCAGE_STAT_SET_SCAN(m, v) \
	KCAGE_STAT_SET(scans[kcage_stats.scan_index].m, v)
#define KCAGE_STAT_SETZ_SCAN(m, v) \
	KCAGE_STAT_SETZ(scans[kcage_stats.scan_index].m, v)
/*
 * Close the current scan slot (stamp it with lbolt and its index), advance
 * to the next ring slot and clear it.  Wrapped in do/while so that all four
 * statements stay together when the macro is used as the body of an
 * unbraced if/else.
 */
#define KCAGE_STAT_INC_SCAN_INDEX \
	do { \
		KCAGE_STAT_SET_SCAN(scan_lbolt, ddi_get_lbolt()); \
		KCAGE_STAT_SET_SCAN(scan_id, kcage_stats.scan_index); \
		kcage_stats.scan_index = \
		    (kcage_stats.scan_index + 1) % KCAGE_STATS_NSCANS; \
		kcage_stats.scans[kcage_stats.scan_index] = \
		    kcage_stats_scan_zero; \
	} while (0)
/* Fill in the self-describing header fields and start at slot 0. */
#define KCAGE_STAT_INIT_SCAN_INDEX \
	do { \
		kcage_stats.version = KCAGE_STATS_VERSION; \
		kcage_stats.size = sizeof (kcage_stats); \
		kcage_stats.scan_array_size = KCAGE_STATS_NSCANS; \
		kcage_stats.scan_index = 0; \
	} while (0)
#else
/* Statistics disabled: every macro compiles away. */
#define KCAGE_STAT_INCR(v)
#define KCAGE_STAT_NINCR(m, v)
#define KCAGE_STAT_INCR_SCAN(v)
#define KCAGE_STAT_NINCR_SCAN(m, v)
#define KCAGE_STAT_SET(m, v)
#define KCAGE_STAT_SETZ(m, v)
#define KCAGE_STAT_SET_SCAN(m, v)
#define KCAGE_STAT_SETZ_SCAN(m, v)
#define KCAGE_STAT_INC_SCAN_INDEX
#define KCAGE_STAT_INIT_SCAN_INDEX
#endif
/*
 * Throttle mutex/cv pair: allocators block on it in kcage_create_throttle()
 * and are woken by cv_broadcast() from kcage_freemem_add(),
 * kcage_recalc_thresholds(), kcage_cageout() and kcage_tick().
 */
static kmutex_t kcage_throttle_mutex;
static kcondvar_t kcage_throttle_cv;
/*
 * Cageout mutex/cv pair: the cageout thread sleeps on it in kcage_cageout()
 * and kcage_cageout_wakeup() signals it.
 */
static kmutex_t kcage_cageout_mutex;
static kcondvar_t kcage_cageout_cv;
/*
 * Set to 1 by kcage_cageout() once the thread is running; cleared and
 * restored around checkpoint/resume by kcage_cageout_cpr().
 */
static int kcage_cageout_ready;
/* The cageout thread itself; assigned in kcage_cageout(). */
kthread_id_t kcage_cageout_thread;
/* Reader/writer lock taken by the kcage_range_* and kstat routines around glist access. */
static krwlock_t kcage_range_rwlock;
/*
 * One element of the cage growth list: a PFN range [base, lim) together
 * with a cursor marking how much of it has been handed out by
 * kcage_get_pfn().
 */
struct kcage_glist {
struct kcage_glist *next;	/* next element in the singly linked list */
pfn_t base;	/* first PFN of the range */
pfn_t lim;	/* one past the last PFN of the range */
pfn_t curr;	/* cursor: starts at lim when decr != 0, at base otherwise */
int decr;	/* non-zero: cursor moves down from lim; zero: up from base */
};
/* Head of the growth list. */
static struct kcage_glist *kcage_glist;
/* Element kcage_get_pfn() is currently allocating from. */
static struct kcage_glist *kcage_current_glist;
/* vmem arena created in kcage_range_init(); used by kcage_glist_alloc(). */
static vmem_t *kcage_arena;
/* Statically allocated first element so that one allocation never fails. */
static struct kcage_glist kcage_glist_firstfree;
/* Free list of glist elements; kcage_glist_free() pushes, kcage_glist_alloc() pops. */
static struct kcage_glist *kcage_glist_freelist = &kcage_glist_firstfree;
/* Forward declarations for routines used before their definition. */
static struct kcage_glist *kcage_glist_alloc(void);
static int kcage_glist_delete(pfn_t, pfn_t, struct kcage_glist **);
static void kcage_cageout(void);
static int kcage_invalidate_page(page_t *, pgcnt_t *);
static int kcage_setnoreloc_pages(page_t *, se_t);
static int kcage_range_add_internal(pfn_t base, pgcnt_t npgs, kcage_dir_t);
static void kcage_init(pgcnt_t preferred_size);
static int kcage_range_delete_internal(pfn_t base, pgcnt_t npgs);
/* Set to 1 at the end of the cage construction in kcage_init(). */
int kcage_on = 0;
/* Free-page count for the cage; adjusted by kcage_freemem_add()/kcage_freemem_sub(). */
pgcnt_t kcage_freemem;
/* Pages currently being waited for by throttled callers (see kcage_create_throttle()). */
pgcnt_t kcage_needfree;
/*
 * Thresholds (in pages).  A value of zero means "pick a default";
 * the defaults are computed in kcage_recalc_thresholds().
 */
pgcnt_t kcage_lotsfree;
pgcnt_t kcage_desfree;
pgcnt_t kcage_minfree;
pgcnt_t kcage_throttlefree;
pgcnt_t kcage_reserve;
/* NOTE(review): not referenced in the code visible here; purpose unconfirmed. */
int kcage_maxwait = 10;
/* Minimum cage size for large-page kmem; computed in kcage_recalc_preferred_size() if zero. */
pgcnt_t kcage_kmemlp_mincage;
#ifdef DEBUG
/* DEBUG-only count of page_t lookups that succeeded while building/expanding the cage. */
pgcnt_t kcage_pagets;
#define KCAGEPAGETS_INC() kcage_pagets++
#else
#define KCAGEPAGETS_INC()
#endif
/* Installed as ks_lock of the "kcage_page_list" kstat in kcage_init(). */
kmutex_t kcage_kstat_lock;
static int kcage_kstat_update(kstat_t *ksp, int rw);
static int kcage_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
/*
 * Report the cursor of the glist element currently being allocated from.
 *
 * Stores that element's cursor PFN in *pfncur and returns its direction
 * flag (non-zero if the element's cursor moves downwards).  The cage must be
 * enabled and a current element must exist.
 */
int
kcage_current_pfn(pfn_t *pfncur)
{
	struct kcage_glist *cur;

	ASSERT(kcage_on);

	cur = kcage_current_glist;
	ASSERT(cur != NULL);

	*pfncur = cur->curr;
	return (cur->decr);
}
/*
 * Find a sub-range of [lo, hi) that lies in one part of a glist element.
 *
 * With incage != 0 the consumed part of each element is considered, walking
 * from the list head and stopping after kcage_current_glist.  With
 * incage == 0 the not-yet-consumed part is considered, walking from
 * kcage_current_glist to the end of the list.
 *
 * Returns 0 and stores the overlap in *nlo / *nhi, or returns 1 (outputs
 * untouched) when no overlap was found.  The range lock is held as reader
 * for the duration of the walk.
 */
int
kcage_next_range(int incage, pfn_t lo, pfn_t hi,
    pfn_t *nlo, pfn_t *nhi)
{
	struct kcage_glist *elem;
	pfn_t found_lo = hi;
	pfn_t found_hi = hi;

	ASSERT(lo <= hi);

	rw_enter(&kcage_range_rwlock, RW_READER);

	elem = incage ? kcage_glist : kcage_current_glist;
	for (; elem != NULL; elem = elem->next) {
		pfn_t seg_lo, seg_hi;
		int upper_part;

		/* Which side of the cursor are we interested in? */
		upper_part = incage ? (elem->decr != 0) : (elem->decr == 0);
		if (upper_part) {
			seg_lo = elem->curr;
			seg_hi = elem->lim;
		} else {
			seg_lo = elem->base;
			seg_hi = elem->curr;
		}

		/* Non-empty segment that starts lower than the best so far. */
		if (seg_lo < found_lo && seg_lo < seg_hi &&
		    lo < seg_hi && seg_lo < hi) {
			found_lo = MAX(lo, seg_lo);
			found_hi = MIN(hi, seg_hi);
			/* Cannot do better than starting exactly at lo. */
			if (found_lo == lo)
				break;
		}

		/* The in-cage walk ends at the current element. */
		if (incage && elem == kcage_current_glist)
			break;
	}

	rw_exit(&kcage_range_rwlock);

	if (found_lo == found_hi)
		return (1);

	ASSERT(lo <= found_lo && found_lo < found_hi && found_hi <= hi);

	*nlo = found_lo;
	*nhi = found_hi;
	return (0);
}
/*
 * Create the glist arena, seed the growth list from a memlist and build the
 * initial cage.
 *
 * ml             - memlist of candidate ranges (may be NULL: nothing added)
 * d              - growth direction; for KCAGE_DOWN the memlist is walked
 *                  from its tail towards its head via ml_prev
 * preferred_size - requested initial cage size in pages, passed to
 *                  kcage_init()
 *
 * Panics if any range cannot be added.
 */
void
kcage_range_init(struct memlist *ml, kcage_dir_t d, pgcnt_t preferred_size)
{
	int ret = 0;

	ASSERT(kcage_arena == NULL);
	kcage_arena = vmem_create("kcage_arena", NULL, 0, sizeof (uint64_t),
	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
	ASSERT(kcage_arena != NULL);

	/*
	 * For a downward-growing cage start from the last memlist entry.
	 * Guard against an empty list so this path tolerates NULL just like
	 * the add loop below does.
	 */
	if (d == KCAGE_DOWN && ml != NULL) {
		while (ml->ml_next != NULL)
			ml = ml->ml_next;
	}

	rw_enter(&kcage_range_rwlock, RW_WRITER);

	while (ml != NULL) {
		ret = kcage_range_add_internal(btop(ml->ml_address),
		    btop(ml->ml_size), d);
		if (ret)
			panic("kcage_range_add_internal failed: "
			    "ml=%p, ret=0x%x\n", (void *)ml, ret);

		ml = (d == KCAGE_DOWN ? ml->ml_prev : ml->ml_next);
	}

	rw_exit(&kcage_range_rwlock);

	if (ret == 0)
		kcage_init(preferred_size);
}
/*
 * Append the PFN range [base, base + npgs) to the growth list.
 *
 * Any part of the new range that overlaps an existing element is first
 * carved out of the new range, so only the non-overlapping remainder is
 * appended.  The caller must hold kcage_range_rwlock as writer.
 *
 * Returns 0 on success, EINVAL for an empty or wrapping range, ENOMEM if a
 * list element cannot be allocated, or the error from kcage_glist_delete().
 */
static int
kcage_range_add_internal(pfn_t base, pgcnt_t npgs, kcage_dir_t d)
{
	struct kcage_glist *fresh;
	struct kcage_glist **tailp;
	pfn_t end;

	ASSERT(rw_write_held(&kcage_range_rwlock));

	/* The checks are repeated after the ASSERTs for non-DEBUG kernels. */
	ASSERT(npgs != 0);
	if (npgs == 0)
		return (EINVAL);

	end = base + npgs;
	ASSERT(end > base);
	if (end <= base)
		return (EINVAL);

	fresh = kcage_glist_alloc();
	if (fresh == NULL)
		return (ENOMEM);

	fresh->base = base;
	fresh->lim = end;
	fresh->decr = (d == KCAGE_DOWN);
	/* The cursor starts at the end the cage grows away from. */
	fresh->curr = (fresh->decr != 0) ? fresh->lim : fresh->base;

	/*
	 * Walk to the tail, trimming from the new range whatever each
	 * existing element already covers.
	 */
	for (tailp = &kcage_glist; *tailp != NULL; tailp = &(*tailp)->next) {
		int err;

		err = kcage_glist_delete((*tailp)->base, (*tailp)->lim,
		    &fresh);
		if (err != 0)
			return (err);
	}
	*tailp = fresh;

	if (kcage_current_glist == NULL)
		kcage_current_glist = kcage_glist;

	return (0);
}
/*
 * Locked wrapper around kcage_range_add_internal(): takes
 * kcage_range_rwlock as writer for the duration of the call and returns the
 * internal routine's result.
 */
int
kcage_range_add(pfn_t base, pgcnt_t npgs, kcage_dir_t d)
{
	int err;

	rw_enter(&kcage_range_rwlock, RW_WRITER);
	err = kcage_range_add_internal(base, npgs, d);
	rw_exit(&kcage_range_rwlock);

	return (err);
}
/*
 * Remove the PFN range [base, base + npgs) from the growth list, provided
 * none of it has already been consumed by the cage.
 *
 * The whole list is checked before anything is modified.  The caller must
 * hold kcage_range_rwlock as writer.
 *
 * Returns 0 on success, EINVAL for an empty or wrapping range, EBUSY if the
 * range intersects the consumed part of an element, or the error from
 * kcage_glist_delete().
 */
static int
kcage_range_delete_internal(pfn_t base, pgcnt_t npgs)
{
	struct kcage_glist *elem;
	pfn_t end;

	ASSERT(rw_write_held(&kcage_range_rwlock));

	ASSERT(npgs != 0);
	if (npgs == 0)
		return (EINVAL);

	end = base + npgs;
	ASSERT(end > base);
	if (end <= base)
		return (EINVAL);

	for (elem = kcage_glist; elem != NULL; elem = elem->next) {
		int untouched;

		/* Nothing consumed from this element yet: cannot conflict. */
		if (elem->decr == 0)
			untouched = (elem->curr == elem->base);
		else
			untouched = (elem->curr == elem->lim);
		if (untouched)
			continue;

		/* No overlap with this element at all. */
		if (base >= elem->lim || end <= elem->base)
			continue;

		/* Overlap: refuse if it touches the consumed part. */
		if (elem->decr == 0) {
			if (base < elem->curr && end >= elem->base)
				return (EBUSY);
		} else {
			if (base < elem->lim && end >= elem->curr)
				return (EBUSY);
		}
	}

	return (kcage_glist_delete(base, end, &kcage_glist));
}
/*
 * Locked wrapper around kcage_range_delete_internal(): takes
 * kcage_range_rwlock as writer for the duration of the call and returns the
 * internal routine's result.
 */
int
kcage_range_delete(pfn_t base, pgcnt_t npgs)
{
	int err;

	rw_enter(&kcage_range_rwlock, RW_WRITER);
	err = kcage_range_delete_internal(base, npgs);
	rw_exit(&kcage_range_rwlock);

	return (err);
}
/*
 * Remove the PFN range [base, base + npgs) from the growth list without the
 * "already consumed" check performed by kcage_range_delete_internal().
 *
 * The caller must hold kcage_range_rwlock as writer.  Returns EINVAL for an
 * empty or wrapping range, otherwise the result of kcage_glist_delete().
 */
static int
kcage_range_delete_post_mem_del_internal(pfn_t base, pgcnt_t npgs)
{
	pfn_t end;

	ASSERT(rw_write_held(&kcage_range_rwlock));

	ASSERT(npgs != 0);
	if (npgs == 0)
		return (EINVAL);

	end = base + npgs;
	ASSERT(end > base);
	if (end <= base)
		return (EINVAL);

	return (kcage_glist_delete(base, end, &kcage_glist));
}
/*
 * Locked wrapper around kcage_range_delete_post_mem_del_internal(): takes
 * kcage_range_rwlock as writer for the duration of the call and returns the
 * internal routine's result.
 */
int
kcage_range_delete_post_mem_del(pfn_t base, pgcnt_t npgs)
{
	int err;

	rw_enter(&kcage_range_rwlock, RW_WRITER);
	err = kcage_range_delete_post_mem_del_internal(base, npgs);
	rw_exit(&kcage_range_rwlock);

	return (err);
}
/*
 * Obtain a zero-filled glist element.
 *
 * The private free list is tried first; otherwise the element comes from
 * kcage_arena when kernel_cage_enable is set, or from kmem when it is not.
 * Neither allocator is allowed to sleep.  Returns NULL on failure.
 */
static struct kcage_glist *
kcage_glist_alloc(void)
{
	struct kcage_glist *elem = kcage_glist_freelist;

	if (elem != NULL) {
		/* Pop the head of the free list. */
		kcage_glist_freelist = elem->next;
	} else if (kernel_cage_enable) {
		elem = vmem_alloc(kcage_arena, sizeof (*elem), VM_NOSLEEP);
	} else {
		elem = kmem_zalloc(sizeof (*elem), KM_NOSLEEP);
	}

	if (elem == NULL)
		return (NULL);

	/* Recycled and vmem-allocated elements are not pre-zeroed. */
	bzero(elem, sizeof (*elem));
	return (elem);
}
/*
 * Return a glist element to the private free list.  The element is pushed
 * on the head of the list for reuse by kcage_glist_alloc(); it is not handed
 * back to the allocator it came from.
 */
static void
kcage_glist_free(struct kcage_glist *lp)
{
	struct kcage_glist *old_head = kcage_glist_freelist;

	lp->next = old_head;
	kcage_glist_freelist = lp;
}
/*
 * Remove the PFN range [base, lim) from every element of the list whose
 * head pointer is *lpp.
 *
 * Depending on how the range overlaps an element, the element is unlinked
 * and freed, trimmed at one end, or split in two (which needs a new
 * element).  Returns 0 on success or ENOMEM if a split could not allocate;
 * in the ENOMEM case elements visited earlier have already been modified.
 */
static int
kcage_glist_delete(pfn_t base, pfn_t lim, struct kcage_glist **lpp)
{
struct kcage_glist *lp, *prev = *lpp;
while ((lp = *lpp) != NULL) {
/* Does the delete range overlap this element at all? */
if (lim > lp->base && base < lp->lim) {
/* Case 1: the element is entirely covered - unlink and free it. */
if (base <= lp->base && lim >= lp->lim) {
*lpp = lp->next;
if (lp == kcage_current_glist) {
/* Fall back to the previously visited element. */
ASSERT(kcage_current_glist != prev);
kcage_current_glist = prev;
}
kcage_glist_free(lp);
/* *lpp already names the successor; do not advance. */
continue;
}
/* Case 2: the range is strictly inside the element - split it. */
if (base > lp->base && lim < lp->lim) {
struct kcage_glist *new;
new = kcage_glist_alloc();
if (new == NULL) {
return (ENOMEM);
}
/*
 * lp is edited in place and keeps the end nearest its cursor's
 * starting point (the high end when decr != 0, the low end
 * otherwise); the new element takes the other piece with its
 * cursor at that piece's starting end.
 */
new->decr = lp->decr;
if (new->decr != 0) {
new->base = lp->base;
new->lim = base;
new->curr = base;
lp->base = lim;
} else {
new->base = lim;
new->lim = lp->lim;
new->curr = new->base;
lp->lim = base;
}
/* Link the new element right after lp and step onto it. */
new->next = lp->next;
lp->next = new;
lpp = &lp->next;
} else {
/* Case 3: the range clips one end of the element. */
if (base > lp->base) {
/* Clip the high end; pull an untouched downward cursor along. */
ASSERT(lim >= lp->lim);
ASSERT(base < lp->lim);
if (lp->decr != 0 &&
lp->curr == lp->lim)
lp->curr = base;
lp->lim = base;
} else {
/* Clip the low end; push an untouched upward cursor along. */
ASSERT(base <= lp->base);
ASSERT(lim > lp->base);
if (lp->decr == 0 &&
lp->curr == lp->base)
lp->curr = lim;
lp->base = lim;
}
}
}
/* Advance to the next element, remembering the one just visited. */
prev = *lpp;
lpp = &(*lpp)->next;
}
return (0);
}
/*
 * Hand out the next PFN for the cage to grow into.
 *
 * Starting at kcage_current_glist, the first element that still has
 * unconsumed PFNs supplies one and its cursor is moved; exhausted elements
 * are skipped and kcage_current_glist is advanced past them (but never set
 * to NULL).
 *
 * lockit - non-zero: try to take kcage_range_rwlock as reader and give up
 *          immediately if it is not available; zero: no locking is done.
 *
 * Returns the PFN, or PFN_INVALID if the lock was busy or the list is
 * exhausted.
 */
static pfn_t
kcage_get_pfn(int lockit)
{
	struct kcage_glist *elem;
	pfn_t result = PFN_INVALID;

	if (lockit && !rw_tryenter(&kcage_range_rwlock, RW_READER))
		return (result);

	for (elem = kcage_current_glist; elem != NULL; ) {
		if (elem->decr != 0) {
			/* Downward element: step the cursor, then use it. */
			if (elem->curr != elem->base) {
				result = --elem->curr;
				break;
			}
		} else if (elem->curr != elem->lim) {
			/* Upward element: use the cursor, then step it. */
			result = elem->curr++;
			break;
		}

		/* Exhausted; move on and remember the new current element. */
		elem = elem->next;
		if (elem != NULL)
			kcage_current_glist = elem;
	}

	if (lockit)
		rw_exit(&kcage_range_rwlock);

	return (result);
}
/*
 * Iterate over the PFNs already consumed by the cage, one per call.
 *
 * The iterator state (current element and next PFN) lives in function-level
 * statics, so the routine is not reentrant.  Elements are visited from
 * kcage_glist up to and including kcage_current_glist; within an element the
 * consumed PFNs are returned in ascending order ([curr, lim) when decr != 0,
 * [base, curr) otherwise).
 *
 * reset - non-zero restarts the walk from the head of the list.
 *
 * Returns the next PFN, or PFN_INVALID once the walk is complete; the call
 * after that starts over from the head.
 */
static pfn_t
kcage_walk_cage(int reset)
{
static struct kcage_glist *lp = NULL;
static pfn_t pfn;
if (reset)
lp = NULL;
/* (Re)start the walk at the head of the list. */
if (lp == NULL) {
lp = kcage_glist;
pfn = PFN_INVALID;
}
again:
/* Entering a new element: pick its first consumed PFN. */
if (pfn == PFN_INVALID) {
if (lp == NULL)
return (PFN_INVALID);
if (lp->decr != 0) {
pfn = lp->curr;
} else {
pfn = lp->base;
}
}
if (lp->decr != 0) {
/* Consumed part of a downward element ends at lim. */
if (pfn == lp->lim) {
/* Stop after the current element; otherwise move to the next one. */
if (lp == kcage_current_glist)
lp = NULL;
else
lp = lp->next;
pfn = PFN_INVALID;
goto again;
}
ASSERT(pfn >= lp->curr && pfn < lp->lim);
} else {
/* Consumed part of an upward element ends at curr. */
if (pfn == lp->curr) {
if (lp == kcage_current_glist)
lp = NULL;
else
lp = lp->next;
pfn = PFN_INVALID;
goto again;
}
ASSERT(pfn >= lp->base && pfn < lp->curr);
}
/* Return this PFN and leave the static cursor on the following one. */
return (pfn++);
}
/*
 * kphysm "post add" callback (installed through kcage_kphysm_vectors):
 * recompute the cage thresholds.  Neither argument is used.
 */
/* ARGSUSED */
static void
kcage_kphysm_postadd_cb(void *arg, pgcnt_t delta_pages)
{
	kcage_recalc_thresholds();
}
/*
 * kphysm "pre delete" callback (installed through kcage_kphysm_vectors):
 * nothing to do, always returns 0.  Neither argument is used.
 */
/* ARGSUSED */
static int
kcage_kphysm_predel_cb(void *arg, pgcnt_t delta_pages)
{
	return (0);
}
/*
 * kphysm "post delete" callback (installed through kcage_kphysm_vectors):
 * recompute the cage thresholds whether or not the delete was cancelled.
 * None of the arguments is used.
 */
/* ARGSUSED */
static void
kcage_kphysm_postdel_cb(void *arg, pgcnt_t delta_pages, int cancelled)
{
	kcage_recalc_thresholds();
}
/*
 * Callback vector registered with kphysm_setup_func_register() in
 * kcage_init(): version tag followed by the post-add, pre-delete and
 * post-delete callbacks defined above.
 */
static kphysm_setup_vector_t kcage_kphysm_vectors = {
KPHYSM_SETUP_VECTOR_VERSION,
kcage_kphysm_postadd_cb,
kcage_kphysm_predel_cb,
kcage_kphysm_postdel_cb
};
/*
 * CPR callback registered by kcage_init().
 *
 * On checkpoint, kcage_cageout_ready is cleared; on resume it is set again.
 * Returns B_TRUE for those two codes and B_FALSE for any other code.
 * The arg parameter is unused.
 */
/* ARGSUSED */
static boolean_t
kcage_cageout_cpr(void *arg, int code)
{
	boolean_t handled = B_TRUE;

	if (code == CB_CODE_CPR_CHKPT) {
		ASSERT(kcage_cageout_ready);
		kcage_cageout_ready = 0;
	} else if (code == CB_CODE_CPR_RESUME) {
		ASSERT(kcage_cageout_ready == 0);
		kcage_cageout_ready = 1;
	} else {
		handled = B_FALSE;
	}

	return (handled);
}
/*
 * Raise the preferred initial cage size so that it covers the large-page
 * kmem minimum, when large pages are in use and segkmem_reloc is zero.
 *
 * If kcage_kmemlp_mincage has not been set, a default is derived from one
 * eighth of physical memory rounded up to segkmem_heaplp_quantum and capped
 * at 0x40000000 bytes.  The value is then capped at segkmem_kmemlp_max and
 * stored back into kcage_kmemlp_mincage.
 *
 * Returns the (possibly increased) preferred size in pages.
 */
static pgcnt_t
kcage_recalc_preferred_size(pgcnt_t preferred_size)
{
	pgcnt_t lpmincage;

	if (!(SEGKMEM_USE_LARGEPAGES) || segkmem_reloc != 0)
		return (preferred_size);

	lpmincage = kcage_kmemlp_mincage;
	if (lpmincage == 0) {
		lpmincage = MIN(P2ROUNDUP(((physmem * PAGESIZE) / 8),
		    segkmem_heaplp_quantum), 0x40000000UL) / PAGESIZE;
	}

	kcage_kmemlp_mincage = MIN(lpmincage,
	    (segkmem_kmemlp_max / PAGESIZE));

	preferred_size = MAX(kcage_kmemlp_mincage, preferred_size);
	return (preferred_size);
}
/*
 * Build the initial cage and switch it on.
 *
 * Steps: compute thresholds and register the kphysm callbacks; pull up to
 * "wanted" PFNs from the growth list and mark their pages P_NORELOC; mark
 * every page on kvp P_NORELOC; set kcage_on; register the CPR callback;
 * coalesce the free lists when large pages are in use; and publish the
 * "kcage_page_list" kstat.
 *
 * preferred_size - requested cage size in pages; it is raised to at least
 *                  kcage_minfree and capped at availrmem.
 *
 * No page locks or range lock are taken while constructing the cage.
 */
static void
kcage_init(pgcnt_t preferred_size)
{
	pgcnt_t wanted;
	pfn_t pfn;
	page_t *pp;
	kstat_t *ksp;

	extern void page_list_noreloc_startup(page_t *);

	ASSERT(!kcage_on);

	preferred_size = kcage_recalc_preferred_size(preferred_size);

	/* Initialise the stats header now so early expansions can record. */
	KCAGE_STAT_INIT_SCAN_INDEX;

	/*
	 * Without the kphysm callbacks the thresholds could not follow
	 * memory add/delete, so give up if registration fails.
	 */
	kcage_recalc_thresholds();
	if (kphysm_setup_func_register(&kcage_kphysm_vectors, NULL)) {
		ASSERT(0);
		return;
	}

	wanted = MIN(MAX(preferred_size, kcage_minfree), availrmem);

	kcage_freemem = 0;
	pfn = PFN_INVALID;
	while (wanted != 0) {
		if ((pfn = kcage_get_pfn(0)) == PFN_INVALID)
			break;

		if ((pp = page_numtopp_nolock(pfn)) != NULL) {
			KCAGEPAGETS_INC();
			/*
			 * Free pages not yet P_NORELOC go through
			 * page_list_noreloc_startup(); in-use pages just
			 * get the flag.
			 */
			if (PP_ISFREE(pp)) {
				if (PP_ISNORELOC(pp) == 0)
					page_list_noreloc_startup(pp);
			} else {
				ASSERT(pp->p_szc == 0);
				PP_SETNORELOC(pp);
			}
			/*
			 * Account for the page only when there is a page_t;
			 * a PFN without one must not be handed to the
			 * page-count macro.
			 */
			PLCNT_XFER_NORELOC(pp);
		}
		/* A PFN with no page_t still counts against the request. */
		wanted -= 1;
	}

	/* Every page hanging off kvp must be in the cage as well. */
	if (kvp.v_pages) {
		pp = kvp.v_pages;
		do {
			ASSERT(!PP_ISFREE(pp));
			ASSERT(pp->p_szc == 0);
			if (PP_ISNORELOC(pp) == 0) {
				PP_SETNORELOC(pp);
				PLCNT_XFER_NORELOC(pp);
			}
		} while ((pp = pp->p_vpnext) != kvp.v_pages);
	}

	kcage_on = 1;

	(void) callb_add(kcage_cageout_cpr, NULL, CB_CL_CPR_POST_KERNEL,
	    "cageout");

	if (SEGKMEM_USE_LARGEPAGES) {
		extern void page_freelist_coalesce_all(int mnode);
		page_freelist_coalesce_all(-1);
	}

	/* Variable-size raw kstat; contents come from the two callbacks. */
	ksp = kstat_create("kcage", 0, "kcage_page_list", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = kcage_kstat_update;
		ksp->ks_snapshot = kcage_kstat_snapshot;
		ksp->ks_lock = &kcage_kstat_lock;
		kstat_install(ksp);
	}
}
/*
 * ks_update callback for the "kcage_page_list" kstat.
 *
 * Counts the glist elements from which at least one PFN has been consumed
 * and sizes the kstat for that many (address, size) pairs of uint64_t.
 * Writes are rejected with EACCES.
 */
static int
kcage_kstat_update(kstat_t *ksp, int rw)
{
	struct kcage_glist *elem;
	uint_t used = 0;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	rw_enter(&kcage_range_rwlock, RW_WRITER);
	for (elem = kcage_glist; elem != NULL; elem = elem->next) {
		/* Where the cursor sits while the element is untouched. */
		pfn_t origin = elem->decr ? elem->lim : elem->base;

		if (elem->curr != origin)
			used++;
	}
	rw_exit(&kcage_range_rwlock);

	ksp->ks_ndata = used;
	ksp->ks_data_size = used * 2 * sizeof (uint64_t);

	return (0);
}
/*
 * ks_snapshot callback for the "kcage_page_list" kstat.
 *
 * Writes one (address, size) record, in bytes, for every glist element from
 * which PFNs have been consumed, describing the consumed part of that
 * element.  Records are packed contiguously from the start of buf, matching
 * the element count computed by kcage_kstat_update(); elements with nothing
 * consumed produce no record and do not use up a slot.  Output stops when
 * ks_data_size bytes have been filled.  Writes are rejected with EACCES.
 */
static int
kcage_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct kcage_glist *lp;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;
	caddr_t bufend;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	bufend = (caddr_t)buf + ksp->ks_data_size;

	rw_enter(&kcage_range_rwlock, RW_WRITER);
	for (lp = kcage_glist; lp != NULL; lp = lp->next) {
		pfn_t first, last;

		/* Consumed part: [curr, lim) going down, [base, curr) going up. */
		if (lp->decr) {
			first = lp->curr;
			last = lp->lim;
		} else {
			first = lp->base;
			last = lp->curr;
		}

		/* Nothing consumed: no record, and no slot is skipped. */
		if (first == last)
			continue;

		/* The list may have grown since ks_update sized the buffer. */
		if ((caddr_t)kspmem >= bufend)
			break;

		kspmem->address = ptob(first);
		kspmem->size = ptob(last - first);
		kspmem++;
	}
	rw_exit(&kcage_range_rwlock);

	return (0);
}
/*
 * (Re)compute the cage thresholds.
 *
 * The first call records the thresholds as they were configured; every later
 * call starts again from those recorded values.  Any threshold that is zero
 * is then given a default derived from total_pages or from the threshold
 * above it.  Afterwards, if the cageout thread is ready, it is woken when
 * cage free memory is below kcage_desfree, and throttled waiters are woken
 * if any are registered in kcage_needfree.
 */
void
kcage_recalc_thresholds()
{
	static int first = 1;
	static pgcnt_t init_lotsfree;
	static pgcnt_t init_desfree;
	static pgcnt_t init_minfree;
	static pgcnt_t init_throttlefree;
	static pgcnt_t init_reserve;

	mutex_enter(&kcage_cageout_mutex);
	mutex_enter(&freemem_lock);

	if (!first) {
		/* Start again from the originally configured values. */
		kcage_lotsfree = init_lotsfree;
		kcage_desfree = init_desfree;
		kcage_minfree = init_minfree;
		kcage_throttlefree = init_throttlefree;
		kcage_reserve = init_reserve;
	} else {
		/* Remember the configured values for later recalculations. */
		first = 0;
		init_lotsfree = kcage_lotsfree;
		init_desfree = kcage_desfree;
		init_minfree = kcage_minfree;
		init_throttlefree = kcage_throttlefree;
		init_reserve = kcage_reserve;
	}

	/* Fill in defaults; order matters, each builds on the previous one. */
	if (kcage_lotsfree == 0)
		kcage_lotsfree = MAX(32, total_pages / 256);

	if (kcage_minfree == 0)
		kcage_minfree = MAX(32, kcage_lotsfree / 2);

	if (kcage_desfree == 0)
		kcage_desfree = MAX(32, kcage_minfree);

	if (kcage_throttlefree == 0)
		kcage_throttlefree = MAX(32, kcage_minfree / 2);

	if (kcage_reserve == 0)
		kcage_reserve = MIN(32, kcage_throttlefree / 2);

	mutex_exit(&freemem_lock);
	mutex_exit(&kcage_cageout_mutex);

	if (!kcage_cageout_ready)
		return;

	if (kcage_freemem < kcage_desfree)
		kcage_cageout_wakeup();

	if (kcage_needfree) {
		mutex_enter(&kcage_throttle_mutex);
		cv_broadcast(&kcage_throttle_cv);
		mutex_exit(&kcage_throttle_mutex);
	}
}
/*
 * Create the cageout thread, running kcage_cageout() in proc_pageout at
 * priority maxclsyspri - 1.  Does nothing when the cage is not enabled.
 */
void
kcage_cageout_init()
{
	if (!kcage_on)
		return;

	(void) lwp_kernel_create(proc_pageout, kcage_cageout, NULL,
	    TS_RUN, maxclsyspri - 1);
}
/*
 * Decide whether a request for npages of cage memory may proceed, waiting
 * for the cage to free or grow memory if necessary.
 *
 * npages - number of pages the caller wants
 * flags  - page-create flags; PG_WAIT and PG_NORMALPRI are examined
 *
 * Returns KCT_CRIT for callers that are never throttled, KCT_NONCRIT when
 * the request may go ahead, and KCT_FAILURE when a non-PG_WAIT request
 * cannot be satisfied.
 */
int
kcage_create_throttle(pgcnt_t npages, int flags)
{
KCAGE_STAT_INCR(kct_calls);
/* Never throttle the cageout thread itself, nor anything during panic. */
if (curthread == kcage_cageout_thread || panicstr) {
KCAGE_STAT_INCR(kct_cageout);
return (KCT_CRIT);
}
/*
 * NOMEMWAIT() callers pass straight through when the cage has enough
 * headroom or when system free memory is below minfree.
 */
if (NOMEMWAIT()) {
if (kcage_freemem > kcage_throttlefree + npages) {
KCAGE_STAT_INCR(kct_exempt);
return (KCT_CRIT);
} else if (freemem < minfree) {
KCAGE_STAT_INCR(kct_critical);
return (KCT_CRIT);
}
}
/* Threads above maxclsyspri are exempt while the reserve is intact. */
if (DISP_PRIO(curthread) > maxclsyspri &&
kcage_freemem > kcage_reserve) {
KCAGE_STAT_INCR(kct_exempt);
return (KCT_CRIT);
}
/* Everyone else waits until the request fits above kcage_throttlefree. */
while (kcage_freemem < kcage_throttlefree + npages) {
ASSERT(kcage_on);
if (kcage_cageout_ready) {
/*
 * Register the demand, kick the cageout thread and sleep until
 * someone broadcasts on the throttle cv.
 */
mutex_enter(&kcage_throttle_mutex);
kcage_needfree += npages;
KCAGE_STAT_INCR(kct_wait);
kcage_cageout_wakeup();
KCAGE_STAT_INCR(kct_cagewake);
cv_wait(&kcage_throttle_cv, &kcage_throttle_mutex);
kcage_needfree -= npages;
mutex_exit(&kcage_throttle_mutex);
} else {
/*
 * Cageout thread not ready: kcage_cageout_wakeup() may expand the
 * cage in this thread instead.  kcage_needfree is adjusted
 * atomically here because the throttle mutex is not held.
 */
atomic_add_long(&kcage_needfree, npages);
kcage_cageout_wakeup();
KCAGE_STAT_INCR(kct_cagewake);
atomic_add_long(&kcage_needfree, -npages);
}
if (NOMEMWAIT() && freemem < minfree) {
return (KCT_CRIT);
}
/* Callers that did not ask to wait get exactly one attempt. */
if ((flags & PG_WAIT) == 0) {
pgcnt_t limit = (flags & PG_NORMALPRI) ?
throttlefree : pageout_reserve;
if ((kcage_freemem < kcage_throttlefree + npages) &&
(freemem < limit + npages)) {
return (KCT_FAILURE);
} else {
return (KCT_NONCRIT);
}
}
}
return (KCT_NONCRIT);
}
/*
 * Credit npages to kcage_freemem, call wakeup_pcgs(), and - if throttled
 * callers are registered in kcage_needfree and the cage now holds enough
 * for all of them above kcage_throttlefree - wake them.
 */
void
kcage_freemem_add(pgcnt_t npages)
{
	extern void wakeup_pcgs(void);

	atomic_add_long(&kcage_freemem, npages);

	wakeup_pcgs();

	/* Nobody is waiting. */
	if (kcage_needfree == 0)
		return;

	/* Still not enough for the registered demand. */
	if (kcage_freemem < (kcage_throttlefree + kcage_needfree))
		return;

	mutex_enter(&kcage_throttle_mutex);
	cv_broadcast(&kcage_throttle_cv);
	KCAGE_STAT_INCR(kfa_trottlewake);
	mutex_exit(&kcage_throttle_mutex);
}
/*
 * Debit npages from kcage_freemem and kick the cageout logic when the cage
 * has dropped below kcage_desfree.
 */
void
kcage_freemem_sub(pgcnt_t npages)
{
	atomic_add_long(&kcage_freemem, -npages);

	if (kcage_freemem >= kcage_desfree)
		return;

	kcage_cageout_wakeup();
	KCAGE_STAT_INCR(kfs_cagewake);
}
/*
 * Set P_NORELOC on rootpp and, for a large page, on all of its constituent
 * pages.
 *
 * rootpp - page already locked by the caller in mode se; must not be free
 * se     - lock mode used for the group trylock
 *
 * Returns 0 if the group lock could not be obtained (nothing changed),
 * 1 on success.  group_page_unlock() is called on the large-page path only;
 * the szc == 0 path returns right after setting the flag.
 */
static int
kcage_setnoreloc_pages(page_t *rootpp, se_t se)
{
	pfn_t rootpfn = page_pptonum(rootpp);
	pgcnt_t npgs, i;
	uint_t szc;

	ASSERT(!PP_ISFREE(rootpp));
	ASSERT(PAGE_LOCKED_SE(rootpp, se));

	if (!group_page_trylock(rootpp, se))
		return (0);

	szc = rootpp->p_szc;
	if (szc == 0) {
		/* Turned out to be a small page: flag it and finish. */
		ASSERT(rootpp->p_vnode != NULL &&
		    !PP_ISKAS(rootpp) &&
		    !IS_SWAPFSVP(rootpp->p_vnode));
		PP_SETNORELOC(rootpp);
		return (1);
	}

	npgs = page_get_pagecnt(szc);
	ASSERT(IS_P2ALIGNED(rootpfn, npgs));

	/* Constituent page_t's are contiguous starting at rootpp. */
	for (i = 0; i < npgs; i++) {
		page_t *pp = &rootpp[i];

		ASSERT(PAGE_LOCKED_SE(pp, se));
		ASSERT(!PP_ISFREE(pp));
		ASSERT(pp->p_szc == szc);
		PP_SETNORELOC(pp);
	}

	group_page_unlock(rootpp);
	return (1);
}
/*
 * Try to bring one page into the cage by marking it P_NORELOC.
 *
 * pp      - candidate page (not locked by the caller)
 * nfreedp - out: number of pages freed; it is NOT written on the EBUSY
 *           return or on the EAGAIN returns made directly from this routine
 *
 * Returns:
 *   0       page is in the cage and free (*nfreedp is 0 if it was already
 *           P_NORELOC, 1 if it was moved between free lists here), or the
 *           result of kcage_invalidate_page() was 0
 *   EBUSY   page was already P_NORELOC and is in use
 *   EAGAIN  the page could not be locked/upgraded or its group lock failed
 *   other   whatever kcage_invalidate_page() returned
 * The page is unlocked on every return made directly from this routine.
 */
static int
kcage_assimilate_page(page_t *pp, pgcnt_t *nfreedp)
{
if (page_trylock(pp, SE_EXCL)) {
if (PP_ISNORELOC(pp)) {
/* Someone else caged the page first; just report its state. */
check_free_and_return:
if (PP_ISFREE(pp)) {
page_unlock(pp);
*nfreedp = 0;
return (0);
} else {
page_unlock(pp);
return (EBUSY);
}
}
} else {
/* Exclusive lock unavailable: settle for shared and try to upgrade. */
if (page_trylock(pp, SE_SHARED)) {
if (PP_ISNORELOC(pp))
goto check_free_and_return;
} else {
return (EAGAIN);
}
/* Only a free page is worth upgrading the lock for. */
if (!PP_ISFREE(pp)) {
page_unlock(pp);
return (EAGAIN);
}
if (!page_tryupgrade(pp)) {
page_unlock(pp);
return (EAGAIN);
}
}
ASSERT(PAGE_EXCL(pp));
if (PP_ISFREE(pp)) {
/*
 * Free page: take it off its list, flag it, account for it and put
 * it back on the tail of the same kind of list.
 */
int which = PP_ISAGED(pp) ? PG_FREE_LIST : PG_CACHE_LIST;
page_list_sub(pp, which);
ASSERT(pp->p_szc == 0);
PP_SETNORELOC(pp);
PLCNT_XFER_NORELOC(pp);
page_list_add(pp, which | PG_LIST_TAIL);
page_unlock(pp);
*nfreedp = 1;
return (0);
} else {
/* In-use page: flag it (whole large page if needed), then evict it. */
if (pp->p_szc != 0) {
if (!kcage_setnoreloc_pages(pp, SE_EXCL)) {
page_unlock(pp);
return (EAGAIN);
}
ASSERT(PP_ISNORELOC(pp));
} else {
PP_SETNORELOC(pp);
}
PLCNT_XFER_NORELOC(pp);
/* kcage_invalidate_page() takes over responsibility for the lock. */
return (kcage_invalidate_page(pp, nfreedp));
}
}
/*
 * Grow the cage by assimilating pages at PFNs handed out by
 * kcage_get_pfn().
 *
 * The target is enough free cage pages to reach the larger of
 * kcage_lotsfree and kcage_throttlefree + kcage_needfree.  Nothing is done
 * if that target is already met or if system freemem is too low to spare
 * the pages.
 *
 * Returns non-zero if at least one page was added to the cage.
 */
static int
kcage_expand()
{
int did_something = 0;
spgcnt_t wanted;
pfn_t pfn;
page_t *pp;
pgcnt_t n;
pgcnt_t nf, nfreed;
wanted = MAX(kcage_lotsfree, kcage_throttlefree + kcage_needfree)
- kcage_freemem;
if (wanted <= 0) {
return (0);
} else if (freemem < pageout_reserve + wanted) {
KCAGE_STAT_INCR(ke_lowfreemem);
return (0);
}
KCAGE_STAT_INCR(ke_calls);
KCAGE_STAT_SET_SCAN(ke_wanted, (uint_t)wanted);
/* n counts pages assimilated (written but never read), nf those that came out free. */
n = 0;
nf = 0;
while (kcage_on && nf < wanted) {
/* Uses a non-blocking lock attempt; PFN_INVALID also means "lock busy". */
pfn = kcage_get_pfn(1);
if (pfn == PFN_INVALID) {
KCAGE_STAT_INCR(ke_nopfn);
goto terminate;
}
KCAGE_STAT_INCR_SCAN(ke_examined);
/* Skip PFNs with no page_t, being deleted, or already in the cage. */
if ((pp = page_numtopp_nolock(pfn)) == NULL) {
KCAGE_STAT_INCR(ke_nopaget);
continue;
}
KCAGEPAGETS_INC();
if (pfn_is_being_deleted(pfn)) {
KCAGE_STAT_INCR(ke_deleting);
continue;
}
if (PP_ISNORELOC(pp)) {
KCAGE_STAT_INCR(ke_isnoreloc);
continue;
}
switch (kcage_assimilate_page(pp, &nfreed)) {
/* Assimilated and nfreed pages became free. */
case 0:
KCAGE_STAT_NINCR_SCAN(ke_gotonefree, nfreed);
did_something = 1;
nf += nfreed;
n++;
break;
/* Assimilated, but the page is still in use. */
case EBUSY:
case ERANGE:
KCAGE_STAT_INCR_SCAN(ke_gotone);
did_something = 1;
n++;
break;
/* Assimilated, but eviction ran out of memory: stop expanding. */
case ENOMEM:
KCAGE_STAT_INCR(ke_terminate);
did_something = 1;
n++;
goto terminate;
/* Could not assimilate; the PFN is skipped. */
case EAGAIN:
KCAGE_STAT_INCR_SCAN(ke_lefthole);
break;
default:
ASSERT(0);
break;
}
}
terminate:
return (did_something);
}
/*
 * Relocate an in-use, exclusively locked page via page_relocate().
 *
 * pp      - page to relocate; must be held SE_EXCL and must not be free
 * nfreedp - out: the page count reported by page_relocate(), or 0 if
 *           page_relocate() did not report one
 *
 * On success (0) every replacement page is removed from the returned list
 * and unlocked.  On failure the original page is unlocked and the error from
 * page_relocate() is returned.
 */
static int
kcage_relocate_page(page_t *pp, pgcnt_t *nfreedp)
{
	page_t *opp = pp;
	page_t *rpp = NULL;
	/*
	 * Start from zero: the count is copied to *nfreedp unconditionally,
	 * so it must be defined even if page_relocate() fails before
	 * storing anything through the pointer.
	 */
	spgcnt_t npgs = 0;
	int result;

	ASSERT(!PP_ISFREE(opp));
	ASSERT(PAGE_EXCL(opp));

	result = page_relocate(&opp, &rpp, 1, 1, &npgs, NULL);
	*nfreedp = npgs;
	if (result == 0) {
		/* Drain and unlock the list of replacement pages. */
		while (npgs-- > 0) {
			page_t *tpp;

			ASSERT(rpp != NULL);
			tpp = rpp;
			page_sub(&rpp, tpp);
			page_unlock(tpp);
		}

		ASSERT(rpp == NULL);

		return (0);
	}

	page_unlock(opp);

	return (result);
}
/*
 * Evict the contents of an in-use, exclusively locked page.
 *
 * Pages with a lock or COW count, and pages found modified after their
 * mappings are unloaded, are relocated via kcage_relocate_page(); anything
 * else is demoted if necessary and disposed of with VN_DISPOSE(B_INVAL).
 *
 * pp      - page held SE_EXCL; must not be free
 * nfreedp - out: pages freed (set by kcage_relocate_page() or to 1 on the
 *           dispose path; not written on the EAGAIN return)
 *
 * Returns 0 on success, EAGAIN if the page could not be demoted (the page
 * is unlocked here), or the error from kcage_relocate_page().
 */
static int
kcage_invalidate_page(page_t *pp, pgcnt_t *nfreedp)
{
int result;
#if defined(__sparc)
ASSERT(pp->p_vnode != &promvp);
#endif
ASSERT(!PP_ISFREE(pp));
ASSERT(PAGE_EXCL(pp));
/* Locked or COW-referenced pages cannot be thrown away: relocate them. */
if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
result = kcage_relocate_page(pp, nfreedp);
#ifdef KCAGE_STATS
if (result == 0)
KCAGE_STAT_INCR_SCAN(kip_reloclocked);
else if (result == ENOMEM)
KCAGE_STAT_INCR_SCAN(kip_nomem);
#endif
return (result);
}
ASSERT(pp->p_vnode->v_type != VCHR);
/* Drop all mappings, then see whether the page turned out to be dirty. */
(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
if (hat_ismod(pp)) {
/* Modified contents must be preserved: relocate instead of dispose. */
result = kcage_relocate_page(pp, nfreedp);
#ifdef KCAGE_STATS
if (result == 0)
KCAGE_STAT_INCR_SCAN(kip_relocmod);
else if (result == ENOMEM)
KCAGE_STAT_INCR_SCAN(kip_nomem);
#endif
return (result);
}
if (!page_try_demote_pages(pp)) {
KCAGE_STAT_INCR_SCAN(kip_demotefailed);
page_unlock(pp);
return (EAGAIN);
}
/* Unmodified page: invalidate it. */
VN_DISPOSE(pp, B_INVAL, 0, kcred);
KCAGE_STAT_INCR_SCAN(kip_destroy);
*nfreedp = 1;
return (0);
}
/*
 * Main loop of the cageout thread; never returns.
 *
 * After announcing itself (kcage_cageout_thread, kcage_cageout_ready) the
 * thread sleeps on kcage_cageout_cv.  On each wakeup it walks the pages
 * already inside the cage trying to free them until kcage_freemem reaches
 * kcage_lotsfree + kcage_needfree, then expands the cage if free memory is
 * still below kcage_throttlefree + kcage_needfree, and finally wakes any
 * throttled waiters.  kcage_cageout_mutex is held except while sleeping in
 * cv_wait().
 */
static void
kcage_cageout()
{
pfn_t pfn;
page_t *pp;
callb_cpr_t cprinfo;
int did_something;
pfn_t start_pfn;
/* Threshold passed to hat_page_checkshare() below. */
ulong_t shared_level = 8;
pgcnt_t nfreed;
#ifdef KCAGE_STATS
clock_t scan_start;
#endif
CALLB_CPR_INIT(&cprinfo, &kcage_cageout_mutex,
callb_generic_cpr, "cageout");
mutex_enter(&kcage_cageout_mutex);
kcage_cageout_thread = curthread;
/* PFN_INVALID makes the first kcage_walk_cage() call reset the walk. */
pfn = PFN_INVALID;
/* Filled in with the first PFN the walk ever returns. */
start_pfn = PFN_INVALID;
/* From here on kcage_cageout_wakeup() signals us instead of expanding. */
kcage_cageout_ready = 1;
loop:
/* Sleep until kcage_cageout_wakeup() signals; safe for CPR while asleep. */
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(&kcage_cageout_cv, &kcage_cageout_mutex);
CALLB_CPR_SAFE_END(&cprinfo, &kcage_cageout_mutex);
KCAGE_STAT_INCR(kt_wakeups);
KCAGE_STAT_SET_SCAN(kt_freemem_start, freemem);
KCAGE_STAT_SET_SCAN(kt_kcage_freemem_start, kcage_freemem);
#ifdef KCAGE_STATS
scan_start = ddi_get_lbolt();
#endif
if (!kcage_on)
goto loop;
KCAGE_STAT_INCR(kt_scans);
KCAGE_STAT_INCR_SCAN(kt_passes);
did_something = 0;
while (kcage_freemem < kcage_lotsfree + kcage_needfree) {
/* A PFN_INVALID result ends this pass and resets the next walk. */
if ((pfn = kcage_walk_cage(pfn == PFN_INVALID)) ==
PFN_INVALID) {
break;
}
if (start_pfn == PFN_INVALID)
start_pfn = pfn;
else if (start_pfn == pfn) {
/*
 * Back at the first PFN with nothing accomplished: on a
 * single-CPU partition stop walking and go try expansion.
 */
if (cp_default.cp_ncpus == 1 && did_something == 0) {
KCAGE_STAT_INCR(kt_cageout_break);
break;
}
}
pp = page_numtopp_nolock(pfn);
if (pp == NULL) {
continue;
}
KCAGE_STAT_INCR_SCAN(kt_examined);
/* The P_NORELOC and free checks below are made without the page lock. */
if (!PP_ISNORELOC(pp)) {
/* A non-caged page inside the cage range: try to assimilate it. */
switch (kcage_assimilate_page(pp, &nfreed)) {
case 0:
did_something = 1;
KCAGE_STAT_NINCR_SCAN(kt_gotonefree,
nfreed);
break;
case EBUSY:
case ERANGE:
did_something = 1;
KCAGE_STAT_INCR_SCAN(kt_gotone);
break;
case EAGAIN:
case ENOMEM:
break;
default:
ASSERT(0);
break;
}
continue;
} else {
/* Caged page: nothing to do if it is already free. */
if (PP_ISFREE(pp)) {
continue;
}
/* Skip locked-down kernel pages and pages we cannot lock right now. */
if ((PP_ISKAS(pp) && pp->p_lckcnt > 0) ||
!page_trylock(pp, SE_EXCL)) {
KCAGE_STAT_INCR_SCAN(kt_cantlock);
continue;
}
ASSERT(PP_ISNORELOC(pp));
/* Re-check under the lock; the state may have changed. */
if (PP_ISFREE(pp) || (PP_ISKAS(pp) &&
pp->p_lckcnt > 0)) {
page_unlock(pp);
continue;
}
if (hat_page_checkshare(pp, shared_level)) {
page_unlock(pp);
KCAGE_STAT_INCR_SCAN(kt_skipshared);
continue;
}
/* kcage_invalidate_page() disposes of the page lock on every path. */
if (kcage_invalidate_page(pp, &nfreed) == 0) {
did_something = 1;
KCAGE_STAT_NINCR_SCAN(kt_gotonefree, nfreed);
}
}
}
/* Freeing inside the cage was not enough: grow the cage. */
if (kcage_freemem < kcage_throttlefree + kcage_needfree)
(void) kcage_expand();
/* Let throttled allocators re-evaluate. */
if (kcage_on && kcage_cageout_ready)
cv_broadcast(&kcage_throttle_cv);
KCAGE_STAT_SET_SCAN(kt_freemem_end, freemem);
KCAGE_STAT_SET_SCAN(kt_kcage_freemem_end, kcage_freemem);
KCAGE_STAT_SET_SCAN(kt_ticks, ddi_get_lbolt() - scan_start);
KCAGE_STAT_INC_SCAN_INDEX;
goto loop;
}
/*
 * Request cage maintenance.
 *
 * If kcage_cageout_mutex cannot be taken immediately, nothing is done.
 * Otherwise, when the cageout thread is ready it is signalled; when it is
 * not yet ready and cage memory is below kcage_minfree (or someone is
 * waiting for pages), the calling thread expands the cage itself.
 */
void
kcage_cageout_wakeup()
{
	if (!mutex_tryenter(&kcage_cageout_mutex))
		return;

	if (kcage_cageout_ready) {
		cv_signal(&kcage_cageout_cv);
	} else if (kcage_freemem < kcage_minfree || kcage_needfree) {
		/* No cageout thread yet: do the expansion in this thread. */
		KCAGE_STAT_INCR(kcw_expandearly);
		(void) kcage_expand();
		KCAGE_STAT_INC_SCAN_INDEX;
	}

	mutex_exit(&kcage_cageout_mutex);
}
/*
 * Wake every thread waiting on the throttle cv so it can re-evaluate its
 * request.  Does nothing unless the cage is on and the cageout thread is
 * ready.
 */
void
kcage_tick()
{
	if (!kcage_on || !kcage_cageout_ready)
		return;

	cv_broadcast(&kcage_throttle_cv);
}