#include <sys/cdefs.h>
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/blockcount.h>
#include <sys/eventhandler.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/mount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_radix.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
/* Forward declarations. */
static void vm_pageout(void);
static void vm_pageout_init(void *);
static int vm_pageout_clean(vm_page_t m, int *numpagedout);
static int vm_pageout_cluster(vm_page_t m);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
int starting_page_shortage);
/* Run vm_pageout_init() first within the SI_SUB_KTHREAD_PAGE subsystem. */
SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
NULL);
/* Page daemon process pointer; its address is published through page_kp. */
struct proc *pageproc;
/*
 * Descriptor handed to kproc_start(): process name, entry point, and the
 * location of the process pointer (presumably filled in by kproc_start()).
 */
static struct kproc_desc page_kp = {
"pagedaemon",
vm_pageout,
&pageproc
};
/* Start the page daemon process second, after vm_pageout_init(). */
SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
&page_kp);
/* SDT provider and probe; the probe is fired from vm_pageout_lowmem(). */
SDT_PROVIDER_DEFINE(vm);
SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
/* The laundry worker pauses hz / VM_LAUNDER_RATE ticks after each pass. */
#define VM_LAUNDER_RATE 10
/* The page daemon worker sleeps at most hz / VM_INACT_SCAN_RATE ticks. */
#define VM_INACT_SCAN_RATE 10
/* Set to 1 by the swapon event handler, cleared by the swapoff handler. */
static int swapdev_enabled;
/* Maximum number of pages gathered into one cluster by vm_pageout_cluster(). */
int vm_pageout_page_count = 32;
/*
 * When non-zero, vm_pageout_oom() decrements this on each OOM event and
 * panics once it reaches zero instead of killing a process.
 */
static int vm_panic_on_oom = 0;
SYSCTL_INT(_vm, OID_AUTO, panic_on_oom,
CTLFLAG_RWTUN, &vm_panic_on_oom, 0,
"Panic on the given number of out-of-memory errors instead of "
"killing the largest process");
/*
 * Period, in seconds, used by vm_pageout_scan_active() to size its minimum
 * scan; zero disables the minimum scan.
 */
static int vm_pageout_update_period;
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RWTUN, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
/* Clamped to [2, mp_ncpus] by get_pageout_threads_per_domain(). */
static int pageout_cpus_per_thread = 16;
SYSCTL_INT(_vm, OID_AUTO, pageout_cpus_per_thread, CTLFLAG_RDTUN,
&pageout_cpus_per_thread, 0,
"Number of CPUs per pagedaemon worker thread");
/* Minimum number of seconds between vm_lowmem event invocations. */
static int lowmem_period = 10;
SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0,
"Low memory callback period");
/* When non-zero, vm_pageout_launder() does not page out OBJ_SWAP pages. */
static int disable_swap_pageouts;
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
CTLFLAG_RWTUN, &disable_swap_pageouts, 0,
"Disallow swapout of dirty pages");
/* Counts EDEADLK results from vm_pageout_clean(); read-only statistic. */
static int pageout_lock_miss;
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
CTLFLAG_RD, &pageout_lock_miss, 0,
"vget() lock misses during pageout");
/*
 * Number of consecutive no-progress inactive scans after which a domain
 * votes for OOM; see vm_pageout_mightbe_oom().
 */
static int vm_pageout_oom_seq = 12;
SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
CTLFLAG_RWTUN, &vm_pageout_oom_seq, 0,
"back-to-back calls to oom detector to start OOM");
/*
 * Sysctl handler shared by the laundry weight knobs.  arg1 points at the
 * integer being tuned and arg2 is the smallest value that will be accepted;
 * 100 is the largest.  Reads report the current value.  A write outside the
 * permitted range fails with EINVAL and leaves the stored value untouched.
 */
static int
sysctl_laundry_weight(SYSCTL_HANDLER_ARGS)
{
	int *weightp;
	int newval, rc;

	weightp = (int *)arg1;
	newval = *weightp;

	/* Operate on a copy so that a rejected write has no effect. */
	rc = sysctl_handle_int(oidp, &newval, 0, req);
	if (rc != 0)
		return (rc);
	if (req->newptr == NULL) {
		/* No new value was supplied; nothing to validate or store. */
		return (0);
	}
	if (newval > 100 || newval < arg2)
		return (EINVAL);
	*weightp = newval;
	return (0);
}
/*
 * Laundry weights.  The numeric argument following the variable address in
 * each SYSCTL_PROC is passed to sysctl_laundry_weight() as arg2, the minimum
 * accepted value.  The active-scan weight has a minimum of 1 because
 * vm_pageout_active_target() divides by it.
 */
static int act_scan_laundry_weight = 3;
SYSCTL_PROC(_vm, OID_AUTO, act_scan_laundry_weight,
CTLTYPE_INT | CTLFLAG_RWTUN, &act_scan_laundry_weight, 1,
sysctl_laundry_weight, "I",
"weight given to clean vs. dirty pages in active queue scans");
static int inact_scan_laundry_weight = 1;
SYSCTL_PROC(_vm, OID_AUTO, inact_scan_laundry_weight,
CTLTYPE_INT | CTLFLAG_RWTUN, &inact_scan_laundry_weight, 0,
sysctl_laundry_weight, "I",
"weight given to clean vs. dirty pages in inactive queue scans");
/* Both values are in kilobytes; the laundry worker scales them by PAGE_SIZE / 1024. */
static u_int vm_background_launder_rate = 4096;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN,
&vm_background_launder_rate, 0,
"background laundering rate, in kilobytes per second");
static u_int vm_background_launder_max = 20 * 1024;
SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN,
&vm_background_launder_max, 0,
"background laundering cap, in kilobytes");
u_long vm_page_max_user_wired;
SYSCTL_ULONG(_vm, OID_AUTO, max_user_wired, CTLFLAG_RW,
&vm_page_max_user_wired, 0,
"system-wide limit to user-wired page count");
static u_int isqrt(u_int num);
static int vm_pageout_launder(struct vm_domain *vmd, int launder,
bool in_shortfall);
static void vm_pageout_laundry_worker(void *arg);
/*
 * State of one page queue scan.
 *   bq      - batch of pages pulled from the queue, refilled by
 *             vm_pageout_collect_batch()
 *   pq      - the page queue being scanned
 *   marker  - marker page recording the scan position within pq
 *   maxscan - upper bound on the number of queue entries to visit
 *   scanned - entries visited so far; added to pq_pdpages when the scan ends
 */
struct scan_state {
struct vm_batchqueue bq;
struct vm_pagequeue *pq;
vm_page_t marker;
int maxscan;
int scanned;
};
/*
 * Begin a scan of page queue "pq".  The marker is linked into the queue,
 * either at the head (after == NULL) or directly behind "after", and the scan
 * state is reset so that at most "maxscan" entries are visited.
 *
 * The caller must hold the page queue lock; it is released before returning.
 */
static void
vm_pageout_init_scan(struct scan_state *ss, struct vm_pagequeue *pq,
    vm_page_t marker, vm_page_t after, int maxscan)
{

	vm_pagequeue_assert_locked(pq);
	KASSERT((marker->a.flags & PGA_ENQUEUED) == 0,
	    ("marker %p already enqueued", marker));

	/* Place the marker at the requested starting position. */
	if (after != NULL)
		TAILQ_INSERT_AFTER(&pq->pq_pl, after, marker, plinks.q);
	else
		TAILQ_INSERT_HEAD(&pq->pq_pl, marker, plinks.q);
	vm_page_aflag_set(marker, PGA_ENQUEUED);

	/* Start from an empty batch and a zeroed visit counter. */
	ss->marker = marker;
	ss->pq = pq;
	ss->scanned = 0;
	ss->maxscan = maxscan;
	vm_batchqueue_init(&ss->bq);

	vm_pagequeue_unlock(pq);
}
static void
vm_pageout_end_scan(struct scan_state *ss)
{
struct vm_pagequeue *pq;
pq = ss->pq;
vm_pagequeue_assert_locked(pq);
KASSERT((ss->marker->a.flags & PGA_ENQUEUED) != 0,
("marker %p not enqueued", ss->marker));
TAILQ_REMOVE(&pq->pq_pl, ss->marker, plinks.q);
vm_page_aflag_clear(ss->marker, PGA_ENQUEUED);
pq->pq_pdpages += ss->scanned;
}
/*
 * Refill the scan's batch queue with the entries that follow the marker.
 *
 * Collection stops at the end of the queue, when maxscan entries have been
 * visited, or when the batch holds VM_BATCHQUEUE_SIZE entries.  With
 * "dequeue" set, ordinary pages are unlinked from the page queue as they are
 * collected and other scans' markers are stepped over; otherwise every entry,
 * markers included, is added to the batch and left in place.  The marker is
 * then moved to just before the first entry that was not visited.
 *
 * Takes and releases the page queue lock.
 */
static __always_inline void
vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue)
{
	struct vm_pagequeue *queue;
	vm_page_t cur, mark, succ;

	mark = ss->marker;
	queue = ss->pq;

	KASSERT((mark->a.flags & PGA_ENQUEUED) != 0,
	    ("marker %p not enqueued", ss->marker));

	vm_pagequeue_lock(queue);
	cur = TAILQ_NEXT(mark, plinks.q);
	while (cur != NULL && ss->scanned < ss->maxscan &&
	    ss->bq.bq_cnt < VM_BATCHQUEUE_SIZE) {
		/* Fetch the successor first; "cur" may be unlinked below. */
		succ = TAILQ_NEXT(cur, plinks.q);
		if ((cur->flags & PG_MARKER) != 0) {
			/* Markers are never dequeued, only passed along. */
			if (!dequeue)
				(void)vm_batchqueue_insert(&ss->bq, cur);
		} else {
			KASSERT((cur->a.flags & PGA_ENQUEUED) != 0,
			    ("page %p not enqueued", cur));
			KASSERT((cur->flags & PG_FICTITIOUS) == 0,
			    ("Fictitious page %p cannot be in page queue",
			    cur));
			KASSERT((cur->oflags & VPO_UNMANAGED) == 0,
			    ("Unmanaged page %p cannot be in page queue",
			    cur));
			(void)vm_batchqueue_insert(&ss->bq, cur);
			if (dequeue) {
				TAILQ_REMOVE(&queue->pq_pl, cur, plinks.q);
				vm_page_aflag_clear(cur, PGA_ENQUEUED);
			}
		}
		cur = succ;
		ss->scanned++;
	}

	/* Re-link the marker at the point where the next batch starts. */
	TAILQ_REMOVE(&queue->pq_pl, mark, plinks.q);
	if (__predict_false(cur == NULL))
		TAILQ_INSERT_TAIL(&queue->pq_pl, mark, plinks.q);
	else
		TAILQ_INSERT_BEFORE(cur, mark, plinks.q);

	/* Dequeued pages no longer count towards the queue length. */
	if (dequeue)
		vm_pagequeue_cnt_add(queue, -ss->bq.bq_cnt);
	vm_pagequeue_unlock(queue);
}
/*
 * Return the next page of the scan, collecting a fresh batch first when the
 * current one is exhausted.  The result is whatever vm_batchqueue_pop()
 * yields for the batch; callers treat NULL as the end of the scan.
 */
static __always_inline vm_page_t
vm_pageout_next(struct scan_state *ss, const bool dequeue)
{
	vm_page_t page;

	if (ss->bq.bq_cnt != 0) {
		page = vm_batchqueue_pop(&ss->bq);
		return (page);
	}
	vm_pageout_collect_batch(ss, dequeue);
	page = vm_batchqueue_pop(&ss->bq);
	return (page);
}
/*
 * Decide whether the scan should leave page "m" alone.
 *
 * Returns true when the page's atomic state no longer names "queue", when
 * its PGA_ENQUEUED flag disagrees with "enqueued", or when a queue operation
 * is pending on it; in the last case the page is first handed to
 * vm_page_pqbatch_submit().  Returns false when the scan may go on to
 * process the page.
 */
static __always_inline bool
vm_pageout_defer(vm_page_t m, const uint8_t queue, const bool enqueued)
{
	vm_page_astate_t state;
	bool linked;

	state = vm_page_astate_load(m);
	linked = (state.flags & PGA_ENQUEUED) != 0;
	if (__predict_false(state.queue != queue || linked != enqueued))
		return (true);
	if ((state.flags & PGA_QUEUE_OP_MASK) == 0)
		return (false);

	/* Hand the pending queue operation on before skipping the page. */
	vm_page_pqbatch_submit(m, queue);
	return (true);
}
/*
 * Test whether page "m" may join a pageout cluster.
 *
 * The page qualifies when it can be exclusively busied, is not wired, is
 * dirty, sits in a laundry queue, and vm_page_try_remove_write() succeeds.
 * On success the page is returned still exclusively busied; otherwise any
 * busy state acquired here is dropped.
 */
static bool
vm_pageout_flushable(vm_page_t m)
{
	bool eligible;

	if (vm_page_tryxbusy(m) == 0)
		return (false);

	eligible = false;
	if (!vm_page_wired(m)) {
		vm_page_test_dirty(m);
		eligible = m->dirty != 0 && vm_page_in_laundry(m) &&
		    vm_page_try_remove_write(m);
	}
	if (!eligible)
		vm_page_xunbusy(m);
	return (eligible);
}
/*
 * Build a run of pages around the exclusively busied page "m" and hand it to
 * vm_pageout_flush() with VM_PAGER_PUT_NOREUSE.  At most
 * vm_pageout_page_count pages are gathered, and a neighbour is added only if
 * vm_pageout_flushable() accepts it.  The object must be write-locked.
 * Returns the value returned by vm_pageout_flush().
 */
static int
vm_pageout_cluster(vm_page_t m)
{
struct pctrie_iter pages;
/* Sized so that "m" sits in the middle and the run can grow either way. */
vm_page_t mc[2 * vm_pageout_page_count - 1];
int alignment, page_base, pageout_count;
VM_OBJECT_ASSERT_WLOCKED(m->object);
vm_page_assert_xbusied(m);
vm_page_iter_init(&pages, m->object);
/* Offset of "m" within a window of vm_pageout_page_count page indices. */
alignment = m->pindex % vm_pageout_page_count;
page_base = nitems(mc) / 2;
pageout_count = 1;
mc[page_base] = m;
/*
 * Scan backwards first, taking at most "alignment" pages so that the run
 * does not extend below the aligned boundary.
 * NOTE(review): vm_radix_iter_prev()/vm_radix_iter_next() presumably return
 * the page at the adjacent index, or NULL if there is none - confirm.
 */
if (alignment > 0) {
pages.index = mc[page_base]->pindex;
do {
m = vm_radix_iter_prev(&pages);
if (m == NULL || !vm_pageout_flushable(m))
break;
mc[--page_base] = m;
} while (pageout_count++ < alignment);
}
/* Then scan forwards from the last page of the run while room remains. */
if (pageout_count < vm_pageout_page_count) {
pages.index = mc[page_base + pageout_count - 1]->pindex;
do {
m = vm_radix_iter_next(&pages);
if (m == NULL || !vm_pageout_flushable(m))
break;
mc[page_base + pageout_count] = m;
} while (++pageout_count < vm_pageout_page_count);
}
/*
 * If room still remains and the first backward scan was not cut short
 * (it collected exactly "alignment" pages), resume scanning backwards
 * past the aligned boundary.
 */
if (pageout_count < vm_pageout_page_count &&
alignment == nitems(mc) / 2 - page_base) {
pages.index = mc[page_base]->pindex;
do {
m = vm_radix_iter_prev(&pages);
if (m == NULL || !vm_pageout_flushable(m))
break;
mc[--page_base] = m;
} while (++pageout_count < vm_pageout_page_count);
}
return (vm_pageout_flush(&mc[page_base], pageout_count,
VM_PAGER_PUT_NOREUSE, NULL));
}
/*
 * Write the "count" pages in "mc" to their pager and dispose of each page
 * according to the per-page status the pager reports.
 *
 * All pages are taken to belong to mc[0]'s object, which must be
 * write-locked.  Each page's busy state is downgraded before the I/O is
 * issued, and every page whose status is not VM_PAGER_PEND is shared-unbusied
 * here and has its paging-in-progress reference dropped.
 *
 * If "eio" is non-NULL it is set to true when any page reported
 * VM_PAGER_ERROR or VM_PAGER_FAIL (false otherwise), and the return value is
 * the index of the first page that reported VM_PAGER_AGAIN, or "count" if
 * none did.  If "eio" is NULL the return value is the number of pages
 * counted as paged out.
 */
int
vm_pageout_flush(vm_page_t *mc, int count, int flags, bool *eio)
{
vm_object_t object = mc[0]->object;
int pageout_status[count];
int numpagedout = 0;
int i, runlen;
VM_OBJECT_ASSERT_WLOCKED(object);
/* Check that each page is fully valid and not writeable before the I/O. */
for (i = 0; i < count; i++) {
KASSERT(vm_page_all_valid(mc[i]),
("vm_pageout_flush: partially invalid page %p index %d/%d",
mc[i], i, count));
KASSERT((mc[i]->a.flags & PGA_WRITEABLE) == 0,
("vm_pageout_flush: writeable page %p", mc[i]));
vm_page_busy_downgrade(mc[i]);
}
/* One paging-in-progress reference per page; dropped per page below. */
vm_object_pip_add(object, count);
vm_pager_put_pages(object, mc, count, flags, pageout_status);
runlen = count;
if (eio != NULL)
*eio = false;
for (i = 0; i < count; i++) {
vm_page_t mt = mc[i];
KASSERT(pageout_status[i] == VM_PAGER_PEND ||
!pmap_page_is_write_mapped(mt),
("vm_pageout_flush: page %p is not write protected", mt));
switch (pageout_status[i]) {
case VM_PAGER_OK:
/* Only requeue the page if it is still in a laundry queue. */
if (vm_page_in_laundry(mt))
vm_page_deactivate_noreuse(mt);
/* FALLTHROUGH */
case VM_PAGER_PEND:
numpagedout++;
break;
case VM_PAGER_BAD:
/*
 * The page is marked clean and moved out of the laundry, but is not
 * counted as paged out.
 */
vm_page_undirty(mt);
if (vm_page_in_laundry(mt))
vm_page_deactivate_noreuse(mt);
break;
case VM_PAGER_ERROR:
case VM_PAGER_FAIL:
/*
 * A VM_PAGER_FAIL on a swap object marks the page unswappable and still
 * counts it as paged out; any other failure reactivates the page.
 */
if ((object->flags & OBJ_SWAP) != 0 &&
pageout_status[i] == VM_PAGER_FAIL) {
vm_page_unswappable(mt);
numpagedout++;
} else
vm_page_activate(mt);
if (eio != NULL)
*eio = true;
break;
case VM_PAGER_AGAIN:
/* Remember only the first page that asked to be retried. */
if (runlen == count)
runlen = i;
break;
}
/* Pages with pending I/O keep their busy state and pip reference. */
if (pageout_status[i] != VM_PAGER_PEND) {
vm_object_pip_wakeup(object);
vm_page_sunbusy(mt);
}
}
if (eio != NULL)
return (runlen);
return (numpagedout);
}
/*
 * swapon event handler (registered in vm_pageout_laundry_worker()): record
 * that a swap device is enabled.  Both arguments are unused.
 */
static void
vm_pageout_swapon(void *arg __unused, struct swdevt *sp __unused)
{
atomic_store_rel_int(&swapdev_enabled, 1);
}
/*
 * swapoff event handler: clear swapdev_enabled when swap_pager_nswapdev()
 * reports exactly one device.  Both arguments are unused.
 *
 * NOTE(review): presumably the handler runs before the departing device is
 * removed from the count, so "1" means the last device is going away -
 * confirm against the swap pager.
 */
static void
vm_pageout_swapoff(void *arg __unused, struct swdevt *sp __unused)
{

	if (swap_pager_nswapdev() != 1)
		return;
	atomic_store_rel_int(&swapdev_enabled, 0);
}
/*
 * Launder page "m", together with any neighbours that vm_pageout_cluster()
 * picks up, storing the number of pages paged out in *numpagedout.
 *
 * Called with the page's object write-locked and, as implied by the
 * vm_page_xunbusy() calls below, with the page exclusively busied.  The
 * object lock is always released before returning.
 *
 * Returns 0 on success, or:
 *   EDEADLK - vn_start_write() or vget() failed for a vnode-backed page
 *   ENOENT  - the vnode no longer refers to this object
 *   ENXIO   - the page changed queue, object, index, or became clean while
 *             the object lock was dropped
 *   EBUSY   - the page could not be re-busied, or its write mappings could
 *             not be removed
 *   EIO     - vm_pageout_cluster() reported no pages paged out
 */
static int
vm_pageout_clean(vm_page_t m, int *numpagedout)
{
struct vnode *vp;
struct mount *mp;
vm_object_t object;
vm_pindex_t pindex;
int error;
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
error = 0;
vp = NULL;
mp = NULL;
if (object->type == OBJT_VNODE) {
/*
 * The page is unbusied and the object lock dropped while the vnode is
 * acquired, so the page's state is revalidated afterwards.
 */
vm_page_xunbusy(m);
vp = object->handle;
if (vp->v_type == VREG &&
vn_start_write(vp, &mp, V_NOWAIT) != 0) {
mp = NULL;
error = EDEADLK;
goto unlock_all;
}
KASSERT(mp != NULL,
("vp %p with NULL v_mount", vp));
/* Hold an object reference across the unlocked window. */
vm_object_reference_locked(object);
pindex = m->pindex;
VM_OBJECT_WUNLOCK(object);
if (vget(vp, vn_lktype_write(NULL, vp) | LK_TIMELOCK) != 0) {
/* Clearing vp makes the cleanup path skip vput(). */
vp = NULL;
error = EDEADLK;
goto unlock_mp;
}
VM_OBJECT_WLOCK(object);
/* The object and vnode may have been disassociated meanwhile. */
if (vp->v_object != object) {
error = ENOENT;
goto unlock_all;
}
/*
 * The page may have been requeued, reassigned to another object or
 * index, or cleaned while the lock was dropped.
 */
if (!vm_page_in_laundry(m) || m->object != object ||
m->pindex != pindex || m->dirty == 0) {
error = ENXIO;
goto unlock_all;
}
/* The page may have been busied by someone else meanwhile. */
if (vm_page_tryxbusy(m) == 0) {
error = EBUSY;
goto unlock_all;
}
}
/* Write mappings must be gone before the page is written out. */
if (!vm_page_try_remove_write(m)) {
vm_page_xunbusy(m);
error = EBUSY;
goto unlock_all;
}
if ((*numpagedout = vm_pageout_cluster(m)) == 0)
error = EIO;
unlock_all:
VM_OBJECT_WUNLOCK(object);
unlock_mp:
/*
 * mp is non-NULL only once vn_start_write() has succeeded, in which case
 * the object reference taken above is dropped here as well.
 */
if (mp != NULL) {
if (vp != NULL)
vput(vp);
vm_object_deallocate(object);
vn_finished_write(mp);
}
return (error);
}
/*
 * Try to launder up to "launder" pages from the domain's laundry queues.
 *
 * When a swap device is enabled, PQ_UNSWAPPABLE is scanned first and
 * PQ_LAUNDRY is scanned afterwards if the target has not been met;
 * otherwise only PQ_LAUNDRY is scanned.  Clean pages are freed, referenced
 * pages are reactivated or requeued, and dirty pages are written out via
 * vm_pageout_clean().  "in_shortfall" controls whether reactivated pages
 * count against the target.
 *
 * Returns the amount by which the target was reduced.
 */
static int
vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
{
struct scan_state ss;
struct vm_pagequeue *pq;
vm_object_t object;
vm_page_t m, marker;
vm_page_astate_t new, old;
int act_delta, error, numpagedout, queue, refs, starting_target;
int vnodes_skipped;
bool pageout_ok;
object = NULL;
starting_target = launder;
vnodes_skipped = 0;
/* Only bother with the unswappable queue when swap is available. */
if (atomic_load_acq_int(&swapdev_enabled))
queue = PQ_UNSWAPPABLE;
else
queue = PQ_LAUNDRY;
scan:
marker = &vmd->vmd_markers[queue];
pq = &vmd->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
while (launder > 0 && (m = vm_pageout_next(&ss, false)) != NULL) {
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
/* Skip pages that left the queue or have pending queue operations. */
if (vm_pageout_defer(m, queue, true))
continue;
/*
 * Lock the page's object, keeping the lock across consecutive pages of
 * the same object.  The object pointer is rechecked after locking because
 * it may change before the lock is acquired.
 */
if (object == NULL || object != m->object) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
object = atomic_load_ptr(&m->object);
if (__predict_false(object == NULL))
continue;
VM_OBJECT_WLOCK(object);
if (__predict_false(m->object != object)) {
VM_OBJECT_WUNLOCK(object);
object = NULL;
continue;
}
}
if (vm_page_tryxbusy(m) == 0)
continue;
if (__predict_false(vm_page_wired(m)))
goto skip_page;
/* A page with no valid contents is freed without further checks. */
if (vm_page_none_valid(m))
goto free_page;
refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
/*
 * Update the page's queue state, retrying ("continue") whenever
 * vm_page_pqstate_commit() fails; "old" is presumably refreshed by the
 * failed commit.
 */
for (old = vm_page_astate_load(m);;) {
/* The page was removed from its queue; leave it alone. */
if (__predict_false(_vm_page_queue(old) == PQ_NONE))
goto skip_page;
new = old;
act_delta = refs;
if ((old.flags & PGA_REFERENCED) != 0) {
new.flags &= ~PGA_REFERENCED;
act_delta++;
}
if (act_delta == 0) {
/* Not referenced; fall through to the dirty/clean handling. */
;
} else if (object->ref_count != 0) {
/* Referenced page of a live object: boost act_count and reactivate. */
new.act_count += ACT_ADVANCE +
act_delta;
if (new.act_count > ACT_MAX)
new.act_count = ACT_MAX;
new.flags &= ~PGA_QUEUE_OP_MASK;
new.flags |= PGA_REQUEUE;
new.queue = PQ_ACTIVE;
if (!vm_page_pqstate_commit(m, &old, new))
continue;
/* Outside of shortfall, a reactivation counts towards the target. */
if (!in_shortfall)
launder--;
VM_CNT_INC(v_reactivated);
goto skip_page;
} else if ((object->flags & OBJ_DEAD) == 0) {
/* Referenced, unreferenced object, not dead: requeue in place. */
new.flags |= PGA_REQUEUE;
if (!vm_page_pqstate_commit(m, &old, new))
continue;
goto skip_page;
}
break;
}
/*
 * For a page that tests clean, remove all of its mappings before freeing
 * it; skip the page if that fails.
 */
if (object->ref_count != 0) {
vm_page_test_dirty(m);
if (m->dirty == 0 && !vm_page_try_remove_all(m))
goto skip_page;
}
if (m->dirty == 0) {
free_page:
/* Recheck the queue state one last time before freeing. */
if (vm_pageout_defer(m, queue, true))
goto skip_page;
vm_page_free(m);
VM_CNT_INC(v_dfree);
} else if ((object->flags & OBJ_DEAD) == 0) {
/* Swap-backed pages are not paged out when swap pageouts are disabled. */
if ((object->flags & OBJ_SWAP) != 0)
pageout_ok = disable_swap_pageouts == 0;
else
pageout_ok = true;
if (!pageout_ok) {
vm_page_launder(m);
goto skip_page;
}
/*
 * vm_pageout_clean() may page out a whole cluster, so the target can drop
 * by more than one.  It always releases the object lock, hence the reset
 * of "object" below.
 */
error = vm_pageout_clean(m, &numpagedout);
if (error == 0) {
launder -= numpagedout;
ss.scanned += numpagedout;
} else if (error == EDEADLK) {
pageout_lock_miss++;
vnodes_skipped++;
}
object = NULL;
} else {
/* Dirty page of a dead object: just unbusy it. */
skip_page:
vm_page_xunbusy(m);
}
}
if (object != NULL) {
VM_OBJECT_WUNLOCK(object);
object = NULL;
}
vm_pagequeue_lock(pq);
vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
/* Target not met from the unswappable queue: continue with PQ_LAUNDRY. */
if (launder > 0 && queue == PQ_UNSWAPPABLE) {
queue = PQ_LAUNDRY;
goto scan;
}
/* Nudge the syncer if vnode pages were skipped and the target was missed. */
if (vnodes_skipped > 0 && launder > 0)
(void)speedup_syncer();
return (starting_target - launder);
}
/*
 * Integer square root: returns the largest value whose square does not
 * exceed "num", computed one result bit per iteration.
 */
static u_int
isqrt(u_int num)
{
	u_int place, result, trial;

	if (num == 0)
		return (0);

	result = 0;
	/* Begin at the largest power of four that does not exceed num. */
	for (place = 1u << ((fls(num) - 1) & ~1); place != 0; place >>= 2) {
		trial = result + place;
		result >>= 1;
		if (num >= trial) {
			num -= trial;
			result += place;
		}
	}
	return (result);
}
/*
 * Laundry thread body for one domain; "arg" carries the domain index.
 * Never returns.
 *
 * Each iteration picks a number of pages to launder, either to satisfy a
 * shortfall posted by the page daemon (vmd_laundry_request ==
 * VM_LAUNDRY_SHORTFALL) or as background laundering, calls
 * vm_pageout_launder(), and then sleeps on vmd_laundry_request when there is
 * nothing left to do.
 */
static void
vm_pageout_laundry_worker(void *arg)
{
struct vm_domain *vmd;
struct vm_pagequeue *pq;
uint64_t nclean, ndirty, nfreed;
int domain, last_target, launder, shortfall, shortfall_cycle, target;
bool in_shortfall;
domain = (uintptr_t)arg;
vmd = VM_DOMAIN(domain);
pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
shortfall = 0;
in_shortfall = false;
shortfall_cycle = 0;
last_target = target = 0;
nfreed = 0;
/* Track swap device availability through swapdev_enabled. */
(void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, vmd,
EVENTHANDLER_PRI_ANY);
(void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, vmd,
EVENTHANDLER_PRI_ANY);
for (;;) {
KASSERT(target >= 0, ("negative target %d", target));
KASSERT(shortfall_cycle >= 0,
("negative cycle %d", shortfall_cycle));
launder = 0;
/*
 * Shortfall handling.  A new shortfall (re)starts a run spread over
 * shortfall_cycle passes; an ongoing run ends once the cycle count is
 * exhausted or vm_laundry_target() is no longer positive.
 */
if (shortfall > 0) {
in_shortfall = true;
shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
target = shortfall;
} else if (!in_shortfall)
goto trybackground;
else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
in_shortfall = false;
target = 0;
goto trybackground;
}
/* Split the remaining target evenly over the remaining cycles. */
launder = target / shortfall_cycle--;
goto dolaundry;
trybackground:
/*
 * Background laundering.  A new run starts when there is no current
 * target and the dirty page count, scaled by a factor that grows with the
 * square root of the clean pages freed since the last run, reaches the
 * clean page count.
 */
nclean = vmd->vmd_free_count +
vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
if (target == 0 && ndirty * isqrt(howmany(nfreed + 1,
vmd->vmd_free_target - vmd->vmd_free_min)) >= nclean) {
target = vmd->vmd_background_launder_target;
}
if (target > 0) {
/*
 * If the page daemon freed pages since the last pass, restart the
 * accounting; otherwise abandon the run once the amount laundered since
 * then reaches the background cap.
 */
if (nfreed > 0) {
nfreed = 0;
last_target = target;
} else if (last_target - target >=
vm_background_launder_max * PAGE_SIZE / 1024) {
target = 0;
}
/* Per-pass quota derived from the background rate, capped at target. */
launder = vm_background_launder_rate * PAGE_SIZE / 1024;
launder /= VM_LAUNDER_RATE;
if (launder > target)
launder = target;
}
dolaundry:
if (launder > 0) {
/* min() keeps target from going negative if more was laundered. */
target -= min(vm_pageout_launder(vmd, launder,
in_shortfall), target);
pause("laundp", hz / VM_LAUNDER_RATE);
}
vm_pagequeue_lock(pq);
/* Nothing to do and no request posted: sleep until woken. */
if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
(void)mtx_sleep(&vmd->vmd_laundry_request,
vm_pagequeue_lockptr(pq), PVM, "launds", 0);
/*
 * Pick up a shortfall request unless a shortfall run is still in
 * progress; this discards any background target.
 */
if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
(!in_shortfall || shortfall_cycle == 0)) {
shortfall = vm_laundry_target(vmd) +
vmd->vmd_pageout_deficit;
target = 0;
} else
shortfall = 0;
if (target == 0)
vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
/* Collect the clean-page count posted by vm_pageout_inactive(). */
nfreed += vmd->vmd_clean_pages_freed;
vmd->vmd_clean_pages_freed = 0;
vm_pagequeue_unlock(pq);
}
}
static int
vm_pageout_active_target(struct vm_domain *vmd)
{
int shortage;
shortage = vmd->vmd_inactive_target + vm_paging_target(vmd) -
(vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt +
vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight);
shortage *= act_scan_laundry_weight;
return (shortage);
}
/*
 * Scan the domain's active queue, ageing each page's act_count and moving
 * pages whose count reaches zero to the inactive or laundry queue.
 *
 * With a positive "page_shortage" the whole queue may be scanned; otherwise
 * the scan is limited to a minimum derived from vm_pageout_update_period.
 * The scan position is kept between calls by the vmd_clock[0] marker, and
 * the scan wraps to the head of the queue on reaching vmd_clock[1].
 */
static void
vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage)
{
struct scan_state ss;
vm_object_t object;
vm_page_t m, marker;
struct vm_pagequeue *pq;
vm_page_astate_t old, new;
long min_scan;
int act_delta, max_scan, ps_delta, refs, scan_tick;
uint8_t nqueue;
marker = &vmd->vmd_markers[PQ_ACTIVE];
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
/*
 * Minimum scan: the share of the queue that corresponds to the ticks
 * elapsed since the last scan, relative to the update period.
 */
scan_tick = ticks;
if (vm_pageout_update_period != 0) {
min_scan = pq->pq_cnt;
min_scan *= scan_tick - vmd->vmd_last_active_scan;
min_scan /= hz * vm_pageout_update_period;
} else
min_scan = 0;
/* Only advance the timestamp when a scan is actually going to happen. */
if (min_scan > 0 || (page_shortage > 0 && pq->pq_cnt > 0))
vmd->vmd_last_active_scan = scan_tick;
max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan;
act_scan:
/* vm_pageout_init_scan() releases the page queue lock. */
vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan);
while ((m = vm_pageout_next(&ss, false)) != NULL) {
if (__predict_false(m == &vmd->vmd_clock[1])) {
/*
 * Reached the end marker: move the clock markers to the head and tail of
 * the queue and restart the scan with the remaining budget.
 */
vm_pagequeue_lock(pq);
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[1], plinks.q);
TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_clock[0],
plinks.q);
TAILQ_INSERT_TAIL(&pq->pq_pl, &vmd->vmd_clock[1],
plinks.q);
max_scan -= ss.scanned;
vm_pageout_end_scan(&ss);
goto act_scan;
}
if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
/* Skip pages that left the queue or have pending queue operations. */
if (vm_pageout_defer(m, PQ_ACTIVE, true))
continue;
/* The object pointer may be NULL; skip such pages. */
object = atomic_load_ptr(&m->object);
if (__predict_false(object == NULL))
continue;
/*
 * Handle a pending PGA_SWAP_FREE if the object lock can be taken without
 * blocking and the page still belongs to the object.
 */
if ((m->a.flags & PGA_SWAP_FREE) != 0 &&
VM_OBJECT_TRYWLOCK(object)) {
if (m->object == object)
vm_pager_page_unswapped(m);
VM_OBJECT_WUNLOCK(object);
}
refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
/*
 * Compute and commit the new queue state, recomputing it whenever
 * vm_page_pqstate_commit() fails.
 */
old = vm_page_astate_load(m);
do {
/* The page was removed from its queue; leave it alone. */
if (__predict_false(_vm_page_queue(old) == PQ_NONE)) {
ps_delta = 0;
break;
}
new = old;
act_delta = refs;
if ((old.flags & PGA_REFERENCED) != 0) {
new.flags &= ~PGA_REFERENCED;
act_delta++;
}
/* Referenced pages gain activity, unreferenced pages decay. */
if (act_delta != 0) {
new.act_count += ACT_ADVANCE + act_delta;
if (new.act_count > ACT_MAX)
new.act_count = ACT_MAX;
} else {
new.act_count -= min(new.act_count,
ACT_DECLINE);
}
if (new.act_count > 0) {
/* Still active; move it back if it has drifted to another queue. */
ps_delta = 0;
if (old.queue != PQ_ACTIVE) {
new.flags &= ~PGA_QUEUE_OP_MASK;
new.flags |= PGA_REQUEUE;
new.queue = PQ_ACTIVE;
}
} else {
/*
 * Deactivate.  Without a shortage every page goes to the inactive queue
 * and nothing is credited.  With a shortage, a clean page goes to the
 * inactive queue and is credited act_scan_laundry_weight, while a dirty
 * page goes to the laundry queue and is credited 1.
 */
if (page_shortage <= 0) {
nqueue = PQ_INACTIVE;
ps_delta = 0;
} else if (m->dirty == 0) {
nqueue = PQ_INACTIVE;
ps_delta = act_scan_laundry_weight;
} else {
nqueue = PQ_LAUNDRY;
ps_delta = 1;
}
new.flags &= ~PGA_QUEUE_OP_MASK;
new.flags |= PGA_REQUEUE;
new.queue = nqueue;
}
} while (!vm_page_pqstate_commit(m, &old, new));
page_shortage -= ps_delta;
}
/* Park the clock marker right after the scan marker for the next scan. */
vm_pagequeue_lock(pq);
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q);
vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
}
/*
 * Put a page that the inactive scan dequeued back in front of the scan
 * marker, provided its state still names PQ_INACTIVE and it is not already
 * enqueued.  The page queue lock must be held.
 *
 * Returns 1 if the page was reinserted and 0 if it was left alone.
 */
static int
vm_pageout_reinsert_inactive_page(struct vm_pagequeue *pq, vm_page_t marker,
    vm_page_t m)
{
	vm_page_astate_t state;

	vm_pagequeue_assert_locked(pq);

	state = vm_page_astate_load(m);
	if (state.queue == PQ_INACTIVE && (state.flags & PGA_ENQUEUED) == 0) {
		vm_page_aflag_set(m, PGA_ENQUEUED);
		TAILQ_INSERT_BEFORE(marker, m, plinks.q);
		return (1);
	}
	return (0);
}
/*
 * Return pages to the inactive queue in batches.
 *
 * With a non-NULL "m", the page is added to "bq"; if vm_batchqueue_insert()
 * returns non-zero nothing more happens, otherwise "m" and everything held
 * in the batch are reinserted under the page queue lock.  With m == NULL the
 * batch is drained unconditionally.  The queue length is adjusted by the
 * number of pages actually reinserted and the batch is reset afterwards.
 */
static void
vm_pageout_reinsert_inactive(struct scan_state *ss, struct vm_batchqueue *bq,
    vm_page_t m)
{
	struct vm_pagequeue *queue = ss->pq;
	vm_page_t mark = ss->marker;
	vm_page_t page;
	int reinserted = 0;

	if (m != NULL && vm_batchqueue_insert(bq, m) != 0)
		return;

	vm_pagequeue_lock(queue);
	if (m != NULL)
		reinserted += vm_pageout_reinsert_inactive_page(queue, mark,
		    m);
	for (page = vm_batchqueue_pop(bq); page != NULL;
	    page = vm_batchqueue_pop(bq))
		reinserted += vm_pageout_reinsert_inactive_page(queue, mark,
		    page);
	vm_pagequeue_cnt_add(queue, reinserted);
	vm_pagequeue_unlock(queue);

	vm_batchqueue_init(bq);
}
/*
 * Scan the domain's inactive queue, freeing clean pages until
 * "page_shortage" pages have been freed, the queue is exhausted, or too many
 * dirty pages have been sent to the laundry.
 *
 * Pages are dequeued in batches; those that cannot be processed are put
 * back via vm_pageout_reinsert_inactive().  On completion the number of busy
 * pages encountered, the elapsed time in microseconds, and the number of
 * pages freed are added to vmd_addl_shortage, vmd_inactive_us and
 * vmd_inactive_freed respectively.
 */
static void
vm_pageout_scan_inactive(struct vm_domain *vmd, int page_shortage)
{
struct timeval start, end;
struct scan_state ss;
struct vm_batchqueue rq;
struct vm_page marker_page;
vm_page_t m, marker;
struct vm_pagequeue *pq;
vm_object_t object;
vm_page_astate_t old, new;
int act_delta, addl_page_shortage, dirty_count, dirty_thresh;
int starting_page_shortage, refs;
object = NULL;
vm_batchqueue_init(&rq);
getmicrouptime(&start);
/* Counts pages that were busy when encountered. */
addl_page_shortage = 0;
/*
 * Stop scanning once dirty_thresh dirty pages have been moved to the
 * laundry; a threshold of zero means no limit.
 */
dirty_count = 0;
dirty_thresh = inact_scan_laundry_weight * page_shortage;
if (dirty_thresh == 0)
dirty_thresh = INT_MAX;
starting_page_shortage = page_shortage;
/* A marker on the stack is used instead of one from vmd_markers. */
marker = &marker_page;
vm_page_init_marker(marker, PQ_INACTIVE, 0);
pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
vm_pagequeue_lock(pq);
vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
while (page_shortage > 0 && dirty_count < dirty_thresh) {
/* Drop any held object lock before a new batch is collected. */
if (object != NULL && vm_batchqueue_empty(&ss.bq)) {
VM_OBJECT_WUNLOCK(object);
object = NULL;
}
m = vm_pageout_next(&ss, true);
if (m == NULL)
break;
KASSERT((m->flags & PG_MARKER) == 0,
("marker page %p was dequeued", m));
/* Skip pages that changed queue state or have pending operations. */
if (vm_pageout_defer(m, PQ_INACTIVE, false))
continue;
/*
 * Lock the page's object, keeping the lock across consecutive pages of
 * the same object.  The object pointer is rechecked after locking.
 */
if (object == NULL || object != m->object) {
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
object = atomic_load_ptr(&m->object);
if (__predict_false(object == NULL))
continue;
VM_OBJECT_WLOCK(object);
if (__predict_false(m->object != object)) {
VM_OBJECT_WUNLOCK(object);
object = NULL;
goto reinsert;
}
}
/* Busy pages are counted and put back in the queue untouched. */
if (vm_page_tryxbusy(m) == 0) {
addl_page_shortage++;
goto reinsert;
}
if ((m->a.flags & PGA_SWAP_FREE) != 0)
vm_pager_page_unswapped(m);
if (__predict_false(vm_page_wired(m)))
goto skip_page;
/* A page with no valid contents is freed without further checks. */
if (vm_page_none_valid(m))
goto free_page;
refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
/*
 * Update the page's queue state, retrying ("continue") whenever
 * vm_page_pqstate_commit() fails.
 */
for (old = vm_page_astate_load(m);;) {
/* The page was removed from its queue; leave it alone. */
if (__predict_false(_vm_page_queue(old) == PQ_NONE))
goto skip_page;
new = old;
act_delta = refs;
if ((old.flags & PGA_REFERENCED) != 0) {
new.flags &= ~PGA_REFERENCED;
act_delta++;
}
if (act_delta == 0) {
/* Not referenced; fall through to the dirty/clean handling. */
;
} else if (object->ref_count != 0) {
/* Referenced page of a live object: boost act_count and reactivate. */
new.act_count += ACT_ADVANCE +
act_delta;
if (new.act_count > ACT_MAX)
new.act_count = ACT_MAX;
new.flags &= ~PGA_QUEUE_OP_MASK;
new.flags |= PGA_REQUEUE;
new.queue = PQ_ACTIVE;
if (!vm_page_pqstate_commit(m, &old, new))
continue;
VM_CNT_INC(v_reactivated);
goto skip_page;
} else if ((object->flags & OBJ_DEAD) == 0) {
/* Referenced, unreferenced object, not dead: requeue as inactive. */
new.queue = PQ_INACTIVE;
new.flags |= PGA_REQUEUE;
if (!vm_page_pqstate_commit(m, &old, new))
continue;
goto skip_page;
}
break;
}
/*
 * For a page that tests clean, remove all of its mappings before freeing
 * it; skip the page if that fails.
 */
if (object->ref_count != 0) {
vm_page_test_dirty(m);
if (m->dirty == 0 && !vm_page_try_remove_all(m))
goto skip_page;
}
if (m->dirty == 0) {
free_page:
/* Recheck the queue state one last time before freeing. */
if (vm_pageout_defer(m, PQ_INACTIVE, false))
goto skip_page;
/*
 * The page was already unlinked by the batch collection, so its queue
 * field is cleared directly here.
 */
m->a.queue = PQ_NONE;
vm_page_free(m);
page_shortage--;
continue;
}
/*
 * Dirty pages of live objects go to the laundry.  They count against
 * dirty_thresh unless they are swap-backed and swap is almost full.
 */
if ((object->flags & OBJ_DEAD) == 0) {
vm_page_launder(m);
if ((object->flags & OBJ_SWAP) == 0 ||
!atomic_load_bool(&swap_pager_almost_full))
dirty_count++;
}
skip_page:
vm_page_xunbusy(m);
continue;
reinsert:
vm_pageout_reinsert_inactive(&ss, &rq, m);
}
if (object != NULL)
VM_OBJECT_WUNLOCK(object);
/* Flush the reinsertion batch, then return unprocessed scan pages. */
vm_pageout_reinsert_inactive(&ss, &rq, NULL);
vm_pageout_reinsert_inactive(&ss, &ss.bq, NULL);
vm_pagequeue_lock(pq);
vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
/* Publish this thread's results for vm_pageout_inactive_dispatch(). */
atomic_add_int(&vmd->vmd_addl_shortage, addl_page_shortage);
getmicrouptime(&end);
timevalsub(&end, &start);
atomic_add_int(&vmd->vmd_inactive_us,
end.tv_sec * 1000000 + end.tv_usec);
atomic_add_int(&vmd->vmd_inactive_freed,
starting_page_shortage - page_shortage);
}
/*
 * Run an inactive queue scan for "shortage" pages, spreading the work over
 * the domain's helper threads when that looks worthwhile.
 *
 * Helpers are used only if more than one inactive thread is configured,
 * helpers are enabled, a paging rate has been measured, and the shortage
 * exceeds a quarter of what that rate predicts for one scan interval.  Each
 * thread then takes shortage / threads pages and the calling thread also
 * takes the remainder.  After all threads are done the measured rate
 * (vmd_inactive_pps) is updated as the mean of its old value and the rate
 * just observed.
 *
 * Returns the shortage minus the number of pages freed.
 */
static int
vm_pageout_inactive_dispatch(struct vm_domain *vmd, int shortage)
{
	u_int elapsed_us, nfreed, nthreads, rate, remainder;

	vmd->vmd_inactive_shortage = shortage;
	remainder = 0;
	nthreads = vmd->vmd_inactive_threads;

	if (nthreads > 1 && vmd->vmd_helper_threads_enabled &&
	    vmd->vmd_inactive_pps != 0 &&
	    shortage > vmd->vmd_inactive_pps / VM_INACT_SCAN_RATE / 4) {
		vmd->vmd_inactive_shortage /= nthreads;
		remainder = shortage % nthreads;

		/* Account for the helpers, then wake them. */
		vm_domain_pageout_lock(vmd);
		blockcount_acquire(&vmd->vmd_inactive_starting, nthreads - 1);
		blockcount_acquire(&vmd->vmd_inactive_running, nthreads - 1);
		wakeup(&vmd->vmd_inactive_shortage);
		vm_domain_pageout_unlock(vmd);
	}

	/* This thread does its own share plus any remainder. */
	vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage + remainder);

	/* Wait for the helpers to finish, then gather the totals. */
	blockcount_wait(&vmd->vmd_inactive_running, NULL, "vmpoid", PVM);
	nfreed = atomic_readandclear_int(&vmd->vmd_inactive_freed);
	VM_CNT_ADD(v_dfree, nfreed);

	/*
	 * Pages per second.  The two forms order the arithmetic differently
	 * for elapsed times above and below one second.
	 */
	elapsed_us = max(atomic_readandclear_int(&vmd->vmd_inactive_us), 1);
	rate = elapsed_us > 1000000 ?
	    (nfreed * 10) / ((elapsed_us * 10) / 1000000) :
	    (1000000 / elapsed_us) * nfreed;
	vmd->vmd_inactive_pps = (vmd->vmd_inactive_pps / 2) + (rate / 2);

	return (shortage - nfreed);
}
/*
 * Perform one inactive queue pass for a domain.
 *
 * The scan target is "shortage" plus the accumulated vmd_pageout_deficit.
 * After the scan the laundry thread is woken if appropriate, the OOM
 * detector is run, and *addl_shortage is set to the busy-page count
 * reported by the scan plus the deficit.
 *
 * Returns non-zero if the remaining page shortage is zero.
 *
 * NOTE(review): page_shortage is unsigned here, so "page_shortage <= 0" is
 * effectively "== 0" and "page_shortage > 0" is "!= 0" - confirm that the
 * dispatch routine can never free more than the target.
 */
static int
vm_pageout_inactive(struct vm_domain *vmd, int shortage, int *addl_shortage)
{
struct vm_pagequeue *pq;
u_int addl_page_shortage, deficit, page_shortage;
u_int starting_page_shortage;
/* Fold the deficit into this pass and reset it. */
deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
starting_page_shortage = shortage + deficit;
page_shortage = vm_pageout_inactive_dispatch(vmd, starting_page_shortage);
addl_page_shortage = atomic_readandclear_int(&vmd->vmd_addl_shortage);
if (starting_page_shortage > 0) {
pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
vm_pagequeue_lock(pq);
/*
 * Post a request to an idle laundry thread when it has something to work
 * with: a non-empty laundry queue or an enabled swap device.  A remaining
 * shortage requests shortfall laundering, otherwise background
 * laundering.
 */
if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
(pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
if (page_shortage > 0) {
vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
VM_CNT_INC(v_pdshortfalls);
} else if (vmd->vmd_laundry_request !=
VM_LAUNDRY_SHORTFALL)
vmd->vmd_laundry_request =
VM_LAUNDRY_BACKGROUND;
wakeup(&vmd->vmd_laundry_request);
}
/* Report the pages freed by this pass to the laundry thread. */
vmd->vmd_clean_pages_freed +=
starting_page_shortage - page_shortage;
vm_pagequeue_unlock(pq);
}
vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage);
*addl_shortage = addl_page_shortage + deficit;
return (page_shortage <= 0);
}
/* Number of domains currently voting for OOM; see vm_pageout_mightbe_oom(). */
static int vm_pageout_oom_vote;
static void
vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
int starting_page_shortage)
{
int old_vote;
if (starting_page_shortage <= 0 || starting_page_shortage !=
page_shortage || !vm_paging_needed(vmd, vmd->vmd_free_count))
vmd->vmd_oom_seq = 0;
else
vmd->vmd_oom_seq++;
if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
if (vmd->vmd_oom) {
vmd->vmd_oom = false;
atomic_subtract_int(&vm_pageout_oom_vote, 1);
}
return;
}
vmd->vmd_oom_seq = 0;
if (vmd->vmd_oom)
return;
vmd->vmd_oom = true;
old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1);
if (old_vote != vm_ndomains - 1)
return;
vm_pageout_oom(VM_OOM_MEM);
vmd->vmd_oom = false;
atomic_subtract_int(&vm_pageout_oom_vote, 1);
}
static long
vm_pageout_oom_pagecount(struct vmspace *vmspace)
{
vm_map_t map;
vm_map_entry_t entry;
vm_object_t obj;
long res;
map = &vmspace->vm_map;
KASSERT(!vm_map_is_system(map), ("system map"));
sx_assert(&map->lock, SA_LOCKED);
res = 0;
VM_MAP_ENTRY_FOREACH(entry, map) {
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
continue;
obj = entry->object.vm_object;
if (obj == NULL)
continue;
if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 &&
obj->ref_count != 1)
continue;
if (obj->type == OBJT_PHYS || obj->type == OBJT_VNODE ||
(obj->flags & OBJ_SWAP) != 0)
res += obj->resident_page_count;
}
return (res);
}
/* Value of ticks at the last vm_pageout_oom() call that was not rate-limited. */
static int vm_oom_ratelim_last;
/* Minimum interval, in seconds, between VM_OOM_MEM_PF-initiated OOM passes. */
static int vm_oom_pf_secs = 10;
SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
"");
/* Held around accesses to vm_oom_ratelim_last in vm_pageout_oom(). */
static struct mtx vm_oom_ratelim_mtx;
/*
 * Out-of-memory handler: pick the largest eligible process and kill it.
 *
 * "shortage" is one of VM_OOM_MEM, VM_OOM_MEM_PF or VM_OOM_SWAPZ and selects
 * both the size metric and the reason string.  Requests of type
 * VM_OOM_MEM_PF are rate-limited to one per vm_oom_pf_secs seconds.  If
 * vm_panic_on_oom is set and its countdown reaches zero, the system panics
 * instead of killing a process.
 */
void
vm_pageout_oom(int shortage)
{
const char *reason;
struct proc *p, *bigproc;
vm_offset_t size, bigsize;
struct thread *td;
struct vmspace *vm;
int now;
bool breakout;
/* Rate-limit page-fault initiated requests. */
mtx_lock(&vm_oom_ratelim_mtx);
now = ticks;
if (shortage == VM_OOM_MEM_PF &&
(u_int)(now - vm_oom_ratelim_last) < hz * vm_oom_pf_secs) {
mtx_unlock(&vm_oom_ratelim_mtx);
return;
}
vm_oom_ratelim_last = now;
mtx_unlock(&vm_oom_ratelim_mtx);
bigproc = NULL;
bigsize = 0;
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
/*
 * Skip processes that are not in the normal state, are exec'ing, exiting,
 * protected, system or already killed, as well as pid 1 and, while swap
 * space is available, pids below 48.
 */
if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 ||
p->p_pid == 1 || P_KILLED(p) ||
(p->p_pid < 48 && swap_pager_avail != 0)) {
PROC_UNLOCK(p);
continue;
}
/*
 * Skip the process if any thread is in a state other than runnable,
 * running, sleeping or suspended.
 */
breakout = false;
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
if (!TD_ON_RUNQ(td) &&
!TD_IS_RUNNING(td) &&
!TD_IS_SLEEPING(td) &&
!TD_IS_SUSPENDED(td)) {
thread_unlock(td);
breakout = true;
break;
}
thread_unlock(td);
}
if (breakout) {
PROC_UNLOCK(p);
continue;
}
vm = vmspace_acquire_ref(p);
if (vm == NULL) {
PROC_UNLOCK(p);
continue;
}
/*
 * Hold the process so that it stays around while allproc_lock is dropped
 * to measure its address space.
 */
_PHOLD(p);
PROC_UNLOCK(p);
sx_sunlock(&allproc_lock);
/* Do not block on the map lock; skip the process if it is contended. */
if (!vm_map_trylock_read(&vm->vm_map)) {
vmspace_free(vm);
sx_slock(&allproc_lock);
PRELE(p);
continue;
}
/* Size is swap usage, plus resident pages for memory shortages. */
size = vmspace_swap_count(vm);
if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
size += vm_pageout_oom_pagecount(vm);
vm_map_unlock_read(&vm->vm_map);
vmspace_free(vm);
sx_slock(&allproc_lock);
/* Keep a hold on the largest process seen so far only. */
if (size > bigsize) {
if (bigproc != NULL)
PRELE(bigproc);
bigproc = p;
bigsize = size;
} else {
PRELE(p);
}
}
sx_sunlock(&allproc_lock);
if (bigproc != NULL) {
switch (shortage) {
case VM_OOM_MEM:
reason = "failed to reclaim memory";
break;
case VM_OOM_MEM_PF:
reason = "a thread waited too long to allocate a page";
break;
case VM_OOM_SWAPZ:
reason = "out of swap space";
break;
default:
panic("unknown OOM reason %d", shortage);
}
/* Countdown to a panic, if one was requested via vm.panic_on_oom. */
if (vm_panic_on_oom != 0 && --vm_panic_on_oom == 0)
panic("%s", reason);
PROC_LOCK(bigproc);
killproc(bigproc, reason);
/* NOTE(review): presumably raises priority so the victim exits quickly. */
sched_nice(bigproc, PRIO_MIN);
_PRELE(bigproc);
PROC_UNLOCK(bigproc);
}
}
/*
 * Run the low-memory handlers, at most once per lowmem_period seconds across
 * all callers: fire the vm__lowmem_scan probe, invoke the vm_lowmem event
 * handlers with VM_LOW_PAGES, and trim UMA.  Independently of that, request
 * an asynchronous UMA reclaim whenever vm_page_count_severe() is true.
 *
 * Returns true if the handlers were invoked by this call.
 */
static bool
vm_pageout_lowmem(void)
{
	static int lowmem_ticks = 0;
	int seen;
	bool invoked;

	invoked = false;
	seen = atomic_load_int(&lowmem_ticks);
	for (;;) {
		/* Not enough time has passed since the last invocation. */
		if ((u_int)(ticks - seen) / hz < lowmem_period)
			break;

		/*
		 * Claim this period.  On failure the period check is simply
		 * repeated ("seen" is presumably refreshed by the failed
		 * compare-and-set).
		 */
		if (atomic_fcmpset_int(&lowmem_ticks, &seen, ticks) != 0) {
			SDT_PROBE0(vm, , , vm__lowmem_scan);
			EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_PAGES);
			uma_reclaim(UMA_RECLAIM_TRIM);
			invoked = true;
			break;
		}
	}

	if (vm_page_count_severe())
		uma_reclaim_wakeup();

	return (invoked);
}
/*
 * Main page daemon loop for one domain; "arg" carries the domain index.
 * Never returns.
 *
 * Each iteration waits until paging is needed or a timeout of
 * hz / VM_INACT_SCAN_RATE ticks expires, asks the PID controller how many
 * pages to free, runs the inactive scan if that number is positive, and
 * then always runs the active queue scan.
 */
static void
vm_pageout_worker(void *arg)
{
struct vm_domain *vmd;
u_int ofree;
int addl_shortage, domain, shortage;
bool target_met;
domain = (uintptr_t)arg;
vmd = VM_DOMAIN(domain);
shortage = 0;
target_met = true;
KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
vmd->vmd_last_active_scan = ticks;
while (TRUE) {
vm_domain_pageout_lock(vmd);
/* Clear the wanted flag before checking whether paging is needed. */
atomic_store_int(&vmd->vmd_pageout_wanted, 0);
if (vm_paging_needed(vmd, vmd->vmd_free_count)) {
/*
 * More work is needed right away.  If the previous pass missed its
 * target, pause first rather than rescanning immediately.
 */
vm_domain_pageout_unlock(vmd);
if (!target_met)
pause("pwait", hz / VM_INACT_SCAN_RATE);
} else {
/*
 * Sleep until woken or until the timeout expires; PDROP leaves the
 * pageout lock released.  A zero return is counted as a wakeup.
 */
if (mtx_sleep(&vmd->vmd_pageout_wanted,
vm_domain_pageout_lockptr(vmd), PDROP | PVM,
"psleep", hz / VM_INACT_SCAN_RATE) == 0)
VM_CNT_INC(v_pdwakeups);
}
/* Mark the daemon as running while it scans. */
atomic_store_int(&vmd->vmd_pageout_wanted, 1);
shortage = pidctrl_daemon(&vmd->vmd_pid, vmd->vmd_free_count);
if (shortage > 0) {
/*
 * Credit any pages that the low-memory handlers freed against the
 * shortage, without letting it go negative.
 */
ofree = vmd->vmd_free_count;
if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree)
shortage -= min(vmd->vmd_free_count - ofree,
(u_int)shortage);
target_met = vm_pageout_inactive(vmd, shortage,
&addl_shortage);
} else
addl_shortage = 0;
/* The active scan target includes the inactive scan's extra shortage. */
shortage = vm_pageout_active_target(vmd) + addl_shortage;
vm_pageout_scan_active(vmd, shortage);
}
}
/*
 * Main loop of a helper thread that scans the inactive queue for one
 * domain.  This function never returns.
 *
 * arg carries the domain index, passed through a uintptr_t cast.
 *
 * The thread sleeps on &vmd->vmd_inactive_shortage until woken, runs
 * vm_pageout_scan_inactive() with the shortage stored in that field, and
 * goes back to sleep.
 */
static void
vm_pageout_helper(void *arg)
{
struct vm_domain *vmd;
int domain;
domain = (uintptr_t)arg;
vmd = VM_DOMAIN(domain);
/* The pageout lock is held at the top and bottom of every iteration. */
vm_domain_pageout_lock(vmd);
for (;;) {
/* Untimed sleep (timo 0); no PDROP, so the lock is held again on return. */
msleep(&vmd->vmd_inactive_shortage,
vm_domain_pageout_lockptr(vmd), PVM, "psleep", 0);
/*
 * NOTE(review): presumably tells the thread that woke us that this
 * helper has started -- confirm against the code that acquires
 * vmd_inactive_starting.
 */
blockcount_release(&vmd->vmd_inactive_starting, 1);
/* Do the scan itself without holding the pageout lock. */
vm_domain_pageout_unlock(vmd);
vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
vm_domain_pageout_lock(vmd);
/*
 * The running count is dropped only after the pageout lock has been
 * retaken, i.e. immediately before this thread sleeps again.
 */
blockcount_release(&vmd->vmd_inactive_running, 1);
}
}
/*
 * Compute how many inactive-queue scanning threads a domain should have.
 *
 * An empty domain gets none.  Otherwise the system-wide thread budget,
 * howmany(mp_ncpus, pageout_cpus_per_thread), is split in proportion to
 * the domain's share of the CPUs that belong to non-empty domains, rounding
 * up.
 *
 * Side effect: pageout_cpus_per_thread is adjusted in place -- raised to 2
 * when smaller, otherwise capped at mp_ncpus.
 */
static int
get_pageout_threads_per_domain(const struct vm_domain *vmd)
{
	unsigned domain_cpus, eligible_cpus, excluded_cpus;
	unsigned total_pageout_threads;

	if (VM_DOMAIN_EMPTY(vmd->vmd_domain))
		return (0);

	if (pageout_cpus_per_thread < 2)
		pageout_cpus_per_thread = 2;
	else if (pageout_cpus_per_thread > mp_ncpus)
		pageout_cpus_per_thread = mp_ncpus;

	/* CPUs that sit in empty domains do not count towards the split. */
	excluded_cpus = 0;
	for (unsigned dom = 0; dom < vm_ndomains; dom++) {
		if (VM_DOMAIN_EMPTY(dom))
			excluded_cpus += CPU_COUNT(&cpuset_domain[dom]);
	}
	eligible_cpus = mp_ncpus;
	eligible_cpus -= excluded_cpus;

	domain_cpus = CPU_COUNT(&cpuset_domain[vmd->vmd_domain]);
	total_pageout_threads = howmany(mp_ncpus, pageout_cpus_per_thread);

	/* This domain's proportional share of the budget, rounded up. */
	return (howmany(total_pageout_threads * domain_cpus, eligible_cpus));
}
/*
 * Initialize the paging thresholds, the pageout PID controller and the
 * per-domain sysctl nodes for the given domain.
 *
 * All thresholds are derived from vmd_page_count, vmd_free_count and
 * vm_pageout_page_count.  The order of the assignments matters: several
 * fields are read while they still hold an intermediate value.
 */
static void
vm_pageout_init_domain(int domain)
{
struct vm_domain *vmd;
struct sysctl_oid *oid;
vmd = VM_DOMAIN(domain);
vmd->vmd_interrupt_free_min = 2;
/* Two MAXBSIZE buffers' worth of pages on top of the interrupt minimum. */
vmd->vmd_pageout_free_min = 2 * MAXBSIZE / PAGE_SIZE +
vmd->vmd_interrupt_free_min;
/* Reserve: one pageout cluster, the pageout minimum, and 1/768 of the pages. */
vmd->vmd_free_reserved = vm_pageout_page_count +
vmd->vmd_pageout_free_min + vmd->vmd_page_count / 768;
/*
 * vmd_free_min is the bare page_count / 200 figure (0.5% of the domain)
 * for the next three statements; the reserve is folded in only after
 * vmd_free_severe and vmd_free_target have been derived from it.
 */
vmd->vmd_free_min = vmd->vmd_page_count / 200;
vmd->vmd_free_severe = vmd->vmd_free_min / 2;
vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved;
vmd->vmd_free_min += vmd->vmd_free_reserved;
vmd->vmd_free_severe += vmd->vmd_free_reserved;
/* 1.5 times the free target, capped at one third of the current free count. */
vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2;
if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3)
vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
/* Wakeup threshold: nine tenths of the free target (integer arithmetic). */
vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_target / 10) * 9;
/* One tenth of the gap between the free target and the final free minimum. */
vmd->vmd_background_launder_target = (vmd->vmd_free_target -
vmd->vmd_free_min) / 10;
/*
 * Set up the PID controller with hz / VM_INACT_SCAN_RATE and
 * vmd_free_target (presumably its interval and setpoint -- confirm
 * against pidctrl_init()), and expose it under a "pidctrl" sysctl node
 * below the domain's own node.
 */
pidctrl_init(&vmd->vmd_pid, hz / VM_INACT_SCAN_RATE,
vmd->vmd_free_target, PIDCTRL_BOUND,
PIDCTRL_KPD, PIDCTRL_KID, PIDCTRL_KDD);
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
"pidctrl", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
/* vm_pageout() reads this count when it creates the helper threads. */
vmd->vmd_inactive_threads = get_pageout_threads_per_domain(vmd);
SYSCTL_ADD_BOOL(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
"pageout_helper_threads_enabled", CTLFLAG_RWTUN,
&vmd->vmd_helper_threads_enabled, 0,
"Enable multi-threaded inactive queue scanning");
}
/*
 * SYSINIT hook: initialize the paging parameters of every domain and
 * accumulate the per-domain thresholds into the global vm_cnt counters.
 *
 * Also supplies defaults for two settings that are still zero:
 * vm_pageout_update_period becomes 600, and vm_page_max_user_wired becomes
 * four fifths of the free page count summed over all domains.
 */
static void
vm_pageout_init(void *dummy __unused)
{
	struct vm_domain *vmd;
	u_long total_free;
	int domain;

	total_free = 0;
	for (domain = 0; domain < vm_ndomains; domain++) {
		vm_pageout_init_domain(domain);
		vmd = VM_DOMAIN(domain);

		/* Fold this domain's thresholds into the system-wide totals. */
		vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min;
		vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min;
		vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
		vm_cnt.v_free_severe += vmd->vmd_free_severe;
		vm_cnt.v_free_min += vmd->vmd_free_min;
		vm_cnt.v_free_target += vmd->vmd_free_target;
		vm_cnt.v_inactive_target += vmd->vmd_inactive_target;

		total_free += vmd->vmd_free_count;
	}

	/* A non-zero value means the setting was already chosen; keep it. */
	if (vm_pageout_update_period == 0)
		vm_pageout_update_period = 600;

	if (vm_page_max_user_wired == 0)
		vm_page_max_user_wired = 4 * total_free / 5;
}
static void
vm_pageout(void)
{
struct proc *p;
struct thread *td;
int error, first, i, j, pageout_threads;
p = curproc;
td = curthread;
mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
swap_pager_swap_init();
for (first = -1, i = 0; i < vm_ndomains; i++) {
if (VM_DOMAIN_EMPTY(i)) {
if (bootverbose)
printf("domain %d empty; skipping pageout\n",
i);
continue;
}
if (first == -1)
first = i;
else {
error = kthread_add(vm_pageout_worker,
(void *)(uintptr_t)i, p, NULL, 0, 0, "dom%d", i);
if (error != 0)
panic("starting pageout for domain %d: %d\n",
i, error);
}
pageout_threads = VM_DOMAIN(i)->vmd_inactive_threads;
for (j = 0; j < pageout_threads - 1; j++) {
error = kthread_add(vm_pageout_helper,
(void *)(uintptr_t)i, p, NULL, 0, 0,
"dom%d helper%d", i, j);
if (error != 0)
panic("starting pageout helper %d for domain "
"%d: %d\n", j, i, error);
}
error = kthread_add(vm_pageout_laundry_worker,
(void *)(uintptr_t)i, p, NULL, 0, 0, "laundry: dom%d", i);
if (error != 0)
panic("starting laundry for domain %d: %d", i, error);
}
error = kthread_add(uma_reclaim_worker, NULL, p, NULL, 0, 0, "uma");
if (error != 0)
panic("starting uma_reclaim helper, error %d\n", error);
snprintf(td->td_name, sizeof(td->td_name), "dom%d", first);
vm_pageout_worker((void *)(uintptr_t)first);
}
/*
 * Request a run of the page daemon worker for the given domain.
 *
 * The caller must not hold the domain's pageout lock.  Calls made from
 * within the pagedaemon process itself return without doing anything.
 *
 * vmd_pageout_wanted is bumped atomically; only the caller that finds it
 * at zero goes on to take the pageout lock and wake the worker, which
 * sleeps on the address of that field.
 */
void
pagedaemon_wakeup(int domain)
{
	struct vm_domain *vmd;

	vmd = VM_DOMAIN(domain);
	vm_domain_pageout_assert_unlocked(vmd);

	if (curproc == pageproc)
		return;

	/* A non-zero previous value means a request is already pending. */
	if (atomic_fetchadd_int(&vmd->vmd_pageout_wanted, 1) != 0)
		return;

	vm_domain_pageout_lock(vmd);
	/* Store 1 again, this time with the pageout lock held. */
	atomic_store_int(&vmd->vmd_pageout_wanted, 1);
	wakeup(&vmd->vmd_pageout_wanted);
	vm_domain_pageout_unlock(vmd);
}