/*
 * uvm_fault.c: fault handler
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/tracepoint.h>
#include <uvm/uvm.h>
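
/*
 * local data structures
 *
 * uvmadvice: for each madvise(2) hint, how many neighboring pages to
 * map behind (nback) and ahead (nforw) of the faulting page.
 * UVM_MAXRANGE bounds the number of pages handled in a single fault.
 */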
struct uvm_advice {
int nback;
int nforw;
};
static struct uvm_advice uvmadvice[MADV_MASK + 1];
#define UVM_MAXRANGE 16
static inline void uvmfault_anonflush(struct vm_anon **, int);
void uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
void uvmfault_update_stats(struct uvm_faultinfo *);
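
/*
 * uvmfault_anonflush: try to deactivate pages in the given anons.
 *
 * => does not deactivate a page that is busy
 */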
static inline void
uvmfault_anonflush(struct vm_anon **anons, int n)
{
int lcv;
struct vm_page *pg;
for (lcv = 0; lcv < n; lcv++) {
if (anons[lcv] == NULL)
continue;
KASSERT(rw_lock_held(anons[lcv]->an_lock));
pg = anons[lcv]->an_page;
if (pg && (pg->pg_flags & PG_BUSY) == 0) {
uvm_pagedeactivate(pg);
}
}
}
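
/*
 * uvmfault_init: compute the uvmadvice[] entries from the hard-coded
 * read-behind/read-ahead byte amounts below.
 */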
void
uvmfault_init(void)
{
int npages;
	/* MADV_NORMAL: 16KB of read-behind/read-ahead (hard-coded) */
	npages = atop(16384);
if (npages > 0) {
KASSERT(npages <= UVM_MAXRANGE / 2);
uvmadvice[MADV_NORMAL].nforw = npages;
uvmadvice[MADV_NORMAL].nback = npages - 1;
}
	/* MADV_SEQUENTIAL: 32KB window, weighted towards read-behind */
	npages = atop(32768);
if (npages > 0) {
KASSERT(npages <= UVM_MAXRANGE / 2);
uvmadvice[MADV_SEQUENTIAL].nforw = npages - 1;
uvmadvice[MADV_SEQUENTIAL].nback = npages;
}
}
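
/*
 * uvmfault_anonget: get data in an anon into a non-busy, non-released
 * page in that anon.
 *
 * => caller must lock the maps and the anon (via its amap)
 * => on success (0), the anon's page is resident and not busy
 * => returns ERESTART if the fault must be retried, ENOLCK if the
 *    anon lock must be upgraded to exclusive for a pagein (caller
 *    upgrades and retries), or EACCES if the pagein failed
 * => if we fail (result != 0) we unlock everything
 */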
int
uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
struct vm_anon *anon)
{
struct vm_page *pg;
int lock_type;
int error;
KASSERT(rw_lock_held(anon->an_lock));
KASSERT(anon->an_lock == amap->am_lock);
counters_inc(uvmexp_counters, flt_anget);
if (anon->an_page) {
curproc->p_ru.ru_minflt++;
} else {
curproc->p_ru.ru_majflt++;
}
error = 0;
for (;;) {
boolean_t we_own, locked;
		we_own = FALSE;	/* TRUE if we set PG_BUSY on the page */
pg = anon->an_page;
lock_type = rw_status(anon->an_lock);
if (pg) {
KASSERT(pg->pg_flags & PQ_ANON);
KASSERT(pg->uanon == anon);
if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0)
return 0;
counters_inc(uvmexp_counters, flt_pgwait);
KASSERT(pg->uobject == NULL);
uvmfault_unlockall(ufi, NULL, NULL);
uvm_pagewait(pg, anon->an_lock, "anonget");
} else {
if (lock_type == RW_READ) {
return ENOLCK;
}
pg = uvm_pagealloc(NULL, 0, anon, 0);
if (pg == NULL) {
uvmfault_unlockall(ufi, amap, NULL);
counters_inc(uvmexp_counters, flt_noram);
uvm_wait("flt_noram1");
} else {
				we_own = TRUE;	/* page is busy and we own it */
uvmfault_unlockall(ufi, amap, NULL);
counters_inc(uvmexp_counters, pageins);
error = uvm_swap_get(pg, anon->an_swslot,
PGO_SYNCIO);
}
}
locked = uvmfault_relock(ufi);
if (locked || we_own) {
rw_enter(anon->an_lock, lock_type);
}
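		/*
		 * If we own the page (we set PG_BUSY), we must finish
		 * the I/O bookkeeping even if we failed to relock.
		 */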
if (we_own) {
if (pg->pg_flags & PG_WANTED) {
wakeup(pg);
}
if (pg->pg_flags & PG_RELEASED) {
KASSERT(anon->an_ref == 0);
if (locked)
uvmfault_unlockall(ufi, NULL, NULL);
uvm_anon_release(anon);
counters_inc(uvmexp_counters, flt_pgrele);
return ERESTART;
}
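		/*
		 * If the pagein failed, mark the swap slot bad so the
		 * data is not tried again, free the page, and fail.
		 */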
if (error != VM_PAGER_OK) {
KASSERT(error != VM_PAGER_PEND);
anon->an_page = NULL;
uvm_swap_markbad(anon->an_swslot, 1);
anon->an_swslot = SWSLOT_BAD;
uvm_pagefree(pg);
if (locked) {
uvmfault_unlockall(ufi, NULL, NULL);
}
rw_exit(anon->an_lock);
return EACCES;
}
pmap_clear_modify(pg);
uvm_pageactivate(pg);
atomic_clearbits_int(&pg->pg_flags,
PG_WANTED|PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
}
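		/*
		 * We were not able to re-lock the map: restart the fault.
		 */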
if (!locked) {
if (we_own) {
rw_exit(anon->an_lock);
}
return ERESTART;
}
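		/*
		 * Verify that no one moved the anon out of the amap
		 * while everything was unlocked.
		 */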
if (ufi != NULL && amap_lookup(&ufi->entry->aref,
ufi->orig_rvaddr - ufi->entry->start) != anon) {
uvmfault_unlockall(ufi, amap, NULL);
return ERESTART;
}
counters_inc(uvmexp_counters, flt_anretry);
continue;
}
}
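
/*
 * uvmfault_promote: promote data to a new anon, allocating an anon
 * and a page for it.  used both for copy-on-write of an existing
 * anon and for promoting an object page (or zero fill) into an amap.
 *
 * => the amap must be locked for writing
 * => if we fail (result != 0) we unlock everything
 * => on success the new page is zero-filled (zero-fill fault) or a
 *    copy of uobjpage, and is returned busy via *npg
 */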
int
uvmfault_promote(struct uvm_faultinfo *ufi,
struct vm_page *uobjpage,
struct vm_anon **nanon,
struct vm_page **npg)
{
struct vm_amap *amap = ufi->entry->aref.ar_amap;
struct uvm_object *uobj = NULL;
struct vm_anon *anon;
struct vm_page *pg = NULL;
if (uobjpage != PGO_DONTCARE)
uobj = uobjpage->uobject;
KASSERT(rw_write_held(amap->am_lock));
KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
anon = uvm_analloc();
if (anon) {
anon->an_lock = amap->am_lock;
pg = uvm_pagealloc(NULL, 0, anon,
(uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0);
}
if (anon == NULL || pg == NULL) {
uvmfault_unlockall(ufi, amap, uobj);
if (anon == NULL)
counters_inc(uvmexp_counters, flt_noanon);
else {
anon->an_lock = NULL;
anon->an_ref--;
uvm_anfree(anon);
counters_inc(uvmexp_counters, flt_noram);
}
if (uvm_swapisfull())
return ENOMEM;
if (anon == NULL)
uvm_anwait();
else
uvm_wait("flt_noram3");
return ERESTART;
}
if (uobjpage != PGO_DONTCARE)
uvm_pagecopy(uobjpage, pg);
*nanon = anon;
*npg = pg;
return 0;
}
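
/*
 * uvmfault_update_stats: update the process RSS high-water mark
 * after a fault.  skipped for nested pmaps (e.g. vmm(4) guests).
 */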
void
uvmfault_update_stats(struct uvm_faultinfo *ufi)
{
struct vm_map *map;
struct proc *p;
vsize_t res;
map = ufi->orig_map;
if (pmap_nested(map->pmap))
return;
if (map->flags & VM_MAP_ISVMSPACE) {
p = curproc;
KASSERT(p != NULL && &p->p_vmspace->vm_map == map);
		res = pmap_resident_count(map->pmap);
		/* convert from pages to kilobytes */
		res <<= (PAGE_SHIFT - 10);
if (p->p_ru.ru_maxrss < res)
p->p_ru.ru_maxrss = res;
}
}
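
/*
 * MASK: strip PROT_WRITE when faulting on a copy-on-write entry, so
 * the backing page is never entered writable.
 *
 * uvm_faultctx: per-fault state shared by the uvm_fault_*() helpers.
 */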
#define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \
~PROT_WRITE : PROT_MASK)
struct uvm_faultctx {
vm_prot_t enter_prot;
vm_prot_t access_type;
vaddr_t startva;
int npages;
int centeridx;
boolean_t narrow;
boolean_t wired;
paddr_t pa_flags;
boolean_t promote;
int upper_lock_type;
int lower_lock_type;
};
int uvm_fault_check(
struct uvm_faultinfo *, struct uvm_faultctx *,
struct vm_anon ***, vm_fault_t);
int uvm_fault_upper(
struct uvm_faultinfo *, struct uvm_faultctx *,
struct vm_anon **);
boolean_t uvm_fault_upper_lookup(
struct uvm_faultinfo *, const struct uvm_faultctx *,
struct vm_anon **, struct vm_page **);
int uvm_fault_lower(
struct uvm_faultinfo *, struct uvm_faultctx *,
struct vm_page **);
int uvm_fault_lower_io(
struct uvm_faultinfo *, struct uvm_faultctx *,
struct uvm_object **, struct vm_page **);
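
/*
 * uvm_fault: page fault handler, main entry point
 *
 * => called from the architecture's trap handler
 * => looks up the faulting entry and dispatches to the upper layer
 *    (anons in the amap) or the lower layer (backing object or zero
 *    fill), retrying as long as a handler returns ERESTART
 */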
int
uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type,
vm_prot_t access_type)
{
struct uvm_faultinfo ufi;
struct uvm_faultctx flt;
boolean_t shadowed;
struct vm_anon *anons_store[UVM_MAXRANGE], **anons;
struct vm_page *pages[UVM_MAXRANGE];
int error;
counters_inc(uvmexp_counters, faults);
TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL);
ufi.orig_map = orig_map;
ufi.orig_rvaddr = trunc_page(vaddr);
ufi.orig_size = PAGE_SIZE;
flt.access_type = access_type;
flt.narrow = FALSE;
flt.wired = FALSE;
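	/*
	 * Start with shared (read) locks on the archs where the
	 * read-locked fault path is enabled; other archs still take
	 * the amap/object locks exclusively.
	 */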
#if defined(__amd64__) || defined(__arm64__)
flt.upper_lock_type = RW_READ;
flt.lower_lock_type = RW_READ;
#else
flt.upper_lock_type = RW_WRITE;
flt.lower_lock_type = RW_WRITE;
#endif
error = ERESTART;
while (error == ERESTART) {
anons = anons_store;
error = uvm_fault_check(&ufi, &flt, &anons, fault_type);
if (error != 0)
continue;
shadowed = uvm_fault_upper_lookup(&ufi, &flt, anons, pages);
if (shadowed == TRUE) {
error = uvm_fault_upper(&ufi, &flt, anons);
} else {
struct uvm_object *uobj = ufi.entry->object.uvm_obj;
if (uobj != NULL && uobj->pgops->pgo_fault != NULL) {
rw_enter(uobj->vmobjlock, RW_WRITE);
KERNEL_LOCK();
error = uobj->pgops->pgo_fault(&ufi,
flt.startva, pages, flt.npages,
flt.centeridx, fault_type, flt.access_type,
PGO_LOCKED);
KERNEL_UNLOCK();
} else {
error = uvm_fault_lower(&ufi, &flt, pages);
}
}
}
return error;
}
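
/*
 * uvm_fault_check: look up the faulting address, check protection,
 * and handle the needs-copy case.
 *
 * => called with nothing locked
 * => on success the map is locked (shared), the amap, if any, is
 *    locked with flt->upper_lock_type, and its anons for the
 *    computed page range are loaded into *ranons
 */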
int
uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
struct vm_anon ***ranons, vm_fault_t fault_type)
{
struct vm_amap *amap;
struct uvm_object *uobj;
int nback, nforw;
boolean_t write_locked = FALSE;
lookup:
if (uvmfault_lookup(ufi, write_locked) == FALSE) {
return EFAULT;
}
#ifdef DIAGNOSTIC
if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0)
panic("uvm_fault: fault on non-pageable map (%p, 0x%lx)",
ufi->map, ufi->orig_rvaddr);
#endif
if ((ufi->entry->protection & flt->access_type) != flt->access_type) {
uvmfault_unlockmaps(ufi, write_locked);
return EACCES;
}
flt->enter_prot = ufi->entry->protection;
flt->pa_flags = UVM_ET_ISWC(ufi->entry) ? PMAP_WC : 0;
if (VM_MAPENT_ISWIRED(ufi->entry) || (fault_type == VM_FAULT_WIRE)) {
flt->wired = TRUE;
flt->access_type = flt->enter_prot;
flt->narrow = TRUE;
flt->upper_lock_type = RW_WRITE;
flt->lower_lock_type = RW_WRITE;
}
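	/*
	 * Handle the "needs_copy" case: a write fault (or any fault
	 * with no backing object) on a needs-copy entry requires
	 * copying the amap first, which needs the map locked
	 * exclusively.  Try to upgrade the map lock; if the upgrade
	 * fails (the assumed semantics: the read lock is still held),
	 * drop the lock and redo the lookup taking the write lock up
	 * front.
	 */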
if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
if ((flt->access_type & PROT_WRITE) ||
(ufi->entry->object.uvm_obj == NULL)) {
if (!write_locked) {
write_locked = TRUE;
if (!vm_map_upgrade(ufi->map)) {
uvmfault_unlockmaps(ufi, FALSE);
goto lookup;
}
}
amap_copy(ufi->map, ufi->entry, M_NOWAIT,
UVM_ET_ISSTACK(ufi->entry) ? FALSE : TRUE,
ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
uvmfault_unlockmaps(ufi, write_locked);
uvm_wait("fltamapcopy");
return ERESTART;
}
counters_inc(uvmexp_counters, flt_amcopy);
} else {
flt->enter_prot &= ~PROT_WRITE;
}
}
if (write_locked) {
vm_map_downgrade(ufi->map);
write_locked = FALSE;
}
amap = ufi->entry->aref.ar_amap;
uobj = ufi->entry->object.uvm_obj;
if (amap == NULL && uobj == NULL) {
uvmfault_unlockmaps(ufi, FALSE);
return EFAULT;
}
if (uobj != NULL && amap != NULL &&
(flt->access_type & PROT_WRITE) != 0) {
flt->narrow = TRUE;
}
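	/*
	 * Compute the range of pages to handle around the faulting
	 * address.  A wide fault maps nback pages behind and nforw
	 * pages ahead per the entry's madvise hint; a narrow fault
	 * handles only the faulting page itself.
	 */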
if (flt->narrow == FALSE) {
nback = min(uvmadvice[ufi->entry->advice].nback,
(ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT);
flt->startva = ufi->orig_rvaddr - ((vsize_t)nback << PAGE_SHIFT);
nforw = min(uvmadvice[ufi->entry->advice].nforw,
((ufi->entry->end - ufi->orig_rvaddr) >> PAGE_SHIFT) - 1);
flt->npages = nback + nforw + 1;
flt->centeridx = nback;
flt->narrow = TRUE;
} else {
nback = nforw = 0;
flt->startva = ufi->orig_rvaddr;
flt->npages = 1;
flt->centeridx = 0;
}
if (amap) {
if ((flt->access_type & PROT_WRITE) != 0) {
flt->upper_lock_type = RW_WRITE;
}
amap_lock(amap, flt->upper_lock_type);
amap_lookups(&ufi->entry->aref,
flt->startva - ufi->entry->start, *ranons, flt->npages);
} else {
*ranons = NULL;
}
if ((flt->access_type & PROT_WRITE) != 0) {
flt->lower_lock_type = RW_WRITE;
}
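	/*
	 * For MADV_SEQUENTIAL access we are done with the pages
	 * behind the fault, so deactivate them (in both the amap and
	 * the object) and drop them from the current range.
	 */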
if (ufi->entry->advice == MADV_SEQUENTIAL && nback != 0) {
if (amap)
uvmfault_anonflush(*ranons, nback);
if (uobj) {
voff_t uoff;
uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset;
flt->lower_lock_type = RW_WRITE;
rw_enter(uobj->vmobjlock, RW_WRITE);
(void) uobj->pgops->pgo_flush(uobj, uoff, uoff +
((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE);
rw_exit(uobj->vmobjlock);
}
if (amap)
*ranons += nback;
flt->startva += ((vsize_t)nback << PAGE_SHIFT);
flt->npages -= nback;
flt->centeridx = 0;
}
return 0;
}
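
/*
 * uvm_fault_upper_upgrade: upgrade the amap lock to exclusive
 * without sleeping.  on failure, return ERESTART; the retried fault
 * will then take the write lock from the start.
 */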
static inline int
uvm_fault_upper_upgrade(struct uvm_faultctx *flt, struct vm_amap *amap)
{
KASSERT(flt->upper_lock_type == rw_status(amap->am_lock));
if (flt->upper_lock_type == RW_WRITE) {
return 0;
}
flt->upper_lock_type = RW_WRITE;
if (rw_enter(amap->am_lock, RW_UPGRADE|RW_NOSLEEP)) {
counters_inc(uvmexp_counters, flt_noup);
return ERESTART;
}
counters_inc(uvmexp_counters, flt_up);
KASSERT(flt->upper_lock_type == rw_status(amap->am_lock));
return 0;
}
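
/*
 * uvm_fault_upper_lookup: scan the amap for resident neighboring
 * anon pages and opportunistically enter them into the pmap.
 *
 * => returns TRUE if the center page is shadowed by an anon
 *    (an "upper layer" fault), FALSE otherwise
 */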
boolean_t
uvm_fault_upper_lookup(struct uvm_faultinfo *ufi,
const struct uvm_faultctx *flt, struct vm_anon **anons,
struct vm_page **pages)
{
struct vm_amap *amap = ufi->entry->aref.ar_amap;
struct vm_anon *anon;
struct vm_page *pg;
boolean_t shadowed;
vaddr_t currva;
paddr_t pa;
int lcv, entered = 0;
KASSERT(amap == NULL ||
rw_status(amap->am_lock) == flt->upper_lock_type);
currva = flt->startva;
shadowed = FALSE;
for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
if (amap == NULL || anons[lcv] == NULL) {
pages[lcv] = NULL;
continue;
}
pages[lcv] = PGO_DONTCARE;
if (lcv == flt->centeridx) {
shadowed = TRUE;
continue;
}
anon = anons[lcv];
pg = anon->an_page;
KASSERT(anon->an_lock == amap->am_lock);
if (pg && (pg->pg_flags & (PG_RELEASED|PG_BUSY)) == 0 &&
!pmap_extract(ufi->orig_map->pmap, currva, &pa)) {
uvm_pageactivate(pg);
counters_inc(uvmexp_counters, flt_namap);
KASSERT(flt->wired == FALSE);
(void) pmap_enter(ufi->orig_map->pmap, currva,
VM_PAGE_TO_PHYS(pg) | flt->pa_flags,
(anon->an_ref > 1) ?
(flt->enter_prot & ~PROT_WRITE) : flt->enter_prot,
PMAP_CANFAIL);
entered++;
}
}
if (entered > 0)
pmap_update(ufi->orig_map->pmap);
return shadowed;
}
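
/*
 * uvm_fault_upper: handle a fault on an anon in our amap
 *
 * => the amap (and hence the anon) is locked on entry; everything
 *    is unlocked on return
 */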
int
uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
struct vm_anon **anons)
{
struct vm_amap *amap = ufi->entry->aref.ar_amap;
struct vm_anon *oanon, *anon = anons[flt->centeridx];
struct vm_page *pg = NULL;
int error, ret;
KASSERT(rw_status(amap->am_lock) == flt->upper_lock_type);
KASSERT(anon->an_lock == amap->am_lock);
retry:
error = uvmfault_anonget(ufi, amap, anon);
switch (error) {
case 0:
break;
case ERESTART:
return ERESTART;
case ENOLCK:
error = uvm_fault_upper_upgrade(flt, amap);
if (error != 0) {
uvmfault_unlockall(ufi, amap, NULL);
return error;
}
KASSERT(rw_write_held(amap->am_lock));
goto retry;
default:
return error;
}
KASSERT(rw_status(amap->am_lock) == flt->upper_lock_type);
KASSERT(anon->an_lock == amap->am_lock);
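	/*
	 * If we are writing to an anon with more than one reference,
	 * copy-on-write: promote the data to a new anon and page.
	 * Otherwise we can reuse the existing anon.
	 */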
if ((flt->access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
error = uvm_fault_upper_upgrade(flt, amap);
if (error != 0) {
uvmfault_unlockall(ufi, amap, NULL);
return error;
}
KASSERT(rw_write_held(amap->am_lock));
counters_inc(uvmexp_counters, flt_acow);
oanon = anon;
error = uvmfault_promote(ufi, oanon->an_page, &anon, &pg);
if (error)
return error;
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
ret = amap_add(&ufi->entry->aref,
ufi->orig_rvaddr - ufi->entry->start, anon, 1);
KASSERT(ret == 0);
KASSERT(anon->an_lock == oanon->an_lock);
KASSERT(oanon->an_ref > 1);
oanon->an_ref--;
KASSERT(anon->an_lock == amap->am_lock);
KASSERT(oanon->an_lock == amap->am_lock);
#if defined(MULTIPROCESSOR) && !defined(__HAVE_PMAP_MPSAFE_ENTER_COW)
if (P_HASSIBLING(curproc)) {
flt->enter_prot &= ~PROT_WRITE;
flt->access_type &= ~PROT_WRITE;
}
#endif
} else {
counters_inc(uvmexp_counters, flt_anon);
oanon = anon;
pg = anon->an_page;
if (anon->an_ref > 1)
flt->enter_prot = flt->enter_prot & ~PROT_WRITE;
}
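	/*
	 * Enter the page into the pmap.  With PMAP_CANFAIL this can
	 * fail if pmap resources are exhausted; unlock everything,
	 * reclaim memory, and restart the fault.
	 */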
if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
uvmfault_unlockall(ufi, amap, NULL);
if (uvm_swapisfull()) {
return ENOMEM;
}
#ifdef __HAVE_PMAP_POPULATE
pmap_populate(ufi->orig_map->pmap, ufi->orig_rvaddr);
#else
uvm_wait("flt_pmfail1");
#endif
return ERESTART;
}
	if (flt->wired) {
		uvm_pagewire(pg);
		/*
		 * The now-wired page cannot be paged out, so release
		 * its swap resources for others to use.  An anon with
		 * no backing swap cannot be PG_CLEAN, so clear the
		 * flag now.
		 */
		atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
		uvm_anon_dropswap(anon);
	} else {
		uvm_pageactivate(pg);
	}
uvmfault_unlockall(ufi, amap, NULL);
pmap_update(ufi->orig_map->pmap);
return 0;
}
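
/*
 * uvm_fault_lower_lookup: ask the backing object (via pgo_get with
 * PGO_LOCKED) for resident pages in the fault range, map the
 * neighbors, and return the center page (NULL if not resident).
 *
 * => the object is locked on return (with flt->lower_lock_type)
 */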
struct vm_page *
uvm_fault_lower_lookup(
struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
struct vm_page **pages)
{
struct uvm_object *uobj = ufi->entry->object.uvm_obj;
struct vm_page *uobjpage = NULL;
int lcv, gotpages, entered;
vaddr_t currva;
paddr_t pa;
rw_enter(uobj->vmobjlock, flt->lower_lock_type);
counters_inc(uvmexp_counters, flt_lget);
gotpages = flt->npages;
(void) uobj->pgops->pgo_get(uobj,
ufi->entry->offset + (flt->startva - ufi->entry->start),
pages, &gotpages, flt->centeridx,
flt->access_type & MASK(ufi->entry), ufi->entry->advice,
PGO_LOCKED);
if (gotpages == 0) {
return NULL;
}
entered = 0;
currva = flt->startva;
for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
if (pages[lcv] == NULL ||
pages[lcv] == PGO_DONTCARE)
continue;
KASSERT((pages[lcv]->pg_flags & PG_BUSY) == 0);
KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0);
if (lcv == flt->centeridx) {
uobjpage = pages[lcv];
continue;
}
if (pmap_extract(ufi->orig_map->pmap, currva, &pa))
continue;
uvm_pageactivate(pages[lcv]);
counters_inc(uvmexp_counters, flt_nomap);
KASSERT(flt->wired == FALSE);
(void) pmap_enter(ufi->orig_map->pmap, currva,
VM_PAGE_TO_PHYS(pages[lcv]) | flt->pa_flags,
flt->enter_prot & MASK(ufi->entry), PMAP_CANFAIL);
entered++;
}
if (entered > 0)
pmap_update(ufi->orig_map->pmap);
return uobjpage;
}
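
/*
 * uvm_fault_lower_upgrade: like uvm_fault_upper_upgrade(), but for
 * the backing object's lock.
 */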
static inline int
uvm_fault_lower_upgrade(struct uvm_faultctx *flt, struct uvm_object *uobj)
{
KASSERT(flt->lower_lock_type == rw_status(uobj->vmobjlock));
if (flt->lower_lock_type == RW_WRITE)
return 0;
flt->lower_lock_type = RW_WRITE;
if (rw_enter(uobj->vmobjlock, RW_UPGRADE|RW_NOSLEEP)) {
counters_inc(uvmexp_counters, flt_noup);
return ERESTART;
}
counters_inc(uvmexp_counters, flt_up);
KASSERT(flt->lower_lock_type == rw_status(uobj->vmobjlock));
return 0;
}
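
/*
 * uvm_fault_lower: handle a fault on the bottom layer (the backing
 * object, or zero fill if there is none)
 *
 * => the amap, if any, is locked on entry; everything is unlocked
 *    on return
 */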
int
uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
struct vm_page **pages)
{
struct vm_amap *amap = ufi->entry->aref.ar_amap;
struct uvm_object *uobj = ufi->entry->object.uvm_obj;
int dropswap = 0;
struct vm_page *uobjpage, *pg = NULL;
struct vm_anon *anon = NULL;
int error;
if (uobj == NULL) {
uobjpage = NULL;
} else {
uobjpage = uvm_fault_lower_lookup(ufi, flt, pages);
}
KASSERT(amap == NULL ||
rw_status(amap->am_lock) == flt->upper_lock_type);
KASSERT(uobj == NULL ||
rw_status(uobj->vmobjlock) == flt->lower_lock_type);
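	/*
	 * Decide whether the data must be promoted into the amap:
	 * zero-fill faults (no backing object) always promote; object
	 * faults promote on a write to a copy-on-write entry.
	 */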
if (uobj == NULL) {
uobjpage = PGO_DONTCARE;
flt->promote = TRUE;
} else {
KASSERT(uobjpage != PGO_DONTCARE);
flt->promote = (flt->access_type & PROT_WRITE) &&
UVM_ET_ISCOPYONWRITE(ufi->entry);
}
if (uobjpage) {
curproc->p_ru.ru_minflt++;
if (uobjpage != PGO_DONTCARE) {
uvm_pageactivate(uobjpage);
}
} else {
error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage);
if (error != 0)
return error;
}
if (flt->promote == FALSE) {
counters_inc(uvmexp_counters, flt_obj);
if (UVM_ET_ISCOPYONWRITE(ufi->entry))
flt->enter_prot &= ~PROT_WRITE;
pg = uobjpage;
} else {
KASSERT(amap != NULL);
error = uvm_fault_upper_upgrade(flt, amap);
if (error != 0) {
uvmfault_unlockall(ufi, amap, uobj);
return error;
}
KASSERT(rw_write_held(amap->am_lock));
KASSERT(uobj == NULL ||
rw_status(uobj->vmobjlock) == flt->lower_lock_type);
error = uvmfault_promote(ufi, uobjpage, &anon, &pg);
if (error)
return error;
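		/*
		 * If we copied an object page, remove existing
		 * mappings of it so no process keeps writing to the
		 * now-replaced page (needed for shared amaps, and for
		 * forked siblings when the pmap cannot enter COW
		 * mappings MP-safely).
		 */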
if (uobjpage != PGO_DONTCARE) {
counters_inc(uvmexp_counters, flt_prcopy);
if ((amap_flags(amap) & AMAP_SHARED) != 0) {
pmap_page_protect(uobjpage, PROT_NONE);
}
#if defined(MULTIPROCESSOR) && !defined(__HAVE_PMAP_MPSAFE_ENTER_COW)
else if (P_HASSIBLING(curproc)) {
pmap_page_protect(uobjpage, PROT_NONE);
}
#endif
rw_exit(uobj->vmobjlock);
uobj = NULL;
} else {
counters_inc(uvmexp_counters, flt_przero);
}
if (amap_add(&ufi->entry->aref,
ufi->orig_rvaddr - ufi->entry->start, anon, 0)) {
if (pg->pg_flags & PG_WANTED)
wakeup(pg);
atomic_clearbits_int(&pg->pg_flags,
PG_BUSY|PG_FAKE|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
uvmfault_unlockall(ufi, amap, uobj);
uvm_anfree(anon);
counters_inc(uvmexp_counters, flt_noamap);
if (uvm_swapisfull())
return (ENOMEM);
amap_populate(&ufi->entry->aref,
ufi->orig_rvaddr - ufi->entry->start);
return ERESTART;
}
}
KASSERT(amap == NULL ||
rw_status(amap->am_lock) == flt->upper_lock_type);
KASSERT(uobj == NULL ||
rw_status(uobj->vmobjlock) == flt->lower_lock_type);
KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
if (pg->pg_flags & PG_WANTED)
wakeup(pg);
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
uvmfault_unlockall(ufi, amap, uobj);
if (uvm_swapisfull()) {
return (ENOMEM);
}
#ifdef __HAVE_PMAP_POPULATE
pmap_populate(ufi->orig_map->pmap, ufi->orig_rvaddr);
#else
uvm_wait("flt_pmfail2");
#endif
return ERESTART;
}
if (flt->wired) {
uvm_pagewire(pg);
if (pg->pg_flags & PQ_AOBJ) {
KASSERT(uobj != NULL);
KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
dropswap = 1;
}
} else {
uvm_pageactivate(pg);
}
if (dropswap)
uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
if (pg->pg_flags & PG_WANTED)
wakeup(pg);
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
uvmfault_unlockall(ufi, amap, uobj);
pmap_update(ufi->orig_map->pmap);
return (0);
}
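
/*
 * uvm_fault_lower_io: read the center page from the backing object,
 * dropping all locks for the duration of the I/O, then re-take the
 * locks and re-verify the fault state.
 */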
int
uvm_fault_lower_io(
struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
struct uvm_object **ruobj, struct vm_page **ruobjpage)
{
struct vm_amap * const amap = ufi->entry->aref.ar_amap;
struct uvm_object *uobj = *ruobj;
struct vm_page *pg;
boolean_t locked;
int gotpages, advice;
int error, result;
voff_t uoff;
vm_prot_t access_type;
uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset;
access_type = flt->access_type & MASK(ufi->entry);
advice = ufi->entry->advice;
error = uvm_fault_lower_upgrade(flt, uobj);
if (error != 0) {
uvmfault_unlockall(ufi, amap, uobj);
return error;
}
uvmfault_unlockall(ufi, amap, NULL);
curproc->p_ru.ru_majflt++;
KASSERT(rw_write_held(uobj->vmobjlock));
counters_inc(uvmexp_counters, flt_get);
gotpages = 1;
pg = NULL;
result = uobj->pgops->pgo_get(uobj, uoff, &pg, &gotpages,
0, access_type, advice, PGO_SYNCIO);
if (result != VM_PAGER_OK) {
KASSERT(result != VM_PAGER_PEND);
if (result == VM_PAGER_AGAIN) {
tsleep_nsec(&nowake, PVM, "fltagain2", MSEC_TO_NSEC(5));
return ERESTART;
}
if (!UVM_ET_ISNOFAULT(ufi->entry))
return (EIO);
pg = PGO_DONTCARE;
uobj = NULL;
flt->promote = TRUE;
}
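	/*
	 * Re-acquire the locks we dropped for the pagein and check
	 * that nothing changed while they were released.
	 */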
locked = uvmfault_relock(ufi);
if (locked && amap != NULL)
amap_lock(amap, flt->upper_lock_type);
if (pg != PGO_DONTCARE) {
uobj = pg->uobject;
rw_enter(uobj->vmobjlock, flt->lower_lock_type);
KASSERT((pg->pg_flags & PG_BUSY) != 0);
KASSERT(flt->lower_lock_type == RW_WRITE);
}
	/*
	 * Re-verify that the amap slot is still empty; if an anon
	 * appeared while everything was unlocked, give up and let
	 * the caller refault.
	 */
	if (locked && amap && amap_lookup(&ufi->entry->aref,
	    ufi->orig_rvaddr - ufi->entry->start)) {
		uvmfault_unlockall(ufi, amap, NULL);
		locked = FALSE;
	}
if (pg != PGO_DONTCARE) {
uvm_pageactivate(pg);
if (pg->pg_flags & PG_WANTED)
wakeup(pg);
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
}
if (locked == FALSE) {
if (pg != PGO_DONTCARE)
rw_exit(uobj->vmobjlock);
return ERESTART;
}
*ruobj = uobj;
*ruobjpage = pg;
return 0;
}
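
/*
 * uvm_fault_wire: wire down a range of virtual addresses in a map
 * by faulting each page in; on error, unwire what was already done.
 */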
int
uvm_fault_wire(vm_map_t map, vaddr_t start, vaddr_t end, vm_prot_t access_type)
{
vaddr_t va;
int rv;
for (va = start ; va < end ; va += PAGE_SIZE) {
rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
if (rv) {
if (va != start) {
uvm_fault_unwire(map, start, va);
}
return (rv);
}
}
return (0);
}
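
/*
 * uvm_fault_unwire: unwire a range of virtual addresses in a map.
 */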
void
uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end)
{
vm_map_lock_read(map);
uvm_fault_unwire_locked(map, start, end);
vm_map_unlock_read(map);
}
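
/*
 * uvm_fault_unwire_locked: the guts of uvm_fault_unwire().
 *
 * => the map must be at least read-locked by the caller
 */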
void
uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
{
vm_map_entry_t entry, oentry = NULL, next;
pmap_t pmap = vm_map_pmap(map);
vaddr_t va;
paddr_t pa;
struct vm_page *pg;
KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
vm_map_assert_anylock(map);
KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
if (uvm_map_lookup_entry(map, start, &entry) == FALSE)
panic("uvm_fault_unwire_locked: address not in map");
for (va = start; va < end ; va += PAGE_SIZE) {
KASSERT(va >= entry->start);
while (va >= entry->end) {
next = RBT_NEXT(uvm_map_addr, entry);
KASSERT(next != NULL && next->start <= entry->end);
entry = next;
}
if (entry != oentry) {
if (oentry != NULL) {
uvm_map_unlock_entry(oentry);
}
uvm_map_lock_entry(entry);
oentry = entry;
}
if (!pmap_extract(pmap, va, &pa))
continue;
if (VM_MAPENT_ISWIRED(entry) == 0)
pmap_unwire(pmap, va);
pg = PHYS_TO_VM_PAGE(pa);
if (pg) {
uvm_pageunwire(pg);
}
}
	/*
	 * Unlock the last entry we locked (at this point entry and
	 * oentry refer to the same entry).
	 */
	if (oentry != NULL) {
		uvm_map_unlock_entry(oentry);
	}
}
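
/*
 * uvmfault_unlockmaps: unlock the maps locked by uvmfault_lookup().
 */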
void
uvmfault_unlockmaps(struct uvm_faultinfo *ufi, boolean_t write_locked)
{
if (ufi == NULL) {
return;
}
uvmfault_update_stats(ufi);
if (write_locked) {
vm_map_unlock(ufi->map);
} else {
vm_map_unlock_read(ufi->map);
}
}
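
/*
 * uvmfault_unlockall: unlock everything passed in.
 */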
void
uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap,
struct uvm_object *uobj)
{
if (uobj)
rw_exit(uobj->vmobjlock);
if (amap != NULL)
amap_unlock(amap);
uvmfault_unlockmaps(ufi, FALSE);
}
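
/*
 * uvmfault_lookup: look up a virtual address in a map, descending
 * through submaps.
 *
 * => caller must provide an unlocked ufi with the orig_* fields set
 * => on success the map is locked (read or write per write_lock) and
 *    ufi->entry points at the entry covering orig_rvaddr
 */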
boolean_t
uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock)
{
vm_map_t tmpmap;
ufi->map = ufi->orig_map;
ufi->size = ufi->orig_size;
while (1) {
if (ufi->orig_rvaddr < ufi->map->min_offset ||
ufi->orig_rvaddr >= ufi->map->max_offset)
return FALSE;
if (write_lock) {
vm_map_lock(ufi->map);
} else {
vm_map_lock_read(ufi->map);
}
if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr,
&ufi->entry)) {
uvmfault_unlockmaps(ufi, write_lock);
return FALSE;
}
if (ufi->entry->end - ufi->orig_rvaddr < ufi->size)
ufi->size = ufi->entry->end - ufi->orig_rvaddr;
if (UVM_ET_ISSUBMAP(ufi->entry)) {
tmpmap = ufi->entry->object.sub_map;
uvmfault_unlockmaps(ufi, write_lock);
ufi->map = tmpmap;
continue;
}
ufi->mapv = ufi->map->timestamp;
return TRUE;
}
}
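
/*
 * uvmfault_relock: re-lock the map after it was dropped.  fails if
 * the map's timestamp changed, meaning it was modified while
 * unlocked.
 */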
boolean_t
uvmfault_relock(struct uvm_faultinfo *ufi)
{
if (ufi == NULL) {
return TRUE;
}
vm_map_lock_read(ufi->map);
if (ufi->mapv != ufi->map->timestamp) {
vm_map_unlock_read(ufi->map);
counters_inc(uvmexp_counters, flt_norelck);
return FALSE;
}
counters_inc(uvmexp_counters, flt_relck);
return TRUE;
}