/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
/*        All Rights Reserved   */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */
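
/*
 * Typical use -- a hedged sketch modeled on file system read paths
 * such as ufs; vp and uiop come from the caller, off is the MAXBSIZE
 * aligned base of the request, mapon the offset within the window and
 * n the byte count:
 *
 *	base = segmap_getmapflt(segkmap, vp, off + mapon, n, 1, S_READ);
 *	error = uiomove(base + mapon, n, UIO_READ, uiop);
 *	error = segmap_release(segkmap, base, 0);
 */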

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void     segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
                        size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int      segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
                        uint_t prot);
static int      segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int      segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
                        uint_t *protv);
static u_offset_t       segmap_getoffset(struct seg *seg, caddr_t addr);
static int      segmap_gettype(struct seg *seg, caddr_t addr);
static int      segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void     segmap_dump(struct seg *seg);
static int      segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
                        struct page ***ppp, enum lock_type type,
                        enum seg_rw rw);
static void     segmap_badop(void);
static int      segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t   *segmap_getpolicy(struct seg *seg,
    caddr_t addr);
static int      segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t  segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
                        struct smap *, enum seg_rw);
struct smap     *get_smap_kpm(caddr_t, page_t **);

#define SEGMAP_BADOP(t) (t(*)())segmap_badop

static struct seg_ops segmap_ops = {
        SEGMAP_BADOP(int),      /* dup */
        SEGMAP_BADOP(int),      /* unmap */
        segmap_free,
        segmap_fault,
        segmap_faulta,
        SEGMAP_BADOP(int),      /* setprot */
        segmap_checkprot,
        segmap_kluster,
        SEGMAP_BADOP(size_t),   /* swapout */
        SEGMAP_BADOP(int),      /* sync */
        SEGMAP_BADOP(size_t),   /* incore */
        SEGMAP_BADOP(int),      /* lockop */
        segmap_getprot,
        segmap_getoffset,
        segmap_gettype,
        segmap_getvp,
        SEGMAP_BADOP(int),      /* advise */
        segmap_dump,
        segmap_pagelock,        /* pagelock */
        SEGMAP_BADOP(int),      /* setpgsz */
        segmap_getmemid,        /* getmemid */
        segmap_getpolicy,       /* getpolicy */
        segmap_capable,         /* capable */
        seg_inherit_notsup      /* inherit */
};

/*
 * Private segmap routines.
 */
static void     segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
                        size_t len, enum seg_rw rw, struct smap *smp);
static void     segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
                        u_offset_t off, int hashid);
static void     segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
        { "fault",              KSTAT_DATA_ULONG },
        { "faulta",             KSTAT_DATA_ULONG },
        { "getmap",             KSTAT_DATA_ULONG },
        { "get_use",            KSTAT_DATA_ULONG },
        { "get_reclaim",        KSTAT_DATA_ULONG },
        { "get_reuse",          KSTAT_DATA_ULONG },
        { "get_unused",         KSTAT_DATA_ULONG },
        { "get_nofree",         KSTAT_DATA_ULONG },
        { "rel_async",          KSTAT_DATA_ULONG },
        { "rel_write",          KSTAT_DATA_ULONG },
        { "rel_free",           KSTAT_DATA_ULONG },
        { "rel_abort",          KSTAT_DATA_ULONG },
        { "rel_dontneed",       KSTAT_DATA_ULONG },
        { "release",            KSTAT_DATA_ULONG },
        { "pagecreate",         KSTAT_DATA_ULONG },
        { "free_notfree",       KSTAT_DATA_ULONG },
        { "free_dirty",         KSTAT_DATA_ULONG },
        { "free",               KSTAT_DATA_ULONG },
        { "stolen",             KSTAT_DATA_ULONG },
        { "get_nomtx",          KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define MAP_PAGES(seg)          ((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define GET_SMAP(seg, addr)     \
        &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
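
/*
 * With 8K MAXBSIZE and 4K pages, for instance, the page at offset
 * "off" within a slot owns bit SMAP_BIT_MASK((off - sm_off) >>
 * PAGESHIFT) and only the low two of the 16 bits are ever used.
 */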

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define SEGMAP_MAXCOLOR         2
#define SEGMAP_CACHE_PAD        64
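
/*
 * The per cpu structure below keeps the hottest counters cpu-local;
 * the union pads each cpu's block to SEGMAP_CACHE_PAD bytes so that
 * counters belonging to different cpus never share a cache line.
 */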

union segmap_cpu {
        struct {
                uint32_t        scpu_free_ndx[SEGMAP_MAXCOLOR];
                struct smap     *scpu_last_smap;
                ulong_t         scpu_getmap;
                ulong_t         scpu_release;
                ulong_t         scpu_get_reclaim;
                ulong_t         scpu_fault;
                ulong_t         scpu_pagecreate;
                ulong_t         scpu_get_reuse;
        } scpu;
        char    scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *      - per freelist mutexes
 *      - per hashchain mutexes
 *      - per smap mutexes
 *
 * The lock ordering is to get the smap mutex first, to lock down the
 * slot, and then the hash lock (for the hash in/out (vp, off) list)
 * or the freelist lock to put the slot back on the free list.
 *
 * The hash search is done holding only the hashchain lock; when a
 * wanted slot is found, we drop the hashchain lock and then lock the
 * slot, so hashchain and smap locks never overlap. After the slot is
 * locked, we verify again that the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in smap structure except for
 * the link fields for hash/free lists which are protected by
 * hashchain and freelist locks.
 */
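
/*
 * A sketch of the lookup protocol described above, as implemented in
 * segmap_getmapflt() below (simplified):
 *
 *	mutex_enter(hashmtx);
 *	... walk smd_hash[hashid].sh_hash_list looking for (vp, off) ...
 *	mutex_exit(hashmtx);
 *	mutex_enter(SMAPMTX(smp));
 *	if (smp->sm_vp != vp || smp->sm_off != baseoff)
 *		goto retry_hash;	(slot was reused while unlocked)
 */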

#define SHASHMTX(hashid)        (&smd_hash[hashid].sh_mtx)

#define SMP2SMF(smp)            (&smd_free[(smp - smd_smap) & smd_freemsk])
#define SMP2SMF_NDX(smp)        (ushort_t)((smp - smd_smap) & smd_freemsk)

#define SMAPMTX(smp) (&smp->sm_mtx)

#define SMAP_HASHFUNC(vp, off, hashid) \
        { \
        hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
                ((off) >> MAXBSHIFT)) & smd_hashmsk); \
        }
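
/*
 * Intended use of the hash macros; this exact pattern appears in
 * segmap_hashout() and segmap_getmapflt() below:
 *
 *	SMAP_HASHFUNC(vp, off, hashid);		(macro assigns hashid)
 *	hashmtx = SHASHMTX(hashid);
 *	mutex_enter(hashmtx);
 */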

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
        int i;
        ulong_t getmap, release, get_reclaim;
        ulong_t fault, pagecreate, get_reuse;

        if (rw == KSTAT_WRITE)
                return (EACCES);
        getmap = release = get_reclaim = (ulong_t)0;
        fault = pagecreate = get_reuse = (ulong_t)0;
        for (i = 0; i < max_ncpus; i++) {
                getmap += smd_cpu[i].scpu.scpu_getmap;
                release += smd_cpu[i].scpu.scpu_release;
                get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
                fault += smd_cpu[i].scpu.scpu_fault;
                pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
                get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
        }
        segmapcnt.smp_getmap.value.ul = getmap;
        segmapcnt.smp_release.value.ul = release;
        segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
        segmapcnt.smp_fault.value.ul = fault;
        segmapcnt.smp_pagecreate.value.ul = pagecreate;
        segmapcnt.smp_get_reuse.value.ul = get_reuse;
        return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
        struct segmap_data *smd;
        struct smap *smp;
        struct smfree *sm;
        struct segmap_crargs *a = (struct segmap_crargs *)argsp;
        struct smaphash *shashp;
        union segmap_cpu *scpu;
        long i, npages;
        size_t hashsz;
        uint_t nfreelist;
        extern void prefetch_smap_w(void *);
        extern int max_ncpus;

        ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

        if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
                panic("segkmap not MAXBSIZE aligned");
                /*NOTREACHED*/
        }

        smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

        seg->s_data = (void *)smd;
        seg->s_ops = &segmap_ops;
        smd->smd_prot = a->prot;

        /*
         * Scale the number of smap freelists to be
         * proportional to max_ncpus * number of virtual colors.
         * The caller can over-ride this scaling by providing
         * a non-zero a->nfreelist argument.
         */
        nfreelist = a->nfreelist;
        if (nfreelist == 0)
                nfreelist = max_ncpus;
        else if (nfreelist > 4 * max_ncpus) {
                cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
                "%d, using %d", nfreelist, max_ncpus);
                nfreelist = max_ncpus;
        }
        if (!ISP2(nfreelist)) {
                /* round up nfreelist to the next power of two. */
                nfreelist = 1 << (highbit(nfreelist));
        }

        /*
         * Get the number of virtual colors - must be a power of 2.
         */
        if (a->shmsize)
                smd_ncolor = a->shmsize >> MAXBSHIFT;
        else
                smd_ncolor = 1;
        ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
        ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
        smd_colormsk = smd_ncolor - 1;
        smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
        smd_freemsk = smd_nfree - 1;

        /*
         * Allocate and initialize the freelist headers.
         * Note that sm_freeq[1] starts out as the release queue. This
         * is known when the smap structures are initialized below.
         */
        smd_free = smd->smd_free =
            kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
        for (i = 0; i < smd_nfree; i++) {
                sm = &smd->smd_free[i];
                mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
                mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
                sm->sm_allocq = &sm->sm_freeq[0];
                sm->sm_releq = &sm->sm_freeq[1];
        }

        /*
         * Allocate and initialize the smap hash chain headers.
         * Compute hash size rounding down to the next power of two.
         */
        npages = MAP_PAGES(seg);
        smd->smd_npages = npages;
        hashsz = npages / SMAP_HASHAVELEN;
        hashsz = 1 << (highbit(hashsz)-1);
        smd_hashmsk = hashsz - 1;
        smd_hash = smd->smd_hash =
            kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
        smd_hash_len =
            kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
        for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
                shashp->sh_hash_list = NULL;
                mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
        }

        /*
         * Allocate and initialize the smap structures.
         * Link all slots onto the appropriate freelist.
         * The smap array is large enough to affect boot time
         * on large systems, so use memory prefetching and go
         * through the array only once. Inline an optimized version
         * of segmap_smapadd to add structures to freelists with
         * knowledge that no locks are needed here.
         */
        smd_smap = smd->smd_sm =
            kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

        for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
            smp >= smd->smd_sm; smp--) {
                struct smap *smpfreelist;
                struct sm_freeq *releq;

                prefetch_smap_w((char *)smp);

                smp->sm_vp = NULL;
                smp->sm_hash = NULL;
                smp->sm_off = 0;
                smp->sm_bitmap = 0;
                smp->sm_refcnt = 0;
                mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
                smp->sm_free_ndx = SMP2SMF_NDX(smp);

                sm = SMP2SMF(smp);
                releq = sm->sm_releq;

                smpfreelist = releq->smq_free;
                if (smpfreelist == 0) {
                        releq->smq_free = smp->sm_next = smp->sm_prev = smp;
                } else {
                        smp->sm_next = smpfreelist;
                        smp->sm_prev = smpfreelist->sm_prev;
                        smpfreelist->sm_prev = smp;
                        smp->sm_prev->sm_next = smp;
                        releq->smq_free = smp->sm_next;
                }

                /*
                 * sm_flags = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
                 */
                smp->sm_flags = 0;

#ifdef  SEGKPM_SUPPORT
                /*
                 * Due to the fragile prefetch loop no
                 * separate function is used here.
                 */
                smp->sm_kpme_next = NULL;
                smp->sm_kpme_prev = NULL;
                smp->sm_kpme_page = NULL;
#endif
        }

        /*
         * Allocate the per color indices that distribute allocation
         * requests over the free lists. Each cpu will have a private
         * rotor index to spread the allocations even across the available
         * smap freelists. Init the scpu_last_smap field to the first
         * smap element so there is no need to check for NULL.
         */
        smd_cpu =
            kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
        for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
                int j;
                for (j = 0; j < smd_ncolor; j++)
                        scpu->scpu.scpu_free_ndx[j] = j;
                scpu->scpu.scpu_last_smap = smd_smap;
        }

        vpm_init();

#ifdef DEBUG
        /*
         * Keep track of which colors are used more often.
         */
        colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

        return (0);
}

static void
segmap_free(struct seg *seg)
{
        ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
        struct hat *hat,
        struct seg *seg,
        caddr_t addr,
        size_t len,
        enum seg_rw rw,
        struct smap *smp)
{
        page_t *pp;
        caddr_t adr;
        u_offset_t off;
        struct vnode *vp;
        kmutex_t *smtx;

        ASSERT(smp->sm_refcnt > 0);

#ifdef lint
        seg = seg;
#endif

        if (segmap_kpm && IS_KPM_ADDR(addr)) {

                /*
                 * We're called only from segmap_fault and this was a
                 * NOP in case of a kpm based smap, so dangerous things
                 * must have happened in the meantime. Pages are prefaulted
                 * and locked in segmap_getmapflt and they will not be
                 * unlocked until segmap_release.
                 */
                panic("segmap_unlock: called with kpm addr %p", (void *)addr);
                /*NOTREACHED*/
        }

        vp = smp->sm_vp;
        off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

        hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
        for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
                ushort_t bitmask;

                /*
                 * Use page_find() instead of page_lookup() to
                 * find the page since we know that it has
                 * "shared" lock.
                 */
                pp = page_find(vp, off);
                if (pp == NULL) {
                        panic("segmap_unlock: page not found");
                        /*NOTREACHED*/
                }

                if (rw == S_WRITE) {
                        hat_setrefmod(pp);
                } else if (rw != S_OTHER) {
                        TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
                        "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
                        hat_setref(pp);
                }

                /*
                 * Clear bitmap, if the bit corresponding to "off" is set,
                 * since the page and translation are being unlocked.
                 */
                bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

                /*
                 * Large Files: Following assertion is to verify
                 * the correctness of the cast to (int) above.
                 */
                ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
                smtx = SMAPMTX(smp);
                mutex_enter(smtx);
                if (smp->sm_bitmap & bitmask) {
                        smp->sm_bitmap &= ~bitmask;
                }
                mutex_exit(smtx);

                page_unlock(pp);
        }
}

#define MAXPPB  (MAXBSIZE/4096) /* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
        struct hat *hat,
        struct seg *seg,
        caddr_t addr,
        size_t len,
        enum fault_type type,
        enum seg_rw rw)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;
        struct smap *smp;
        page_t *pp, **ppp;
        struct vnode *vp;
        u_offset_t off;
        page_t *pl[MAXPPB + 1];
        uint_t prot;
        u_offset_t addroff;
        caddr_t adr;
        int err;
        u_offset_t sm_off;
        int hat_flag;

        if (segmap_kpm && IS_KPM_ADDR(addr)) {
                int newpage;
                kmutex_t *smtx;

                /*
                 * Pages are successfully prefaulted and locked in
                 * segmap_getmapflt and can't be unlocked until
                 * segmap_release. No hat mappings have to be locked
                 * and they also can't be unlocked as long as the
                 * caller owns an active kpm addr.
                 */
#ifndef DEBUG
                if (type != F_SOFTUNLOCK)
                        return (0);
#endif

                if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
                        panic("segmap_fault: smap not found "
                            "for addr %p", (void *)addr);
                        /*NOTREACHED*/
                }

                smtx = SMAPMTX(smp);
#ifdef  DEBUG
                newpage = smp->sm_flags & SM_KPM_NEWPAGE;
                if (newpage) {
                        cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
                            (void *)smp);
                }

                if (type != F_SOFTUNLOCK) {
                        mutex_exit(smtx);
                        return (0);
                }
#endif
                mutex_exit(smtx);
                vp = smp->sm_vp;
                sm_off = smp->sm_off;

                if (vp == NULL)
                        return (FC_MAKE_ERR(EIO));

                ASSERT(smp->sm_refcnt > 0);

                addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
                if (addroff + len > MAXBSIZE)
                        panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
                            (void *)(addr + len));

                off = sm_off + addroff;

                pp = page_find(vp, off);

                if (pp == NULL)
                        panic("segmap_fault: softunlock page not found");

                /*
                 * Set ref bit also here in case of S_OTHER to avoid the
                 * overhead of supporting other cases than F_SOFTUNLOCK
                 * with segkpm. We can do this because the underlying
                 * pages are locked anyway.
                 */
                if (rw == S_WRITE) {
                        hat_setrefmod(pp);
                } else {
                        TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
                            "segmap_fault:pp %p vp %p offset %llx",
                            pp, vp, off);
                        hat_setref(pp);
                }

                return (0);
        }

        smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
        smp = GET_SMAP(seg, addr);
        vp = smp->sm_vp;
        sm_off = smp->sm_off;

        if (vp == NULL)
                return (FC_MAKE_ERR(EIO));

        ASSERT(smp->sm_refcnt > 0);

        addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
        if (addroff + len > MAXBSIZE) {
                panic("segmap_fault: endaddr %p "
                    "exceeds MAXBSIZE chunk", (void *)(addr + len));
                /*NOTREACHED*/
        }
        off = sm_off + addroff;

        /*
         * First handle the easy stuff
         */
        if (type == F_SOFTUNLOCK) {
                segmap_unlock(hat, seg, addr, len, rw, smp);
                return (0);
        }

        TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
            "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
        err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
            seg, addr, rw, CRED(), NULL);

        if (err)
                return (FC_MAKE_ERR(err));

        prot &= smd->smd_prot;

        /*
         * Handle all pages returned in the pl[] array.
         * This loop is coded on the assumption that if
         * there was no error from the VOP_GETPAGE routine,
         * that the page list returned will contain all the
         * needed pages for the vp from [off..off + len].
         */
        ppp = pl;
        while ((pp = *ppp++) != NULL) {
                u_offset_t poff;
                ASSERT(pp->p_vnode == vp);
                hat_flag = HAT_LOAD;

                /*
                 * Verify that the pages returned are within the range
                 * of this segmap region.  Note that it is theoretically
                 * possible for pages outside this range to be returned,
                 * but it is not very likely.  If we cannot use the
                 * page here, just release it and go on to the next one.
                 */
                if (pp->p_offset < sm_off ||
                    pp->p_offset >= sm_off + MAXBSIZE) {
                        (void) page_release(pp, 1);
                        continue;
                }

                ASSERT(hat == kas.a_hat);
                poff = pp->p_offset;
                adr = addr + (poff - off);
                if (adr >= addr && adr < addr + len) {
                        hat_setref(pp);
                        TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
                            "segmap_fault:pp %p vp %p offset %llx",
                            pp, vp, poff);
                        if (type == F_SOFTLOCK)
                                hat_flag = HAT_LOAD_LOCK;
                }

                /*
                 * Deal with VMODSORT pages here. If we know this is a write
                 * do the setmod now and allow write protection.
                 * As long as it's modified or not S_OTHER, remove write
                 * protection. With S_OTHER it's up to the FS to deal with this.
                 */
                if (IS_VMODSORT(vp)) {
                        if (rw == S_WRITE)
                                hat_setmod(pp);
                        else if (rw != S_OTHER && !hat_ismod(pp))
                                prot &= ~PROT_WRITE;
                }

                hat_memload(hat, adr, pp, prot, hat_flag);
                if (hat_flag != HAT_LOAD_LOCK)
                        page_unlock(pp);
        }
        return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
        struct smap *smp;
        struct vnode *vp;
        u_offset_t off;
        int err;

        if (segmap_kpm && IS_KPM_ADDR(addr)) {
                int     newpage;
                kmutex_t *smtx;

                /*
                 * Pages are successfully prefaulted and locked in
                 * segmap_getmapflt and can't be unlocked until
                 * segmap_release. No hat mappings have to be locked
                 * and they also can't be unlocked as long as the
                 * caller owns an active kpm addr.
                 */
#ifdef  DEBUG
                if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
                        panic("segmap_faulta: smap not found "
                            "for addr %p", (void *)addr);
                        /*NOTREACHED*/
                }

                smtx = SMAPMTX(smp);
                newpage = smp->sm_flags & SM_KPM_NEWPAGE;
                mutex_exit(smtx);
                if (newpage)
                        cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
                            (void *)smp);
#endif
                return (0);
        }

        segmapcnt.smp_faulta.value.ul++;
        smp = GET_SMAP(seg, addr);

        ASSERT(smp->sm_refcnt > 0);

        vp = smp->sm_vp;
        off = smp->sm_off;

        if (vp == NULL) {
                cmn_err(CE_WARN, "segmap_faulta - no vp");
                return (FC_MAKE_ERR(EIO));
        }

        TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
            "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

        err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
            & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
            seg, addr, S_READ, CRED(), NULL);

        if (err)
                return (FC_MAKE_ERR(err));
        return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;

        ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

        /*
         * Need not acquire the segment lock since
         * "smd_prot" is a read-only field.
         */
        return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;
        size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        if (pgno != 0) {
                do {
                        protv[--pgno] = smd->smd_prot;
                } while (pgno != 0);
        }
        return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;

        ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

        return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
        ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

        return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;

        ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

        /* XXX - This doesn't make any sense */
        *vpp = smd->smd_sm->sm_vp;
        return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
        return (0);
}

static void
segmap_badop(void)
{
        panic("segmap_badop");
        /*NOTREACHED*/
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
        struct smfree *sm;
        struct smap *smpfreelist;
        struct sm_freeq *releq;

        ASSERT(MUTEX_HELD(SMAPMTX(smp)));

        if (smp->sm_refcnt != 0) {
                panic("segmap_smapadd");
                /*NOTREACHED*/
        }

        sm = &smd_free[smp->sm_free_ndx];
        /*
         * Add to the tail of the release queue
         * Note that sm_releq and sm_allocq could toggle
         * before we get the lock. This does not affect
         * correctness as the 2 queues are only maintained
         * to reduce lock pressure.
         */
        releq = sm->sm_releq;
        if (releq == &sm->sm_freeq[0])
                smp->sm_flags |= SM_QNDX_ZERO;
        else
                smp->sm_flags &= ~SM_QNDX_ZERO;
        mutex_enter(&releq->smq_mtx);
        smpfreelist = releq->smq_free;
        if (smpfreelist == 0) {
                int want;

                releq->smq_free = smp->sm_next = smp->sm_prev = smp;
                /*
                 * Both queue mutexes are held to set sm_want;
                 * snapshot the value before dropping releq mutex.
                 * If sm_want appears after the releq mutex is dropped,
                 * then the smap just freed is already gone.
                 */
                want = sm->sm_want;
                mutex_exit(&releq->smq_mtx);
                /*
                 * See if there was a waiter before dropping the releq
                 * mutex, then recheck after obtaining the sm_freeq[0]
                 * mutex, as another thread may have already signaled.
                 */
                if (want) {
                        mutex_enter(&sm->sm_freeq[0].smq_mtx);
                        if (sm->sm_want)
                                cv_signal(&sm->sm_free_cv);
                        mutex_exit(&sm->sm_freeq[0].smq_mtx);
                }
        } else {
                smp->sm_next = smpfreelist;
                smp->sm_prev = smpfreelist->sm_prev;
                smpfreelist->sm_prev = smp;
                smp->sm_prev->sm_next = smp;
                mutex_exit(&releq->smq_mtx);
        }
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
        struct smap **hpp;
        struct smap *tmp;
        kmutex_t *hmtx;

        ASSERT(MUTEX_HELD(SMAPMTX(smp)));
        ASSERT(smp->sm_vp == NULL);
        ASSERT(smp->sm_hash == NULL);
        ASSERT(smp->sm_prev == NULL);
        ASSERT(smp->sm_next == NULL);
        ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

        hmtx = SHASHMTX(hashid);

        mutex_enter(hmtx);
        /*
         * First we need to verify that no one has created an smp
         * with (vp, off) as its tag before us.
         */
        for (tmp = smd_hash[hashid].sh_hash_list;
            tmp != NULL; tmp = tmp->sm_hash)
                if (tmp->sm_vp == vp && tmp->sm_off == off)
                        break;

        if (tmp == NULL) {
                /*
                 * No one created one yet.
                 *
                 * Funniness here - we don't increment the ref count on the
                 * vnode even though we have another pointer to it here.
                 * The reason for this is that we don't want the fact that
                 * a seg_map entry somewhere refers to a vnode to prevent the
                 * vnode itself from going away.  This is because this
                 * reference to the vnode is a "soft one".  In the case where
                 * a mapping is being used by a rdwr [or directory routine?]
                 * there already has to be a non-zero ref count on the vnode.
                 * In the case where the vp has been freed and the smap
                 * structure is on the free list, there are no pages in memory
                 * that can refer to the vnode.  Thus even if we reuse the same
                 * vnode/smap structure for a vnode which has the same
                 * address but represents a different object, we are ok.
                 */
                smp->sm_vp = vp;
                smp->sm_off = off;

                hpp = &smd_hash[hashid].sh_hash_list;
                smp->sm_hash = *hpp;
                *hpp = smp;
#ifdef SEGMAP_HASHSTATS
                smd_hash_len[hashid]++;
#endif
        }
        mutex_exit(hmtx);

        return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
        struct smap **hpp, *hp;
        struct vnode *vp;
        kmutex_t *mtx;
        int hashid;
        u_offset_t off;

        ASSERT(MUTEX_HELD(SMAPMTX(smp)));

        vp = smp->sm_vp;
        off = smp->sm_off;

        SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
        mtx = SHASHMTX(hashid);
        mutex_enter(mtx);

        hpp = &smd_hash[hashid].sh_hash_list;
        for (;;) {
                hp = *hpp;
                if (hp == NULL) {
                        panic("segmap_hashout");
                        /*NOTREACHED*/
                }
                if (hp == smp)
                        break;
                hpp = &hp->sm_hash;
        }

        *hpp = smp->sm_hash;
        smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
        smd_hash_len[hashid]--;
#endif
        mutex_exit(mtx);

        smp->sm_vp = NULL;
        smp->sm_off = (u_offset_t)0;

}

/*
 * Attempt to free unmodified, unmapped, and non locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
        u_offset_t pgoff;
        page_t  *pp;

        for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

                if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
                        continue;

                switch (page_release(pp, 1)) {
                case PGREL_NOTREL:
                        segmapcnt.smp_free_notfree.value.ul++;
                        break;
                case PGREL_MOD:
                        segmapcnt.smp_free_dirty.value.ul++;
                        break;
                case PGREL_CLEAN:
                        segmapcnt.smp_free.value.ul++;
                        break;
                }
        }
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit : smap lock.
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
        ASSERT(MUTEX_HELD(SMAPMTX(smp)));
        ASSERT(smp->sm_refcnt == 0);

        if (smp->sm_vp != (struct vnode *)NULL) {
                struct vnode    *vp = smp->sm_vp;
                u_offset_t      off = smp->sm_off;
                /*
                 * Destroy old vnode association and
                 * unload any hardware translations to
                 * the old object.
                 */
                smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
                segmap_hashout(smp);

                /*
                 * This node is off freelist and hashlist,
                 * so there is no reason to drop/reacquire sm_mtx
                 * across calls to hat_unload.
                 */
                if (segmap_kpm) {
                        caddr_t vaddr;
                        int hat_unload_needed = 0;

                        /*
                         * unload kpm mapping
                         */
                        if (pp != NULL) {
                                vaddr = hat_kpm_page2va(pp, 1);
                                hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
                                page_unlock(pp);
                        }

                        /*
                         * Check if we have (also) the rare case of a
                         * non kpm mapping.
                         */
                        if (smp->sm_flags & SM_NOTKPM_RELEASED) {
                                hat_unload_needed = 1;
                                smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                        }

                        if (hat_unload_needed) {
                                hat_unload(kas.a_hat, segkmap->s_base +
                                    ((smp - smd_smap) * MAXBSIZE),
                                    MAXBSIZE, HAT_UNLOAD);
                        }

                } else {
                        ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
                        smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                        hat_unload(kas.a_hat, segkmap->s_base +
                            ((smp - smd_smap) * MAXBSIZE),
                            MAXBSIZE, HAT_UNLOAD);
                }
                segmap_pagefree(vp, off);
        }
}

static struct smap *
get_free_smp(int free_ndx)
{
        struct smfree *sm;
        kmutex_t *smtx;
        struct smap *smp, *first;
        struct sm_freeq *allocq, *releq;
        struct kpme *kpme;
        page_t *pp = NULL;
        int end_ndx, page_locked = 0;

        end_ndx = free_ndx;
        sm = &smd_free[free_ndx];

retry_queue:
        allocq = sm->sm_allocq;
        mutex_enter(&allocq->smq_mtx);

        if ((smp = allocq->smq_free) == NULL) {

skip_queue:
                /*
                 * The alloc list is empty or this queue is being skipped;
                 * first see if the allocq toggled.
                 */
                if (sm->sm_allocq != allocq) {
                        /* queue changed */
                        mutex_exit(&allocq->smq_mtx);
                        goto retry_queue;
                }
                releq = sm->sm_releq;
                if (!mutex_tryenter(&releq->smq_mtx)) {
                        /* cannot get releq; a free smp may be there now */
                        mutex_exit(&allocq->smq_mtx);

                        /*
                         * This loop could spin forever if this thread has
                         * higher priority than the thread that is holding
                         * releq->smq_mtx. In order to force the other thread
                         * to run, we'll lock/unlock the mutex which is safe
                         * since we just unlocked the allocq mutex.
                         */
                        mutex_enter(&releq->smq_mtx);
                        mutex_exit(&releq->smq_mtx);
                        goto retry_queue;
                }
                if (releq->smq_free == NULL) {
                        /*
                         * This freelist is empty.
                         * This should not happen unless clients
                         * are failing to release the segmap
                         * window after accessing the data.
                         * Before resorting to sleeping, try
                         * the next list of the same color.
                         */
                        free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
                        if (free_ndx != end_ndx) {
                                mutex_exit(&releq->smq_mtx);
                                mutex_exit(&allocq->smq_mtx);
                                sm = &smd_free[free_ndx];
                                goto retry_queue;
                        }
                        /*
                         * Tried all freelists of the same color once,
                         * wait on this list and hope something gets freed.
                         */
                        segmapcnt.smp_get_nofree.value.ul++;
                        sm->sm_want++;
                        mutex_exit(&sm->sm_freeq[1].smq_mtx);
                        cv_wait(&sm->sm_free_cv,
                            &sm->sm_freeq[0].smq_mtx);
                        sm->sm_want--;
                        mutex_exit(&sm->sm_freeq[0].smq_mtx);
                        sm = &smd_free[free_ndx];
                        goto retry_queue;
                } else {
                        /*
                         * Something on the rele queue; flip the alloc
                         * and rele queues and retry.
                         */
                        sm->sm_allocq = releq;
                        sm->sm_releq = allocq;
                        mutex_exit(&allocq->smq_mtx);
                        mutex_exit(&releq->smq_mtx);
                        if (page_locked) {
                                delay(hz >> 2);
                                page_locked = 0;
                        }
                        goto retry_queue;
                }
        } else {
                /*
                 * Fastpath the case we get the smap mutex
                 * on the first try.
                 */
                first = smp;
next_smap:
                smtx = SMAPMTX(smp);
                if (!mutex_tryenter(smtx)) {
                        /*
                         * Another thread is trying to reclaim this slot.
                         * Skip to the next queue or smap.
                         */
                        if ((smp = smp->sm_next) == first) {
                                goto skip_queue;
                        } else {
                                goto next_smap;
                        }
                } else {
                        /*
                         * if kpme exists, get shared lock on the page
                         */
                        if (segmap_kpm && smp->sm_vp != NULL) {

                                kpme = GET_KPME(smp);
                                pp = kpme->kpe_page;

                                if (pp != NULL) {
                                        if (!page_trylock(pp, SE_SHARED)) {
                                                smp = smp->sm_next;
                                                mutex_exit(smtx);
                                                page_locked = 1;

                                                pp = NULL;

                                                if (smp == first) {
                                                        goto skip_queue;
                                                } else {
                                                        goto next_smap;
                                                }
                                        } else {
                                                if (kpme->kpe_page == NULL) {
                                                        page_unlock(pp);
                                                        pp = NULL;
                                                }
                                        }
                                }
                        }

                        /*
                         * At this point, we've selected smp.  Remove smp
                         * from its freelist.  If smp is the first one in
                         * the freelist, update the head of the freelist.
                         */
                        if (first == smp) {
                                ASSERT(first == allocq->smq_free);
                                allocq->smq_free = smp->sm_next;
                        }

                        /*
                         * if the head of the freelist still points to smp,
                         * then there are no more free smaps in that list.
                         */
                        if (allocq->smq_free == smp)
                                /*
                                 * Took the last one
                                 */
                                allocq->smq_free = NULL;
                        else {
                                smp->sm_prev->sm_next = smp->sm_next;
                                smp->sm_next->sm_prev = smp->sm_prev;
                        }
                        mutex_exit(&allocq->smq_mtx);
                        smp->sm_prev = smp->sm_next = NULL;

                        /*
                         * if pp != NULL, pp must have been locked;
                         * grab_smp() unlocks pp.
                         */
                        ASSERT((pp == NULL) || PAGE_LOCKED(pp));
                        grab_smp(smp, pp);
                        /* return smp locked. */
                        ASSERT(SMAPMTX(smp) == smtx);
                        ASSERT(MUTEX_HELD(smtx));
                        return (smp);
                }
        }
}

/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1 if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;
        page_t *pp;
        u_offset_t off;
        struct smap *smp;
        struct vnode *vp;
        caddr_t eaddr;
        int newpage = 0;
        uint_t prot;
        kmutex_t *smtx;
        int hat_flag;

        ASSERT(seg->s_as == &kas);

        if (segmap_kpm && IS_KPM_ADDR(addr)) {
                /*
                 * Pages are successfully prefaulted and locked in
                 * segmap_getmapflt and can't be unlocked until
                 * segmap_release. The SM_KPM_NEWPAGE flag is set
                 * in segmap_pagecreate_kpm when new pages are created,
                 * and it is returned as the "newpage" indication here.
                 */
                if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
                        panic("segmap_pagecreate: smap not found "
                            "for addr %p", (void *)addr);
                        /*NOTREACHED*/
                }

                smtx = SMAPMTX(smp);
                newpage = smp->sm_flags & SM_KPM_NEWPAGE;
                smp->sm_flags &= ~SM_KPM_NEWPAGE;
                mutex_exit(smtx);

                return (newpage);
        }

        smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

        eaddr = addr + len;
        addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

        smp = GET_SMAP(seg, addr);

        /*
         * We don't grab smp mutex here since we assume the smp
         * has a refcnt set already which prevents the slot from
         * changing its id.
         */
        ASSERT(smp->sm_refcnt > 0);

        vp = smp->sm_vp;
        off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
        prot = smd->smd_prot;

        for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
                hat_flag = HAT_LOAD;
                pp = page_lookup(vp, off, SE_SHARED);
                if (pp == NULL) {
                        ushort_t bitindex;

                        if ((pp = page_create_va(vp, off,
                            PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
                                panic("segmap_pagecreate: page_create failed");
                                /*NOTREACHED*/
                        }
                        newpage = 1;
                        page_io_unlock(pp);

                        /*
                         * Since pages created here do not contain valid
                         * data until the caller writes into them, the
                         * "exclusive" lock will not be dropped to prevent
                         * other users from accessing the page.  We also
                         * have to lock the translation to prevent a fault
                         * from occurring when the virtual address mapped by
                         * this page is written into.  This is necessary to
                         * avoid a deadlock since we haven't dropped the
                         * "exclusive" lock.
                         */
                        bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

                        /*
                         * Large Files: The following assertion is to
                         * verify the cast above.
                         */
                        ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
                        smtx = SMAPMTX(smp);
                        mutex_enter(smtx);
                        smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
                        mutex_exit(smtx);

                        hat_flag = HAT_LOAD_LOCK;
                } else if (softlock) {
                        hat_flag = HAT_LOAD_LOCK;
                }

                if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
                        hat_setmod(pp);

                hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

                if (hat_flag != HAT_LOAD_LOCK)
                        page_unlock(pp);

                TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
                    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
                    seg, addr, pp, vp, off);
        }

        return (newpage);
}
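
/*
 * For reference, a hedged sketch of how a write path drives
 * segmap_pagecreate(), modeled on ufs-style callers; "pagecreate" is
 * the caller's decision to skip VOP_GETPAGE because whole pages will
 * be overwritten:
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, !pagecreate, S_WRITE);
 *	if (pagecreate)
 *		newpage = segmap_pagecreate(segkmap, base, n, 0);
 *	error = uiomove(base, n, UIO_WRITE, uiop);
 *	if (newpage)
 *		segmap_pageunlock(segkmap, base, n, S_WRITE);
 *	error = segmap_release(segkmap, base, flags);
 */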

void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
        struct smap     *smp;
        ushort_t        bitmask;
        page_t          *pp;
        struct  vnode   *vp;
        u_offset_t      off;
        caddr_t         eaddr;
        kmutex_t        *smtx;

        ASSERT(seg->s_as == &kas);

        eaddr = addr + len;
        addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

        if (segmap_kpm && IS_KPM_ADDR(addr)) {
                /*
                 * Pages are successfully prefaulted and locked in
                 * segmap_getmapflt and can't be unlocked until
                 * segmap_release, so no pages or hat mappings have
                 * to be unlocked at this point.
                 */
#ifdef DEBUG
                if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
                        panic("segmap_pageunlock: smap not found "
                            "for addr %p", (void *)addr);
                        /*NOTREACHED*/
                }

                ASSERT(smp->sm_refcnt > 0);
                mutex_exit(SMAPMTX(smp));
#endif
                return;
        }

        smp = GET_SMAP(seg, addr);
        smtx = SMAPMTX(smp);

        ASSERT(smp->sm_refcnt > 0);

        vp = smp->sm_vp;
        off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

        for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
                bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

                /*
                 * Large Files: Following assertion is to verify
                 * the correctness of the cast to (int) above.
                 */
                ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

                /*
                 * If the bit corresponding to "off" is set,
                 * clear this bit in the bitmap, unlock translations,
                 * and release the "exclusive" lock on the page.
                 */
                if (smp->sm_bitmap & bitmask) {
                        mutex_enter(smtx);
                        smp->sm_bitmap &= ~bitmask;
                        mutex_exit(smtx);

                        hat_unlock(kas.a_hat, addr, PAGESIZE);

                        /*
                         * Use page_find() instead of page_lookup() to
                         * find the page since we know that it has
                         * "exclusive" lock.
                         */
                        pp = page_find(vp, off);
                        if (pp == NULL) {
                                panic("segmap_pageunlock: page not found");
                                /*NOTREACHED*/
                        }
                        if (rw == S_WRITE) {
                                hat_setrefmod(pp);
                        } else if (rw != S_OTHER) {
                                hat_setref(pp);
                        }

                        page_unlock(pp);
                }
        }
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
        return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
 */
#define ELF_OFFZERO_VA  (0x10000)
/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
        struct seg *seg,
        struct vnode *vp,
        u_offset_t off,
        size_t len,
        int forcefault,
        enum seg_rw rw)
{
        struct smap *smp, *nsmp;
        extern struct vnode *common_specvp();
        caddr_t baseaddr;                       /* MAXBSIZE aligned */
        u_offset_t baseoff;
        int newslot;
        caddr_t vaddr;
        int color, hashid;
        kmutex_t *hashmtx, *smapmtx;
        struct smfree *sm;
        page_t  *pp;
        struct kpme *kpme;
        uint_t  prot;
        caddr_t base;
        page_t  *pl[MAXPPB + 1];
        int     error;
        int     is_kpm = 1;

        ASSERT(seg->s_as == &kas);
        ASSERT(seg == segkmap);

        baseoff = off & (offset_t)MAXBMASK;
        if (off + len > baseoff + MAXBSIZE) {
                panic("segmap_getmap bad len");
                /*NOTREACHED*/
        }

        /*
         * If this is a block device we have to be sure to use the
         * "common" block device vnode for the mapping.
         */
        if (vp->v_type == VBLK)
                vp = common_specvp(vp);

        smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

        if (segmap_kpm == 0 ||
            (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
                is_kpm = 0;
        }

        SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
        hashmtx = SHASHMTX(hashid);

retry_hash:
        mutex_enter(hashmtx);
        for (smp = smd_hash[hashid].sh_hash_list;
            smp != NULL; smp = smp->sm_hash)
                if (smp->sm_vp == vp && smp->sm_off == baseoff)
                        break;
        mutex_exit(hashmtx);

vrfy_smp:
        if (smp != NULL) {

                ASSERT(vp->v_count != 0);

                /*
                 * Get smap lock and recheck its tag. The hash lock
                 * is dropped since the hash is based on (vp, off)
                 * and (vp, off) won't change when we have smap mtx.
                 */
                smapmtx = SMAPMTX(smp);
                mutex_enter(smapmtx);
                if (smp->sm_vp != vp || smp->sm_off != baseoff) {
                        mutex_exit(smapmtx);
                        goto retry_hash;
                }

                if (smp->sm_refcnt == 0) {

                        smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

                        /*
                         * Could still be on the free list. However, this
                         * could also be an smp that is transitioning from
                         * the free list when we have too much contention
                         * for the smapmtx's. In this case, we have an
                         * unlocked smp that is not on the free list any
                         * longer, but still has a 0 refcnt.  The only way
                         * to be sure is to check the freelist pointers.
                         * Since we now have the smapmtx, we are guaranteed
                         * that the (vp, off) won't change, so we are safe
                         * to reclaim it.  get_free_smp() knows that this
                         * can happen, and it will check the refcnt.
                         */

                        if (smp->sm_next != NULL) {
                                struct sm_freeq *freeq;

                                ASSERT(smp->sm_prev != NULL);
                                sm = &smd_free[smp->sm_free_ndx];

                                if (smp->sm_flags & SM_QNDX_ZERO)
                                        freeq = &sm->sm_freeq[0];
                                else
                                        freeq = &sm->sm_freeq[1];

                                mutex_enter(&freeq->smq_mtx);
                                if (freeq->smq_free != smp) {
                                        /*
                                         * fastpath normal case
                                         */
                                        smp->sm_prev->sm_next = smp->sm_next;
                                        smp->sm_next->sm_prev = smp->sm_prev;
                                } else if (smp == smp->sm_next) {
                                        /*
                                         * Taking the last smap on freelist
                                         */
                                        freeq->smq_free = NULL;
                                } else {
                                        /*
                                         * Reclaiming 1st smap on list
                                         */
                                        freeq->smq_free = smp->sm_next;
                                        smp->sm_prev->sm_next = smp->sm_next;
                                        smp->sm_next->sm_prev = smp->sm_prev;
                                }
                                mutex_exit(&freeq->smq_mtx);
                                smp->sm_prev = smp->sm_next = NULL;
                        } else {
                                ASSERT(smp->sm_prev == NULL);
                                segmapcnt.smp_stolen.value.ul++;
                        }

                } else {
                        segmapcnt.smp_get_use.value.ul++;
                }
                smp->sm_refcnt++;               /* another user */

                /*
                 * We don't invoke segmap_fault via TLB miss, so we set ref
                 * and mod bits in advance. For S_OTHER we set them in
                 * segmap_fault F_SOFTUNLOCK.
                 */
                if (is_kpm) {
                        if (rw == S_WRITE) {
                                smp->sm_flags |= SM_WRITE_DATA;
                        } else if (rw == S_READ) {
                                smp->sm_flags |= SM_READ_DATA;
                        }
                }
                mutex_exit(smapmtx);

                newslot = 0;
        } else {

                uint32_t free_ndx, *free_ndxp;
                union segmap_cpu *scpu;

                /*
                 * On a PAC machine or a machine with anti-alias
                 * hardware, smd_colormsk will be zero.
                 *
                 * On a VAC machine, pick the color by offset in the
                 * file so we won't get VAC conflicts on ELF files.
                 * For data files the color does not matter, but since
                 * we don't know what kind of file this is we always
                 * pick the color by offset. This causes the color
                 * corresponding to file offset zero to be used more
                 * heavily.
                 */
                color = (baseoff >> MAXBSHIFT) & smd_colormsk;
                scpu = smd_cpu+CPU->cpu_seqid;
                free_ndxp = &scpu->scpu.scpu_free_ndx[color];
                free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
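
                /*
                 * A note on the arithmetic above: stepping the per-cpu
                 * counter by smd_ncolor keeps free_ndx congruent to the
                 * chosen color modulo smd_ncolor, so successive
                 * allocations of one color rotate across the free lists
                 * serving that color, spreading lock contention without
                 * losing the color derived from baseoff.
                 */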
#ifdef DEBUG
                colors_used[free_ndx]++;
#endif /* DEBUG */

                /*
                 * Get a locked smp slot from the free list.
                 */
                smp = get_free_smp(free_ndx);
                smapmtx = SMAPMTX(smp);

                ASSERT(smp->sm_vp == NULL);

                if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
                        /*
                         * Failed to hashin, there exists one now.
                         * Return the smp we just allocated.
                         */
                        segmap_smapadd(smp);
                        mutex_exit(smapmtx);

                        smp = nsmp;
                        goto vrfy_smp;
                }
                smp->sm_refcnt++;               /* another user */

                /*
                 * We don't invoke segmap_fault via TLB miss, so we set ref
                 * and mod bits in advance. For S_OTHER we set them in
                 * segmap_fault F_SOFTUNLOCK.
                 */
                if (is_kpm) {
                        if (rw == S_WRITE) {
                                smp->sm_flags |= SM_WRITE_DATA;
                        } else if (rw == S_READ) {
                                smp->sm_flags |= SM_READ_DATA;
                        }
                }
                mutex_exit(smapmtx);

                newslot = 1;
        }

        if (!is_kpm)
                goto use_segmap_range;

        /*
         * Use segkpm
         */
        /* Lint directive required until 6746211 is fixed */
        /*CONSTCOND*/
        ASSERT(PAGESIZE == MAXBSIZE);
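
        /*
         * With segkpm in use a slot covers exactly one page, so the
         * permanent kpm mapping of that single page can stand in for
         * a segkmap slot address.
         */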

        /*
         * Remember the last smp faulted on this cpu; get_smap_kpm()
         * consults this per-cpu cache first at release time.
         */
        (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

        if (forcefault == SM_PAGECREATE) {
                baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
                return (baseaddr);
        }

        if (newslot == 0 &&
            (pp = GET_KPME(smp)->kpe_page) != NULL) {

                /* fastpath */
                switch (rw) {
                case S_READ:
                case S_WRITE:
                        if (page_trylock(pp, SE_SHARED)) {
                                if (PP_ISFREE(pp) ||
                                    !(pp->p_vnode == vp &&
                                    pp->p_offset == baseoff)) {
                                        page_unlock(pp);
                                        pp = page_lookup(vp, baseoff,
                                            SE_SHARED);
                                }
                        } else {
                                pp = page_lookup(vp, baseoff, SE_SHARED);
                        }

                        if (pp == NULL) {
                                ASSERT(GET_KPME(smp)->kpe_page == NULL);
                                break;
                        }

                        if (rw == S_WRITE &&
                            hat_page_getattr(pp, P_MOD | P_REF) !=
                            (P_MOD | P_REF)) {
                                page_unlock(pp);
                                break;
                        }

                        /*
                         * We have the p_selock as reader, grab_smp
                         * can't hit us, we have bumped the smap
                         * refcnt and hat_pageunload needs the
                         * p_selock exclusive.
                         */
                        kpme = GET_KPME(smp);
                        if (kpme->kpe_page == pp) {
                                baseaddr = hat_kpm_page2va(pp, 0);
                        } else if (kpme->kpe_page == NULL) {
                                baseaddr = hat_kpm_mapin(pp, kpme);
                        } else {
                                panic("segmap_getmapflt: stale "
                                    "kpme page, kpme %p", (void *)kpme);
                                /*NOTREACHED*/
                        }

                        /*
                         * We don't invoke segmap_fault via TLB miss,
                         * so we set ref and mod bits in advance.
                         * For S_OTHER we set them in segmap_fault
                         * F_SOFTUNLOCK.
                         */
                        if (rw == S_READ && !hat_isref(pp))
                                hat_setref(pp);

                        return (baseaddr);
                default:
                        break;
                }
        }
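
        /*
         * Fastpath miss or a brand new slot: ask the file system for
         * the page via VOP_GETPAGE() and map it through segkpm below.
         */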

        base = segkpm_create_va(baseoff);
        error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
            seg, base, rw, CRED(), NULL);

        pp = pl[0];
        if (error || pp == NULL) {
                /*
                 * Use segmap address slot and let segmap_fault deal
                 * with the error cases. There is no error return
                 * possible here.
                 */
                goto use_segmap_range;
        }

        ASSERT(pl[1] == NULL);

        /*
         * When prot is not returned with PROT_ALL, the returned pages
         * are not backed by fs blocks. For most segmap users this is
         * no problem: they don't write to the pages in the same
         * request and therefore don't rely on a following trap-driven
         * segmap_fault. For SM_LOCKPROTO users it is safer to use
         * segkmap addresses so that protection faults go through
         * segmap_fault.
         */
        if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
                /*
                 * Use segmap address slot and let segmap_fault
                 * do the error return.
                 */
                ASSERT(rw != S_WRITE);
                ASSERT(PAGE_LOCKED(pp));
                page_unlock(pp);
                forcefault = 0;
                goto use_segmap_range;
        }

        /*
         * We have the p_selock as reader, grab_smp can't hit us, we
         * have bumped the smap refcnt and hat_pageunload needs the
         * p_selock exclusive.
         */
        kpme = GET_KPME(smp);
        if (kpme->kpe_page == pp) {
                baseaddr = hat_kpm_page2va(pp, 0);
        } else if (kpme->kpe_page == NULL) {
                baseaddr = hat_kpm_mapin(pp, kpme);
        } else {
                panic("segmap_getmapflt: stale kpme page after "
                    "VOP_GETPAGE, kpme %p", (void *)kpme);
                /*NOTREACHED*/
        }

        smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

        return (baseaddr);
use_segmap_range:
        baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
        TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
            "segmap_getmap:seg %p addr %p vp %p offset %llx",
            seg, baseaddr, vp, baseoff);

        /*
         * Prefault the translations for the whole page-aligned range
         * covering [vaddr, vaddr + len).
         */
        vaddr = baseaddr + (off - baseoff);
        if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

                caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
                    (uintptr_t)PAGEMASK);

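                /*
                 * Rounding example (illustrative): with vaddr 0x100
                 * bytes into a page and len == PAGESIZE, pgaddr is the
                 * page base and the length passed below rounds up to
                 * two pages, covering the whole range.
                 */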
                (void) segmap_fault(kas.a_hat, seg, pgaddr,
                    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
                    F_INVAL, rw);
        }

        return (baseaddr);
}

int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
        struct smap     *smp;
        int             error;
        int             bflags = 0;
        struct vnode    *vp;
        u_offset_t      offset;
        kmutex_t        *smtx;
        int             is_kpm = 0;
        page_t          *pp;

        if (segmap_kpm && IS_KPM_ADDR(addr)) {

                if (((uintptr_t)addr & MAXBOFFSET) != 0) {
                        panic("segmap_release: addr %p not "
                            "MAXBSIZE aligned", (void *)addr);
                        /*NOTREACHED*/
                }

                if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
                        panic("segmap_release: smap not found "
                            "for addr %p", (void *)addr);
                        /*NOTREACHED*/
                }

                TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
                    "segmap_relmap:seg %p addr %p smp %p",
                    seg, addr, smp);

                smtx = SMAPMTX(smp);

                /*
                 * For compatibility, segmap_pagecreate_kpm sets this
                 * flag so that a following segmap_pagecreate can return
                 * it as the "newpage" indication. When segmap_pagecreate
                 * is never called, clear the flag now.
                 */
                smp->sm_flags &= ~SM_KPM_NEWPAGE;
                is_kpm = 1;
                if (smp->sm_flags & SM_WRITE_DATA) {
                        hat_setrefmod(pp);
                } else if (smp->sm_flags & SM_READ_DATA) {
                        hat_setref(pp);
                }
        } else {
                if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
                    ((uintptr_t)addr & MAXBOFFSET) != 0) {
                        panic("segmap_release: bad addr %p", (void *)addr);
                        /*NOTREACHED*/
                }
                smp = GET_SMAP(seg, addr);

                TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
                    "segmap_relmap:seg %p addr %p smp %p",
                    seg, addr, smp);

                smtx = SMAPMTX(smp);
                mutex_enter(smtx);
                smp->sm_flags |= SM_NOTKPM_RELEASED;
        }

        ASSERT(smp->sm_refcnt > 0);

        /*
         * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
         * are set.
         */
        if ((flags & ~SM_DONTNEED) != 0) {
                if (flags & SM_WRITE)
                        segmapcnt.smp_rel_write.value.ul++;
                if (flags & SM_ASYNC) {
                        bflags |= B_ASYNC;
                        segmapcnt.smp_rel_async.value.ul++;
                }
                if (flags & SM_INVAL) {
                        bflags |= B_INVAL;
                        segmapcnt.smp_rel_abort.value.ul++;
                }
                if (flags & SM_DESTROY) {
                        bflags |= (B_INVAL|B_TRUNC);
                        segmapcnt.smp_rel_abort.value.ul++;
                }
                if (smp->sm_refcnt == 1) {
                        /*
                         * We only bother doing the FREE and DONTNEED flags
                         * if no one else is still referencing this mapping.
                         */
                        if (flags & SM_FREE) {
                                bflags |= B_FREE;
                                segmapcnt.smp_rel_free.value.ul++;
                        }
                        if (flags & SM_DONTNEED) {
                                bflags |= B_DONTNEED;
                                segmapcnt.smp_rel_dontneed.value.ul++;
                        }
                }
        } else {
                smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
        }

        vp = smp->sm_vp;
        offset = smp->sm_off;

        if (--smp->sm_refcnt == 0) {

                smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

                if (flags & (SM_INVAL|SM_DESTROY)) {
                        segmap_hashout(smp);    /* remove map info */
                        if (is_kpm) {
                                hat_kpm_mapout(pp, GET_KPME(smp), addr);
                                if (smp->sm_flags & SM_NOTKPM_RELEASED) {
                                        smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                                        hat_unload(kas.a_hat, segkmap->s_base +
                                            ((smp - smd_smap) * MAXBSIZE),
                                            MAXBSIZE, HAT_UNLOAD);
                                }

                        } else {
                                if (segmap_kpm)
                                        segkpm_mapout_validkpme(GET_KPME(smp));

                                smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                                hat_unload(kas.a_hat, addr, MAXBSIZE,
                                    HAT_UNLOAD);
                        }
                }
                segmap_smapadd(smp);    /* add to free list */
        }

        mutex_exit(smtx);

        if (is_kpm)
                page_unlock(pp);
        /*
         * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
         * are set.
         */
        if ((flags & ~SM_DONTNEED) != 0) {
                error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
                    bflags, CRED(), NULL);
        } else {
                error = 0;
        }

        return (error);
}
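
/*
 * A sketch of common release flags (illustrative, not exhaustive):
 * a file system write path may release with SM_WRITE | SM_ASYNC to
 * start an asynchronous putpage, add SM_DONTNEED when the data is
 * unlikely to be re-read soon, or pass SM_INVAL/SM_DESTROY to
 * invalidate or destroy the cached pages and the slot's mappings.
 */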

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
        struct segmap_data *smd;
        struct smap *smp, *smp_end;
        page_t *pp;
        pfn_t pfn;
        u_offset_t off;
        caddr_t addr;

        smd = (struct segmap_data *)seg->s_data;
        addr = seg->s_base;
        for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
            smp < smp_end; smp++) {

                if (smp->sm_refcnt) {
                        for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
                                int we_own_it = 0;

                                /*
                                 * page_lookup_nowait() returns NULL if
                                 * the page does not exist or if it is
                                 * exclusively locked, so fall back to
                                 * page_exists() to find the page
                                 * without taking a lock.
                                 */
                                if ((pp = page_lookup_nowait(smp->sm_vp,
                                    smp->sm_off + off, SE_SHARED)))
                                        we_own_it = 1;
                                else
                                        pp = page_exists(smp->sm_vp,
                                            smp->sm_off + off);

                                if (pp) {
                                        pfn = page_pptonum(pp);
                                        dump_addpage(seg->s_as,
                                            addr + off, pfn);
                                        if (we_own_it)
                                                page_unlock(pp);
                                }
                                dump_timeleft = dump_timeout;
                        }
                }
                addr += MAXBSIZE;
        }
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
        return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
        struct segmap_data *smd = (struct segmap_data *)seg->s_data;

        memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
        memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
        return (0);
}

/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
        return (NULL);
}

/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
        return (0);
}


#ifdef  SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
        struct smap *smp, enum seg_rw rw)
{
        caddr_t base;
        page_t  *pp;
        int     newpage = 0;
        struct kpme     *kpme;

        ASSERT(smp->sm_refcnt > 0);

        if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
                kmutex_t *smtx;

                base = segkpm_create_va(off);

                if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
                    seg, base)) == NULL) {
                        panic("segmap_pagecreate_kpm: "
                            "page_create failed");
                        /*NOTREACHED*/
                }

                newpage = 1;
                page_io_unlock(pp);
                ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

                /*
                 * Mark this here until the following segmap_pagecreate
                 * or segmap_release.
                 */
                smtx = SMAPMTX(smp);
                mutex_enter(smtx);
                smp->sm_flags |= SM_KPM_NEWPAGE;
                mutex_exit(smtx);
        }

        kpme = GET_KPME(smp);
        if (!newpage && kpme->kpe_page == pp)
                base = hat_kpm_page2va(pp, 0);
        else
                base = hat_kpm_mapin(pp, kpme);

        /*
         * FS code may decide not to call segmap_pagecreate and we
         * don't invoke segmap_fault via TLB miss, so we have to set
         * ref and mod bits in advance.
         */
        if (rw == S_WRITE) {
                hat_setrefmod(pp);
        } else {
                ASSERT(rw == S_READ);
                hat_setref(pp);
        }

        smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

        return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
        struct smap     *smp;
        struct vnode    *vp;
        u_offset_t      offset;
        caddr_t         baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
        int             hashid;
        kmutex_t        *hashmtx;
        page_t          *pp;
        union segmap_cpu *scpu;

        pp = hat_kpm_vaddr2page(baseaddr);

        ASSERT(pp && !PP_ISFREE(pp));
        ASSERT(PAGE_LOCKED(pp));
        ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

        vp = pp->p_vnode;
        offset = pp->p_offset;
        ASSERT(vp != NULL);

        /*
         * Assume the last smap used on this cpu is the one needed.
         */
        scpu = smd_cpu+CPU->cpu_seqid;
        smp = scpu->scpu.scpu_last_smap;
        mutex_enter(&smp->sm_mtx);
        if (smp->sm_vp == vp && smp->sm_off == offset) {
                ASSERT(smp->sm_refcnt > 0);
        } else {
                /*
                 * Assumption wrong, find the smap on the hash chain.
                 */
                mutex_exit(&smp->sm_mtx);
                SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
                hashmtx = SHASHMTX(hashid);

                mutex_enter(hashmtx);
                smp = smd_hash[hashid].sh_hash_list;
                for (; smp != NULL; smp = smp->sm_hash) {
                        if (smp->sm_vp == vp && smp->sm_off == offset)
                                break;
                }
                mutex_exit(hashmtx);
                if (smp) {
                        mutex_enter(&smp->sm_mtx);
                        ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
                }
        }

        if (ppp)
                *ppp = smp ? pp : NULL;

        return (smp);
}

#else   /* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
        struct smap *smp, enum seg_rw rw)
{
        return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
        return (NULL);
}

#endif  /* SEGKPM_SUPPORT */