root/usr/src/uts/common/fs/proc/prvnops.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 MNX Cloud, Inc.
 * Copyright 2023 Oxide Computer Company
 */

/*      Copyright (c) 1984,      1986, 1987, 1988, 1989 AT&T    */
/*        All Rights Reserved   */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/debug.h>
#include <sys/dirent.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/pathname.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/zone.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/mode.h>
#include <sys/poll.h>
#include <sys/user.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/gfs.h>
#include <sys/vnode.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/contract_impl.h>
#include <sys/ctfs.h>
#include <sys/avl.h>
#include <sys/ctype.h>
#include <fs/fs_subr.h>
#include <vm/rm.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/hat.h>
#include <fs/proc/prdata.h>
#if defined(__sparc)
#include <sys/regset.h>
#endif
#if defined(__x86)
#include <sys/sysi86.h>
#endif

/*
 * Created by prinit.
 */
vnodeops_t *prvnodeops;

/*
 * Directory characteristics (patterned after the s5 file system).
 */
#define PRROOTINO       2

#define PRDIRSIZE       14
struct prdirect {
        ushort_t        d_ino;
        char            d_name[PRDIRSIZE];
};

#define PRSDSIZE        (sizeof (struct prdirect))

/*
 * Directory characteristics.
 */
typedef struct prdirent {
        ino64_t         d_ino;          /* "inode number" of entry */
        off64_t         d_off;          /* offset of disk directory entry */
        unsigned short  d_reclen;       /* length of this record */
        char            d_name[14];     /* name of file */
} prdirent_t;

/*
 * Contents of a /proc/<pid> directory.
 * Reuse d_ino field for the /proc file type.
 */
static prdirent_t piddir[] = {
        { PR_PIDDIR,     1 * sizeof (prdirent_t), sizeof (prdirent_t),
                "." },
        { PR_PROCDIR,    2 * sizeof (prdirent_t), sizeof (prdirent_t),
                ".." },
        { PR_AS,         3 * sizeof (prdirent_t), sizeof (prdirent_t),
                "as" },
        { PR_CTL,        4 * sizeof (prdirent_t), sizeof (prdirent_t),
                "ctl" },
        { PR_STATUS,     5 * sizeof (prdirent_t), sizeof (prdirent_t),
                "status" },
        { PR_LSTATUS,    6 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lstatus" },
        { PR_PSINFO,     7 * sizeof (prdirent_t), sizeof (prdirent_t),
                "psinfo" },
        { PR_LPSINFO,    8 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lpsinfo" },
        { PR_MAP,        9 * sizeof (prdirent_t), sizeof (prdirent_t),
                "map" },
        { PR_RMAP,      10 * sizeof (prdirent_t), sizeof (prdirent_t),
                "rmap" },
        { PR_XMAP,      11 * sizeof (prdirent_t), sizeof (prdirent_t),
                "xmap" },
        { PR_CRED,      12 * sizeof (prdirent_t), sizeof (prdirent_t),
                "cred" },
        { PR_SIGACT,    13 * sizeof (prdirent_t), sizeof (prdirent_t),
                "sigact" },
        { PR_AUXV,      14 * sizeof (prdirent_t), sizeof (prdirent_t),
                "auxv" },
        { PR_USAGE,     15 * sizeof (prdirent_t), sizeof (prdirent_t),
                "usage" },
        { PR_LUSAGE,    16 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lusage" },
        { PR_PAGEDATA,  17 * sizeof (prdirent_t), sizeof (prdirent_t),
                "pagedata" },
        { PR_WATCH,     18 * sizeof (prdirent_t), sizeof (prdirent_t),
                "watch" },
        { PR_CURDIR,    19 * sizeof (prdirent_t), sizeof (prdirent_t),
                "cwd" },
        { PR_ROOTDIR,   20 * sizeof (prdirent_t), sizeof (prdirent_t),
                "root" },
        { PR_FDDIR,     21 * sizeof (prdirent_t), sizeof (prdirent_t),
                "fd" },
        { PR_FDINFODIR, 22 * sizeof (prdirent_t), sizeof (prdirent_t),
                "fdinfo" },
        { PR_OBJECTDIR, 23 * sizeof (prdirent_t), sizeof (prdirent_t),
                "object" },
        { PR_LWPDIR,    24 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lwp" },
        { PR_PRIV,      25 * sizeof (prdirent_t), sizeof (prdirent_t),
                "priv" },
        { PR_PATHDIR,   26 * sizeof (prdirent_t), sizeof (prdirent_t),
                "path" },
        { PR_CTDIR,     27 * sizeof (prdirent_t), sizeof (prdirent_t),
                "contracts" },
        { PR_SECFLAGS,  28 * sizeof (prdirent_t), sizeof (prdirent_t),
                "secflags" },
#if defined(__x86)
        { PR_LDT,       29 * sizeof (prdirent_t), sizeof (prdirent_t),
                "ldt" },
#endif
};

#define NPIDDIRFILES    (sizeof (piddir) / sizeof (piddir[0]) - 2)

/*
 * Contents of a /proc/<pid>/lwp/<lwpid> directory.
 */
static prdirent_t lwpiddir[] = {
        { PR_LWPIDDIR,   1 * sizeof (prdirent_t), sizeof (prdirent_t),
                "." },
        { PR_LWPDIR,     2 * sizeof (prdirent_t), sizeof (prdirent_t),
                ".." },
        { PR_LWPCTL,     3 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lwpctl" },
        { PR_LWPNAME,    4 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lwpname" },
        { PR_LWPSTATUS,  5 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lwpstatus" },
        { PR_LWPSINFO,   6 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lwpsinfo" },
        { PR_LWPUSAGE,   7 * sizeof (prdirent_t), sizeof (prdirent_t),
                "lwpusage" },
        { PR_XREGS,      8 * sizeof (prdirent_t), sizeof (prdirent_t),
                "xregs" },
        { PR_TMPLDIR,    9 * sizeof (prdirent_t), sizeof (prdirent_t),
                "templates" },
        { PR_SPYMASTER,  10 * sizeof (prdirent_t), sizeof (prdirent_t),
                "spymaster" },
#if defined(__sparc)
        { PR_GWINDOWS,  11 * sizeof (prdirent_t), sizeof (prdirent_t),
                "gwindows" },
        { PR_ASRS,      12 * sizeof (prdirent_t), sizeof (prdirent_t),
                "asrs" },
#endif
};

#define NLWPIDDIRFILES  (sizeof (lwpiddir) / sizeof (lwpiddir[0]) - 2)

/*
 * Span of entries in the array files (lstatus, lpsinfo, lusage).
 * We make the span larger than the size of the structure on purpose,
 * to make sure that programs cannot use the structure size by mistake.
 * Align _ILP32 structures at 8 bytes, _LP64 structures at 16 bytes.
 */
#ifdef _LP64
#define LSPAN(type)     (round16(sizeof (type)) + 16)
#define LSPAN32(type)   (round8(sizeof (type)) + 8)
#else
#define LSPAN(type)     (round8(sizeof (type)) + 8)
#endif

static void rebuild_objdir(struct as *);
static void prfreecommon(prcommon_t *);
static int praccess(vnode_t *, int, int, cred_t *, caller_context_t *);

static int
propen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
        vnode_t *vp = *vpp;
        prnode_t *pnp = VTOP(vp);
        prcommon_t *pcp = pnp->pr_pcommon;
        prnodetype_t type = pnp->pr_type;
        vnode_t *rvp;
        vtype_t vtype;
        proc_t *p;
        int error = 0;
        prnode_t *npnp = NULL;

        /*
         * Nothing to do for the /proc directory itself.
         */
        if (type == PR_PROCDIR)
                return (0);

        /*
         * If we are opening an underlying mapped object, reject opens
         * for writing regardless of the objects's access modes.
         * If we are opening a file in the /proc/pid/fd directory,
         * reject the open for any but a regular file or directory.
         * Just do it if we are opening the current or root directory.
         */
        switch (type) {
        case PR_OBJECT:
        case PR_FD:
        case PR_CURDIR:
        case PR_ROOTDIR:
                rvp = pnp->pr_realvp;
                vtype = rvp->v_type;
                if ((type == PR_OBJECT && (flag & FWRITE)) ||
                    (type == PR_FD && vtype != VREG && vtype != VDIR))
                        error = EACCES;
                else {
                        /*
                         * Need to hold rvp since VOP_OPEN() may release it.
                         */
                        VN_HOLD(rvp);
                        error = VOP_OPEN(&rvp, flag, cr, ct);
                        if (error) {
                                VN_RELE(rvp);
                        } else {
                                *vpp = rvp;
                                VN_RELE(vp);
                        }
                }
                return (error);
        default:
                break;
        }

        /*
         * If we are opening the pagedata file, allocate a prnode now
         * to avoid calling kmem_alloc() while holding p->p_lock.
         */
        if (type == PR_PAGEDATA || type == PR_OPAGEDATA)
                npnp = prgetnode(vp, type);

        /*
         * If the process exists, lock it now.
         * Otherwise we have a race condition with prclose().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                if (npnp != NULL)
                        prfreenode(npnp);
                return (ENOENT);
        }
        ASSERT(p == pcp->prc_proc);
        ASSERT(p->p_proc_flag & P_PR_LOCK);

        /*
         * Maintain a count of opens for write.  Allow exactly one
         * O_WRITE|O_EXCL request and fail subsequent ones.
         * Don't fail opens of old (bletch!) /proc lwp files.
         * Special case for open by the process itself:
         * Always allow the open by self and discount this
         * open for other opens for writing.
         */
        if (flag & FWRITE) {
                if (p == curproc) {
                        pcp->prc_selfopens++;
                        pnp->pr_flags |= PR_ISSELF;
                } else if (type == PR_LWPIDFILE) {
                        /* EMPTY */;
                } else if (flag & FEXCL) {
                        if (pcp->prc_writers > pcp->prc_selfopens) {
                                error = EBUSY;
                                goto out;
                        }
                        /* semantic for old /proc interface */
                        if (type == PR_PIDDIR)
                                pcp->prc_flags |= PRC_EXCL;
                } else if (pcp->prc_flags & PRC_EXCL) {
                        ASSERT(pcp->prc_writers > pcp->prc_selfopens);
                        error = secpolicy_proc_excl_open(cr);
                        if (error)
                                goto out;
                }
                pcp->prc_writers++;
                /*
                 * The vnode may have become invalid between the
                 * VOP_LOOKUP() of the /proc vnode and the VOP_OPEN().
                 * If so, do now what prinvalidate() should have done.
                 */
                if ((pnp->pr_flags & PR_INVAL) ||
                    (type == PR_PIDDIR &&
                    (VTOP(pnp->pr_pidfile)->pr_flags & PR_INVAL))) {
                        if (p != curproc)
                                pcp->prc_selfopens++;
                        ASSERT(pcp->prc_selfopens <= pcp->prc_writers);
                        if (pcp->prc_selfopens == pcp->prc_writers)
                                pcp->prc_flags &= ~PRC_EXCL;
                }
        }

        /*
         * If this is a large file open, indicate that in our flags -- some
         * procfs structures are not off_t-neutral (e.g., priovec_t), and
         * the open will need to be differentiated where 32-bit processes
         * pass these structures across the user/kernel boundary.
         */
        if (flag & FOFFMAX)
                pnp->pr_flags |= PR_OFFMAX;

        /*
         * Do file-specific things.
         */
        switch (type) {
        default:
                break;
        case PR_PAGEDATA:
        case PR_OPAGEDATA:
                /*
                 * Enable data collection for page data file;
                 * get unique id from the hat layer.
                 */
                {
                        int id;

                        /*
                         * Drop p->p_lock to call hat_startstat()
                         */
                        mutex_exit(&p->p_lock);
                        if ((p->p_flag & SSYS) || p->p_as == &kas ||
                            (id = hat_startstat(p->p_as)) == -1) {
                                mutex_enter(&p->p_lock);
                                error = ENOMEM;
                        } else if (pnp->pr_hatid == 0) {
                                mutex_enter(&p->p_lock);
                                pnp->pr_hatid = (uint_t)id;
                        } else {
                                mutex_enter(&p->p_lock);
                                /*
                                 * Use our newly allocated prnode.
                                 */
                                npnp->pr_hatid = (uint_t)id;
                                /*
                                 * prgetnode() initialized most of the prnode.
                                 * Duplicate the remainder.
                                 */
                                npnp->pr_ino = pnp->pr_ino;
                                npnp->pr_common = pnp->pr_common;
                                npnp->pr_pcommon = pnp->pr_pcommon;
                                npnp->pr_parent = pnp->pr_parent;
                                VN_HOLD(npnp->pr_parent);
                                npnp->pr_index = pnp->pr_index;

                                npnp->pr_next = p->p_plist;
                                p->p_plist = PTOV(npnp);

                                VN_RELE(PTOV(pnp));
                                pnp = npnp;
                                npnp = NULL;
                                *vpp = PTOV(pnp);
                        }
                }
                break;
        }

out:
        prunlock(pnp);

        if (npnp != NULL)
                prfreenode(npnp);
        return (error);
}

/* ARGSUSED */
static int
prclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
        prnode_t *pnp = VTOP(vp);
        prcommon_t *pcp = pnp->pr_pcommon;
        prnodetype_t type = pnp->pr_type;
        proc_t *p;
        kthread_t *t;
        user_t *up;

        /*
         * Nothing to do for the /proc directory itself.
         */
        if (type == PR_PROCDIR)
                return (0);

        ASSERT(type != PR_OBJECT && type != PR_FD &&
            type != PR_CURDIR && type != PR_ROOTDIR);

        /*
         * If the process exists, lock it now.
         * Otherwise we have a race condition with propen().
         * Hold pr_pidlock across the reference to prc_selfopens,
         * and prc_writers in case there is no process anymore,
         * to cover the case of concurrent calls to prclose()
         * after the process has been reaped by freeproc().
         */
        p = pr_p_lock(pnp);

        /*
         * There is nothing more to do until the last close of
         * the file table entry except to clear the pr_owner
         * field of the prnode and notify any waiters
         * (their file descriptor may have just been closed).
         */
        if (count > 1) {
                mutex_exit(&pr_pidlock);
                if (pnp->pr_owner == curproc && !fisopen(vp))
                        pnp->pr_owner = NULL;
                if (p != NULL) {
                        prnotify(vp);
                        prunlock(pnp);
                }
                return (0);
        }

        /*
         * Decrement the count of self-opens for writing.
         * Decrement the total count of opens for writing.
         * Cancel exclusive opens when only self-opens remain.
         */
        if (flag & FWRITE) {
                /*
                 * prc_selfopens also contains the count of
                 * invalid writers.  See prinvalidate().
                 */
                if ((pnp->pr_flags & (PR_ISSELF|PR_INVAL)) ||
                    (type == PR_PIDDIR &&
                    (VTOP(pnp->pr_pidfile)->pr_flags & PR_INVAL))) {
                        ASSERT(pcp->prc_selfopens != 0);
                        --pcp->prc_selfopens;
                }
                ASSERT(pcp->prc_writers != 0);
                if (--pcp->prc_writers == pcp->prc_selfopens)
                        pcp->prc_flags &= ~PRC_EXCL;
        }
        ASSERT(pcp->prc_writers >= pcp->prc_selfopens);
        mutex_exit(&pr_pidlock);
        if (pnp->pr_owner == curproc && !fisopen(vp))
                pnp->pr_owner = NULL;

        /*
         * If there is no process, there is nothing more to do.
         */
        if (p == NULL)
                return (0);

        ASSERT(p == pcp->prc_proc);
        prnotify(vp);   /* notify waiters */

        /*
         * Do file-specific things.
         */
        switch (type) {
        default:
                break;
        case PR_PAGEDATA:
        case PR_OPAGEDATA:
                /*
                 * This is a page data file.
                 * Free the hat level statistics.
                 * Drop p->p_lock before calling hat_freestat().
                 */
                mutex_exit(&p->p_lock);
                if (p->p_as != &kas && pnp->pr_hatid != 0)
                        hat_freestat(p->p_as, pnp->pr_hatid);
                mutex_enter(&p->p_lock);
                pnp->pr_hatid = 0;
                break;
        }

        /*
         * On last close of all writable file descriptors,
         * perform run-on-last-close and/or kill-on-last-close logic.
         * Can't do this is the /proc agent lwp still exists.
         */
        if (pcp->prc_writers == 0 &&
            p->p_agenttp == NULL &&
            !(pcp->prc_flags & PRC_DESTROY) &&
            p->p_stat != SZOMB &&
            (p->p_proc_flag & (P_PR_RUNLCL|P_PR_KILLCL))) {
                int killproc;

                /*
                 * Cancel any watchpoints currently in effect.
                 * The process might disappear during this operation.
                 */
                if (pr_cancel_watch(pnp) == NULL)
                        return (0);
                /*
                 * If any tracing flags are set, clear them.
                 */
                if (p->p_proc_flag & P_PR_TRACE) {
                        up = PTOU(p);
                        premptyset(&up->u_entrymask);
                        premptyset(&up->u_exitmask);
                        up->u_systrap = 0;
                }
                premptyset(&p->p_sigmask);
                premptyset(&p->p_fltmask);
                killproc = (p->p_proc_flag & P_PR_KILLCL);
                p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
                /*
                 * Cancel any outstanding single-step requests.
                 */
                if ((t = p->p_tlist) != NULL) {
                        /*
                         * Drop p_lock because prnostep() touches the stack.
                         * The loop is safe because the process is P_PR_LOCK'd.
                         */
                        mutex_exit(&p->p_lock);
                        do {
                                prnostep(ttolwp(t));
                        } while ((t = t->t_forw) != p->p_tlist);
                        mutex_enter(&p->p_lock);
                }
                /*
                 * Set runnable all lwps stopped by /proc.
                 */
                if (killproc)
                        sigtoproc(p, NULL, SIGKILL);
                else
                        allsetrun(p);
        }

        prunlock(pnp);
        return (0);
}

/*
 * Array of read functions, indexed by /proc file type.
 */
static int pr_read_inval(), pr_read_as(), pr_read_status(),
        pr_read_lstatus(), pr_read_psinfo(), pr_read_lpsinfo(),
        pr_read_map(), pr_read_rmap(), pr_read_xmap(),
        pr_read_cred(), pr_read_sigact(), pr_read_auxv(),
#if defined(__x86)
        pr_read_ldt(),
#endif
        pr_read_usage(), pr_read_lusage(), pr_read_pagedata(),
        pr_read_watch(), pr_read_lwpstatus(), pr_read_lwpsinfo(),
        pr_read_lwpusage(), pr_read_lwpname(),
        pr_read_xregs(), pr_read_priv(),
        pr_read_spymaster(), pr_read_secflags(),
#if defined(__sparc)
        pr_read_gwindows(), pr_read_asrs(),
#endif
        pr_read_piddir(), pr_read_pidfile(), pr_read_opagedata(),
        pr_read_fdinfo();

static int (*pr_read_function[PR_NFILES])() = {
        pr_read_inval,          /* /proc                                */
        pr_read_inval,          /* /proc/self                           */
        pr_read_piddir,         /* /proc/<pid> (old /proc read())       */
        pr_read_as,             /* /proc/<pid>/as                       */
        pr_read_inval,          /* /proc/<pid>/ctl                      */
        pr_read_status,         /* /proc/<pid>/status                   */
        pr_read_lstatus,        /* /proc/<pid>/lstatus                  */
        pr_read_psinfo,         /* /proc/<pid>/psinfo                   */
        pr_read_lpsinfo,        /* /proc/<pid>/lpsinfo                  */
        pr_read_map,            /* /proc/<pid>/map                      */
        pr_read_rmap,           /* /proc/<pid>/rmap                     */
        pr_read_xmap,           /* /proc/<pid>/xmap                     */
        pr_read_cred,           /* /proc/<pid>/cred                     */
        pr_read_sigact,         /* /proc/<pid>/sigact                   */
        pr_read_auxv,           /* /proc/<pid>/auxv                     */
#if defined(__x86)
        pr_read_ldt,            /* /proc/<pid>/ldt                      */
#endif
        pr_read_usage,          /* /proc/<pid>/usage                    */
        pr_read_lusage,         /* /proc/<pid>/lusage                   */
        pr_read_pagedata,       /* /proc/<pid>/pagedata                 */
        pr_read_watch,          /* /proc/<pid>/watch                    */
        pr_read_inval,          /* /proc/<pid>/cwd                      */
        pr_read_inval,          /* /proc/<pid>/root                     */
        pr_read_inval,          /* /proc/<pid>/fd                       */
        pr_read_inval,          /* /proc/<pid>/fd/nn                    */
        pr_read_inval,          /* /proc/<pid>/fdinfo                   */
        pr_read_fdinfo,         /* /proc/<pid>/fdinfo/nn                */
        pr_read_inval,          /* /proc/<pid>/object                   */
        pr_read_inval,          /* /proc/<pid>/object/xxx               */
        pr_read_inval,          /* /proc/<pid>/lwp                      */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>              */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>/lwpctl       */
        pr_read_lwpname,        /* /proc/<pid>/lwp/<lwpid>/lwpname      */
        pr_read_lwpstatus,      /* /proc/<pid>/lwp/<lwpid>/lwpstatus    */
        pr_read_lwpsinfo,       /* /proc/<pid>/lwp/<lwpid>/lwpsinfo     */
        pr_read_lwpusage,       /* /proc/<pid>/lwp/<lwpid>/lwpusage     */
        pr_read_xregs,          /* /proc/<pid>/lwp/<lwpid>/xregs        */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>/templates    */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>/templates/<id> */
        pr_read_spymaster,      /* /proc/<pid>/lwp/<lwpid>/spymaster    */
#if defined(__sparc)
        pr_read_gwindows,       /* /proc/<pid>/lwp/<lwpid>/gwindows     */
        pr_read_asrs,           /* /proc/<pid>/lwp/<lwpid>/asrs         */
#endif
        pr_read_priv,           /* /proc/<pid>/priv                     */
        pr_read_inval,          /* /proc/<pid>/path                     */
        pr_read_inval,          /* /proc/<pid>/path/xxx                 */
        pr_read_inval,          /* /proc/<pid>/contracts                */
        pr_read_inval,          /* /proc/<pid>/contracts/<ctid>         */
        pr_read_secflags,       /* /proc/<pid>/secflags                 */
        pr_read_pidfile,        /* old process file                     */
        pr_read_pidfile,        /* old lwp file                         */
        pr_read_opagedata,      /* old pagedata file                    */
};

/* ARGSUSED */
static int
pr_read_inval(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        /*
         * No read() on any /proc directory, use getdents(2) instead.
         * Cannot read a control file either.
         * An underlying mapped object file cannot get here.
         */
        return (EINVAL);
}

static int
pr_uioread(void *base, long count, uio_t *uiop)
{
        int error = 0;

        ASSERT(count >= 0);
        count -= uiop->uio_offset;
        if (count > 0 && uiop->uio_offset >= 0) {
                error = uiomove((char *)base + uiop->uio_offset,
                    count, UIO_READ, uiop);
        }

        return (error);
}

static int
pr_read_as(prnode_t *pnp, uio_t *uiop)
{
        int error;

        ASSERT(pnp->pr_type == PR_AS);

        if ((error = prlock(pnp, ZNO)) == 0) {
                proc_t *p = pnp->pr_common->prc_proc;
                struct as *as = p->p_as;

                /*
                 * /proc I/O cannot be done to a system process.
                 * A 32-bit process cannot read a 64-bit process.
                 */
                if ((p->p_flag & SSYS) || as == &kas) {
                        error = 0;
#ifdef _SYSCALL32_IMPL
                } else if (curproc->p_model == DATAMODEL_ILP32 &&
                    PROCESS_NOT_32BIT(p)) {
                        error = EOVERFLOW;
#endif
                } else {
                        /*
                         * We don't hold p_lock over an i/o operation because
                         * that could lead to deadlock with the clock thread.
                         */
                        mutex_exit(&p->p_lock);
                        error = prusrio(p, UIO_READ, uiop, 0);
                        mutex_enter(&p->p_lock);
                }
                prunlock(pnp);
        }

        return (error);
}

static int
pr_read_status(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        pstatus_t *sp;
        int error;

        ASSERT(pnp->pr_type == PR_STATUS);

        /*
         * We kmem_alloc() the pstatus structure because
         * it is so big it might blow the kernel stack.
         */
        sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
        if ((error = prlock(pnp, ZNO)) == 0) {
                prgetstatus(pnp->pr_common->prc_proc, sp, VTOZONE(PTOV(pnp)));
                prunlock(pnp);
                error = pr_uioread(sp, sizeof (*sp), uiop);
        }
        kmem_free(sp, sizeof (*sp));
        return (error);
}

static int
pr_read_lstatus(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        kthread_t *t;
        lwpdir_t *ldp;
        size_t size;
        prheader_t *php;
        lwpstatus_t *sp;
        int error;
        int nlwp;
        int i;

        ASSERT(pnp->pr_type == PR_LSTATUS);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;
        nlwp = p->p_lwpcnt;
        size = sizeof (prheader_t) + nlwp * LSPAN(lwpstatus_t);

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        php = kmem_zalloc(size, KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_lwpcnt can't change while process is locked */
        ASSERT(nlwp == p->p_lwpcnt);

        php->pr_nent = nlwp;
        php->pr_entsize = LSPAN(lwpstatus_t);

        sp = (lwpstatus_t *)(php + 1);
        for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
                if (ldp->ld_entry == NULL ||
                    (t = ldp->ld_entry->le_thread) == NULL)
                        continue;
                prgetlwpstatus(t, sp, VTOZONE(PTOV(pnp)));
                sp = (lwpstatus_t *)((caddr_t)sp + LSPAN(lwpstatus_t));
        }
        prunlock(pnp);

        error = pr_uioread(php, size, uiop);
        kmem_free(php, size);
        return (error);
}

static int
pr_read_psinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        psinfo_t psinfo;
        proc_t *p;
        int error = 0;

        ASSERT(pnp->pr_type == PR_PSINFO);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                error = ENOENT;
        else {
                ASSERT(p == pnp->pr_common->prc_proc);
                prgetpsinfo(p, &psinfo);
                prunlock(pnp);
                error = pr_uioread(&psinfo, sizeof (psinfo), uiop);
        }
        return (error);
}

static int
pr_read_fdinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        prfdinfo_t *fdinfo;
        list_t data;
        proc_t *p;
        uint_t fd;
        file_t *fp;
        short ufp_flag;
        int error = 0;

        ASSERT(pnp->pr_type == PR_FDINFO);

        /*
         * This is a guess at the size of the structure that needs to
         * be returned. It's a balance between not allocating too much more
         * space than is required and not requiring too many subsequent
         * reallocations. Allocate it before acquiring the process lock.
         */
        pr_iol_initlist(&data, sizeof (prfdinfo_t) + MAXPATHLEN + 2, 1);

        if ((error = prlock(pnp, ZNO)) != 0) {
                pr_iol_freelist(&data);
                return (error);
        }

        p = pnp->pr_common->prc_proc;

        if ((p->p_flag & SSYS) || p->p_as == &kas) {
                prunlock(pnp);
                pr_iol_freelist(&data);
                return (0);
        }

        fd = pnp->pr_index;

        /* Fetch and lock the file_t for this descriptor */
        fp = pr_getf(p, fd, &ufp_flag);

        if (fp == NULL) {
                error = ENOENT;
                prunlock(pnp);
                goto out;
        }

        /*
         * For fdinfo, we don't want to include the placeholder pr_misc at the
         * end of the struct. We'll terminate the data with an empty pr_misc
         * header before returning.
         */

        fdinfo = pr_iol_newbuf(&data, offsetof(prfdinfo_t, pr_misc));
        fdinfo->pr_fd = fd;
        fdinfo->pr_fdflags = ufp_flag;
        fdinfo->pr_fileflags = fp->f_flag2 << 16 | fp->f_flag;
        if ((fdinfo->pr_fileflags & (FSEARCH | FEXEC)) == 0)
                fdinfo->pr_fileflags += FOPEN;
        fdinfo->pr_offset = fp->f_offset;
        /*
         * Information from the vnode (rather than the file_t) is retrieved
         * later, in prgetfdinfo() - for example sock_getfasync()
         */

        prunlock(pnp);

        error = prgetfdinfo(p, fp->f_vnode, fdinfo, cr, fp->f_cred, &data);

        pr_releasef(fp);

out:
        if (error == 0)
                error = pr_iol_uiomove_and_free(&data, uiop, error);
        else
                pr_iol_freelist(&data);

        return (error);
}

static int
pr_read_lpsinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        kthread_t *t;
        lwpdir_t *ldp;
        lwpent_t *lep;
        size_t size;
        prheader_t *php;
        lwpsinfo_t *sp;
        int error;
        int nlwp;
        int i;

        ASSERT(pnp->pr_type == PR_LPSINFO);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        ASSERT(p == pnp->pr_common->prc_proc);
        if ((nlwp = p->p_lwpcnt + p->p_zombcnt) == 0) {
                prunlock(pnp);
                return (ENOENT);
        }
        size = sizeof (prheader_t) + nlwp * LSPAN(lwpsinfo_t);

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        php = kmem_zalloc(size, KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_lwpcnt can't change while process is locked */
        ASSERT(nlwp == p->p_lwpcnt + p->p_zombcnt);

        php->pr_nent = nlwp;
        php->pr_entsize = LSPAN(lwpsinfo_t);

        sp = (lwpsinfo_t *)(php + 1);
        for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
                if ((lep = ldp->ld_entry) == NULL)
                        continue;
                if ((t = lep->le_thread) != NULL)
                        prgetlwpsinfo(t, sp);
                else {
                        bzero(sp, sizeof (*sp));
                        sp->pr_lwpid = lep->le_lwpid;
                        sp->pr_state = SZOMB;
                        sp->pr_sname = 'Z';
                        sp->pr_start.tv_sec = lep->le_start;
                        sp->pr_bindpro = PBIND_NONE;
                        sp->pr_bindpset = PS_NONE;
                }
                sp = (lwpsinfo_t *)((caddr_t)sp + LSPAN(lwpsinfo_t));
        }
        prunlock(pnp);

        error = pr_uioread(php, size, uiop);
        kmem_free(php, size);
        return (error);
}

static int
pr_read_map_common(prnode_t *pnp, uio_t *uiop, prnodetype_t type)
{
        proc_t *p;
        struct as *as;
        list_t iolhead;
        int error;

readmap_common:
        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        as = p->p_as;

        if ((p->p_flag & SSYS) || as == &kas) {
                prunlock(pnp);
                return (0);
        }

        if (!AS_LOCK_TRYENTER(as, RW_WRITER)) {
                prunlock(pnp);
                delay(1);
                goto readmap_common;
        }
        mutex_exit(&p->p_lock);

        switch (type) {
        case PR_XMAP:
                error = prgetxmap(p, &iolhead);
                break;
        case PR_RMAP:
                error = prgetmap(p, 1, &iolhead);
                break;
        case PR_MAP:
                error = prgetmap(p, 0, &iolhead);
                break;
        }

        AS_LOCK_EXIT(as);
        mutex_enter(&p->p_lock);
        prunlock(pnp);

        error = pr_iol_uiomove_and_free(&iolhead, uiop, error);

        return (error);
}

static int
pr_read_map(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        ASSERT(pnp->pr_type == PR_MAP);
        return (pr_read_map_common(pnp, uiop, pnp->pr_type));
}

static int
pr_read_rmap(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        ASSERT(pnp->pr_type == PR_RMAP);
        return (pr_read_map_common(pnp, uiop, pnp->pr_type));
}

static int
pr_read_xmap(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        ASSERT(pnp->pr_type == PR_XMAP);
        return (pr_read_map_common(pnp, uiop, pnp->pr_type));
}

static int
pr_read_cred(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        prcred_t *pcrp;
        int error;
        size_t count;

        ASSERT(pnp->pr_type == PR_CRED);

        /*
         * We kmem_alloc() the prcred_t structure because
         * the number of supplementary groups is variable.
         */
        pcrp =
            kmem_zalloc(sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1),
            KM_SLEEP);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;
        p = pnp->pr_common->prc_proc;
        ASSERT(p != NULL);

        prgetcred(p, pcrp);
        prunlock(pnp);

        count = sizeof (prcred_t);
        if (pcrp->pr_ngroups > 1)
                count += sizeof (gid_t) * (pcrp->pr_ngroups - 1);
        error = pr_uioread(pcrp, count, uiop);
out:
        kmem_free(pcrp, sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1));
        return (error);
}

static int
pr_read_priv(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        size_t psize = prgetprivsize();
        prpriv_t *ppriv = kmem_zalloc(psize, KM_SLEEP);
        int error;

        ASSERT(pnp->pr_type == PR_PRIV);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;
        p = pnp->pr_common->prc_proc;
        ASSERT(p != NULL);

        prgetpriv(p, ppriv);
        prunlock(pnp);

        error = pr_uioread(ppriv, psize, uiop);
out:
        kmem_free(ppriv, psize);
        return (error);
}

static int
pr_read_sigact(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
        proc_t *p;
        struct sigaction *sap;
        int sig;
        int error;
        user_t *up;

        ASSERT(pnp->pr_type == PR_SIGACT);

        /*
         * We kmem_alloc() the sigaction array because
         * it is so big it might blow the kernel stack.
         */
        sap = kmem_alloc((nsig-1) * sizeof (struct sigaction), KM_SLEEP);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;
        p = pnp->pr_common->prc_proc;
        ASSERT(p != NULL);

        if (uiop->uio_offset >= (nsig-1)*sizeof (struct sigaction)) {
                prunlock(pnp);
                goto out;
        }

        up = PTOU(p);
        for (sig = 1; sig < nsig; sig++)
                prgetaction(p, up, sig, &sap[sig-1]);
        prunlock(pnp);

        error = pr_uioread(sap, (nsig - 1) * sizeof (struct sigaction), uiop);
out:
        kmem_free(sap, (nsig-1) * sizeof (struct sigaction));
        return (error);
}

static int
pr_read_auxv(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        auxv_t auxv[__KERN_NAUXV_IMPL];
        proc_t *p;
        user_t *up;
        int error;

        ASSERT(pnp->pr_type == PR_AUXV);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        if (uiop->uio_offset >= sizeof (auxv)) {
                prunlock(pnp);
                return (0);
        }

        p = pnp->pr_common->prc_proc;
        up = PTOU(p);
        bcopy(up->u_auxv, auxv, sizeof (auxv));
        prunlock(pnp);

        return (pr_uioread(auxv, sizeof (auxv), uiop));
}

#if defined(__x86)
/*
 * XX64
 *      This is almost certainly broken for the amd64 kernel, because
 *      we have two kinds of LDT structures to export -- one for compatibility
 *      mode, and one for long mode, sigh.
 *
 *      For now let's just have a ldt of size 0 for 64-bit processes.
 */
static int
pr_read_ldt(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        struct ssd *ssd;
        size_t size;
        int error;

        ASSERT(pnp->pr_type == PR_LDT);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;

        mutex_exit(&p->p_lock);
        mutex_enter(&p->p_ldtlock);
        size = prnldt(p) * sizeof (struct ssd);
        if (uiop->uio_offset >= size) {
                mutex_exit(&p->p_ldtlock);
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                return (0);
        }

        ssd = kmem_alloc(size, KM_SLEEP);
        prgetldt(p, ssd);
        mutex_exit(&p->p_ldtlock);
        mutex_enter(&p->p_lock);
        prunlock(pnp);

        error = pr_uioread(ssd, size, uiop);
        kmem_free(ssd, size);
        return (error);
}
#endif  /* __x86 */

static int
pr_read_usage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        prhusage_t *pup;
        prusage_t *upup;
        proc_t *p;
        kthread_t *t;
        int error;

        ASSERT(pnp->pr_type == PR_USAGE);

        /* allocate now, before locking the process */
        pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
        upup = kmem_alloc(sizeof (*upup), KM_SLEEP);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                error = ENOENT;
                goto out;
        }
        ASSERT(p == pnp->pr_common->prc_proc);

        if (uiop->uio_offset >= sizeof (prusage_t)) {
                prunlock(pnp);
                error = 0;
                goto out;
        }

        pup->pr_tstamp = gethrtime();

        pup->pr_count  = p->p_defunct;
        pup->pr_create = p->p_mstart;
        pup->pr_term   = p->p_mterm;

        pup->pr_rtime    = p->p_mlreal;
        pup->pr_utime    = p->p_acct[LMS_USER];
        pup->pr_stime    = p->p_acct[LMS_SYSTEM];
        pup->pr_ttime    = p->p_acct[LMS_TRAP];
        pup->pr_tftime   = p->p_acct[LMS_TFAULT];
        pup->pr_dftime   = p->p_acct[LMS_DFAULT];
        pup->pr_kftime   = p->p_acct[LMS_KFAULT];
        pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
        pup->pr_slptime  = p->p_acct[LMS_SLEEP];
        pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
        pup->pr_stoptime = p->p_acct[LMS_STOPPED];

        pup->pr_minf  = p->p_ru.minflt;
        pup->pr_majf  = p->p_ru.majflt;
        pup->pr_nswap = p->p_ru.nswap;
        pup->pr_inblk = p->p_ru.inblock;
        pup->pr_oublk = p->p_ru.oublock;
        pup->pr_msnd  = p->p_ru.msgsnd;
        pup->pr_mrcv  = p->p_ru.msgrcv;
        pup->pr_sigs  = p->p_ru.nsignals;
        pup->pr_vctx  = p->p_ru.nvcsw;
        pup->pr_ictx  = p->p_ru.nivcsw;
        pup->pr_sysc  = p->p_ru.sysc;
        pup->pr_ioch  = p->p_ru.ioch;

        /*
         * Add the usage information for each active lwp.
         */
        if ((t = p->p_tlist) != NULL &&
            !(pnp->pr_pcommon->prc_flags & PRC_DESTROY)) {
                do {
                        if (t->t_proc_flag & TP_LWPEXIT)
                                continue;
                        pup->pr_count++;
                        praddusage(t, pup);
                } while ((t = t->t_forw) != p->p_tlist);
        }

        prunlock(pnp);

        prcvtusage(pup, upup);

        error = pr_uioread(upup, sizeof (prusage_t), uiop);
out:
        kmem_free(pup, sizeof (*pup));
        kmem_free(upup, sizeof (*upup));
        return (error);
}

static int
pr_read_lusage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        int nlwp;
        prhusage_t *pup;
        prheader_t *php;
        prusage_t *upup;
        size_t size;
        hrtime_t curtime;
        proc_t *p;
        kthread_t *t;
        lwpdir_t *ldp;
        int error;
        int i;

        ASSERT(pnp->pr_type == PR_LUSAGE);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        ASSERT(p == pnp->pr_common->prc_proc);
        if ((nlwp = p->p_lwpcnt) == 0) {
                prunlock(pnp);
                return (ENOENT);
        }

        size = sizeof (prheader_t) + (nlwp + 1) * LSPAN(prusage_t);
        if (uiop->uio_offset >= size) {
                prunlock(pnp);
                return (0);
        }

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        pup = kmem_zalloc(size + sizeof (prhusage_t), KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_lwpcnt can't change while process is locked */
        ASSERT(nlwp == p->p_lwpcnt);

        php = (prheader_t *)(pup + 1);
        upup = (prusage_t *)(php + 1);

        php->pr_nent = nlwp + 1;
        php->pr_entsize = LSPAN(prusage_t);

        curtime = gethrtime();

        /*
         * First the summation over defunct lwps.
         */
        pup->pr_count  = p->p_defunct;
        pup->pr_tstamp = curtime;
        pup->pr_create = p->p_mstart;
        pup->pr_term   = p->p_mterm;

        pup->pr_rtime    = p->p_mlreal;
        pup->pr_utime    = p->p_acct[LMS_USER];
        pup->pr_stime    = p->p_acct[LMS_SYSTEM];
        pup->pr_ttime    = p->p_acct[LMS_TRAP];
        pup->pr_tftime   = p->p_acct[LMS_TFAULT];
        pup->pr_dftime   = p->p_acct[LMS_DFAULT];
        pup->pr_kftime   = p->p_acct[LMS_KFAULT];
        pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
        pup->pr_slptime  = p->p_acct[LMS_SLEEP];
        pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
        pup->pr_stoptime = p->p_acct[LMS_STOPPED];

        pup->pr_minf  = p->p_ru.minflt;
        pup->pr_majf  = p->p_ru.majflt;
        pup->pr_nswap = p->p_ru.nswap;
        pup->pr_inblk = p->p_ru.inblock;
        pup->pr_oublk = p->p_ru.oublock;
        pup->pr_msnd  = p->p_ru.msgsnd;
        pup->pr_mrcv  = p->p_ru.msgrcv;
        pup->pr_sigs  = p->p_ru.nsignals;
        pup->pr_vctx  = p->p_ru.nvcsw;
        pup->pr_ictx  = p->p_ru.nivcsw;
        pup->pr_sysc  = p->p_ru.sysc;
        pup->pr_ioch  = p->p_ru.ioch;

        prcvtusage(pup, upup);

        /*
         * Fill one prusage struct for each active lwp.
         */
        for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
                if (ldp->ld_entry == NULL ||
                    (t = ldp->ld_entry->le_thread) == NULL)
                        continue;
                ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
                ASSERT(nlwp > 0);
                --nlwp;
                upup = (prusage_t *)((caddr_t)upup + LSPAN(prusage_t));
                prgetusage(t, pup);
                prcvtusage(pup, upup);
        }
        ASSERT(nlwp == 0);

        prunlock(pnp);

        error = pr_uioread(php, size, uiop);
        kmem_free(pup, size + sizeof (prhusage_t));
        return (error);
}

static int
pr_read_pagedata(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        int error;

        ASSERT(pnp->pr_type == PR_PAGEDATA);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        if ((p->p_flag & SSYS) || p->p_as == &kas) {
                prunlock(pnp);
                return (0);
        }

        mutex_exit(&p->p_lock);
        error = prpdread(p, pnp->pr_hatid, uiop);
        mutex_enter(&p->p_lock);

        prunlock(pnp);
        return (error);
}

static int
pr_read_opagedata(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        struct as *as;
        int error;

        ASSERT(pnp->pr_type == PR_OPAGEDATA);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        as = p->p_as;
        if ((p->p_flag & SSYS) || as == &kas) {
                prunlock(pnp);
                return (0);
        }

        mutex_exit(&p->p_lock);
        error = oprpdread(as, pnp->pr_hatid, uiop);
        mutex_enter(&p->p_lock);

        prunlock(pnp);
        return (error);
}

static int
pr_read_watch(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        int error;
        prwatch_t *Bpwp;
        size_t size;
        prwatch_t *pwp;
        int nwarea;
        struct watched_area *pwarea;

        ASSERT(pnp->pr_type == PR_WATCH);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        nwarea = avl_numnodes(&p->p_warea);
        size = nwarea * sizeof (prwatch_t);
        if (uiop->uio_offset >= size) {
                prunlock(pnp);
                return (0);
        }

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        Bpwp = pwp = kmem_zalloc(size, KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_nwarea can't change while process is locked */
        ASSERT(nwarea == avl_numnodes(&p->p_warea));

        /* gather the watched areas */
        for (pwarea = avl_first(&p->p_warea); pwarea != NULL;
            pwarea = AVL_NEXT(&p->p_warea, pwarea), pwp++) {
                pwp->pr_vaddr = (uintptr_t)pwarea->wa_vaddr;
                pwp->pr_size = pwarea->wa_eaddr - pwarea->wa_vaddr;
                pwp->pr_wflags = (int)pwarea->wa_flags;
        }

        prunlock(pnp);

        error = pr_uioread(Bpwp, size, uiop);
        kmem_free(Bpwp, size);
        return (error);
}

static int
pr_read_lwpstatus(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        lwpstatus_t *sp;
        int error;

        ASSERT(pnp->pr_type == PR_LWPSTATUS);

        /*
         * We kmem_alloc() the lwpstatus structure because
         * it is so big it might blow the kernel stack.
         */
        sp = kmem_alloc(sizeof (*sp), KM_SLEEP);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;

        if (uiop->uio_offset >= sizeof (*sp)) {
                prunlock(pnp);
                goto out;
        }

        prgetlwpstatus(pnp->pr_common->prc_thread, sp, VTOZONE(PTOV(pnp)));
        prunlock(pnp);

        error = pr_uioread(sp, sizeof (*sp), uiop);
out:
        kmem_free(sp, sizeof (*sp));
        return (error);
}

static int
pr_read_lwpsinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        lwpsinfo_t lwpsinfo;
        proc_t *p;
        kthread_t *t;
        lwpent_t *lep;

        ASSERT(pnp->pr_type == PR_LWPSINFO);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        ASSERT(p == pnp->pr_common->prc_proc);
        if (pnp->pr_common->prc_tslot == -1) {
                prunlock(pnp);
                return (ENOENT);
        }

        if (uiop->uio_offset >= sizeof (lwpsinfo)) {
                prunlock(pnp);
                return (0);
        }

        if ((t = pnp->pr_common->prc_thread) != NULL)
                prgetlwpsinfo(t, &lwpsinfo);
        else {
                lep = p->p_lwpdir[pnp->pr_common->prc_tslot].ld_entry;
                bzero(&lwpsinfo, sizeof (lwpsinfo));
                lwpsinfo.pr_lwpid = lep->le_lwpid;
                lwpsinfo.pr_state = SZOMB;
                lwpsinfo.pr_sname = 'Z';
                lwpsinfo.pr_start.tv_sec = lep->le_start;
                lwpsinfo.pr_bindpro = PBIND_NONE;
                lwpsinfo.pr_bindpset = PS_NONE;
        }
        prunlock(pnp);

        return (pr_uioread(&lwpsinfo, sizeof (lwpsinfo), uiop));
}

static int
pr_read_lwpusage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        prhusage_t *pup;
        prusage_t *upup;
        proc_t *p;
        int error;

        ASSERT(pnp->pr_type == PR_LWPUSAGE);

        /* allocate now, before locking the process */
        pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
        upup = kmem_alloc(sizeof (*upup), KM_SLEEP);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                error = ENOENT;
                goto out;
        }
        ASSERT(p == pnp->pr_common->prc_proc);
        if (pnp->pr_common->prc_thread == NULL) {
                prunlock(pnp);
                error = ENOENT;
                goto out;
        }
        if (uiop->uio_offset >= sizeof (prusage_t)) {
                prunlock(pnp);
                error = 0;
                goto out;
        }

        pup->pr_tstamp = gethrtime();
        prgetusage(pnp->pr_common->prc_thread, pup);

        prunlock(pnp);

        prcvtusage(pup, upup);

        error = pr_uioread(upup, sizeof (prusage_t), uiop);
out:
        kmem_free(pup, sizeof (*pup));
        kmem_free(upup, sizeof (*upup));
        return (error);
}

static int
pr_read_lwpname(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        char lwpname[THREAD_NAME_MAX];
        kthread_t *t;
        int error;

        ASSERT(pnp->pr_type == PR_LWPNAME);

        if (uiop->uio_offset >= THREAD_NAME_MAX)
                return (0);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        bzero(lwpname, sizeof (lwpname));

        t = pnp->pr_common->prc_thread;

        if (t->t_name != NULL)
                (void) strlcpy(lwpname, t->t_name, sizeof (lwpname));

        prunlock(pnp);

        return (pr_uioread(lwpname, sizeof (lwpname), uiop));
}

/* ARGSUSED */
static int
pr_read_xregs(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        kthread_t *t;
        int error;
        void *xreg;
        size_t size;

        ASSERT(pnp->pr_type == PR_XREGS);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        t = pnp->pr_common->prc_thread;

        /*
         * While we would prefer to do the allocation without holding the
         * process under a prlock(), we can only determine this size while
         * holding the process as the hold guarantees us:
         *
         *  o That the process in question actualy exists.
         *  o That the process in question cannot change the set of FPU features
         *    it has enabled.
         *
         * We will drop p_lock across the allocation call itself. This should be
         * safe as the enabled feature set should not change while the process
         * is locked (e.g. enabling extending FPU state like AMX on x86 should
         * require the process to be locked).
         */
        size = prhasx(p) ? prgetprxregsize(p) : 0;
        if (size == 0) {
                prunlock(pnp);
                return (0);
        }

        /*
         * To read the extended register set we require that the thread be
         * stopped as this state is only valid in the kernel when it is. An
         * exception made if the target thread and the current thread are one
         * and the same. We won't stop you from doing something... weird.
         */
        thread_lock(t);
        if (t != curthread && !ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
                thread_unlock(t);
                prunlock(pnp);
                return (EBUSY);
        }
        thread_unlock(t);

        mutex_exit(&p->p_lock);
        xreg = kmem_zalloc(size, KM_SLEEP);
        mutex_enter(&p->p_lock);
        ASSERT3U(size, ==, prgetprxregsize(p));

        if (uiop->uio_offset >= size) {
                prunlock(pnp);
                goto out;
        }

        /* drop p->p_lock while (possibly) touching the stack */
        mutex_exit(&p->p_lock);
        prgetprxregs(ttolwp(t), xreg);
        mutex_enter(&p->p_lock);
        prunlock(pnp);

        error = pr_uioread(xreg, size, uiop);
out:
        kmem_free(xreg, size);
        return (error);
}

static int
pr_read_spymaster(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        psinfo_t psinfo;
        int error;
        klwp_t *lwp;

        ASSERT(pnp->pr_type == PR_SPYMASTER);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        if (pnp->pr_common->prc_thread == NULL) {
                prunlock(pnp);
                return (0);
        }

        lwp = pnp->pr_common->prc_thread->t_lwp;

        if (lwp->lwp_spymaster == NULL) {
                prunlock(pnp);
                return (0);
        }

        bcopy(lwp->lwp_spymaster, &psinfo, sizeof (psinfo_t));
        prunlock(pnp);

        return (pr_uioread(&psinfo, sizeof (psinfo), uiop));
}

static int
pr_read_secflags(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        prsecflags_t ret;
        int error;
        proc_t *p;

        ASSERT(pnp->pr_type == PR_SECFLAGS);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        prgetsecflags(p, &ret);
        prunlock(pnp);

        return (pr_uioread(&ret, sizeof (ret), uiop));
}

#if defined(__sparc)

static int
pr_read_gwindows(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        kthread_t *t;
        gwindows_t *gwp;
        int error;
        size_t size;

        ASSERT(pnp->pr_type == PR_GWINDOWS);

        gwp = kmem_zalloc(sizeof (gwindows_t), KM_SLEEP);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;

        p = pnp->pr_common->prc_proc;
        t = pnp->pr_common->prc_thread;

        /*
         * Drop p->p_lock while touching the stack.
         * The P_PR_LOCK flag prevents the lwp from
         * disappearing while we do this.
         */
        mutex_exit(&p->p_lock);
        if ((size = prnwindows(ttolwp(t))) != 0)
                size = sizeof (gwindows_t) -
                    (SPARC_MAXREGWINDOW - size) * sizeof (struct rwindow);
        if (uiop->uio_offset >= size) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                goto out;
        }
        prgetwindows(ttolwp(t), gwp);
        mutex_enter(&p->p_lock);
        prunlock(pnp);

        error = pr_uioread(gwp, size, uiop);
out:
        kmem_free(gwp, sizeof (gwindows_t));
        return (error);
}

/* ARGSUSED */
static int
pr_read_asrs(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        int error;

        ASSERT(pnp->pr_type == PR_ASRS);

        /* the asrs file exists only for sparc v9 _LP64 processes */
        if ((error = prlock(pnp, ZNO)) == 0) {
                proc_t *p = pnp->pr_common->prc_proc;
                kthread_t *t = pnp->pr_common->prc_thread;
                asrset_t asrset;

                if (p->p_model != DATAMODEL_LP64 ||
                    uiop->uio_offset >= sizeof (asrset_t)) {
                        prunlock(pnp);
                        return (0);
                }

                /*
                 * Drop p->p_lock while touching the stack.
                 * The P_PR_LOCK flag prevents the lwp from
                 * disappearing while we do this.
                 */
                mutex_exit(&p->p_lock);
                prgetasregs(ttolwp(t), asrset);
                mutex_enter(&p->p_lock);
                prunlock(pnp);

                error = pr_uioread(&asrset[0], sizeof (asrset_t), uiop);
        }

        return (error);
}

#endif  /* __sparc */

static int
pr_read_piddir(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        ASSERT(pnp->pr_type == PR_PIDDIR);
        ASSERT(pnp->pr_pidfile != NULL);

        /* use the underlying PR_PIDFILE to read the process */
        pnp = VTOP(pnp->pr_pidfile);
        ASSERT(pnp->pr_type == PR_PIDFILE);

        return (pr_read_pidfile(pnp, uiop));
}

static int
pr_read_pidfile(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        int error;

        ASSERT(pnp->pr_type == PR_PIDFILE || pnp->pr_type == PR_LWPIDFILE);

        if ((error = prlock(pnp, ZNO)) == 0) {
                proc_t *p = pnp->pr_common->prc_proc;
                struct as *as = p->p_as;

                if ((p->p_flag & SSYS) || as == &kas) {
                        /*
                         * /proc I/O cannot be done to a system process.
                         */
                        error = EIO;    /* old /proc semantics */
                } else {
                        /*
                         * We drop p_lock because we don't want to hold
                         * it over an I/O operation because that could
                         * lead to deadlock with the clock thread.
                         * The process will not disappear and its address
                         * space will not change because it is marked P_PR_LOCK.
                         */
                        mutex_exit(&p->p_lock);
                        error = prusrio(p, UIO_READ, uiop, 1);
                        mutex_enter(&p->p_lock);
                }
                prunlock(pnp);
        }

        return (error);
}

#ifdef _SYSCALL32_IMPL

/*
 * Array of ILP32 read functions, indexed by /proc file type.
 */
static int pr_read_status_32(),
        pr_read_lstatus_32(), pr_read_psinfo_32(), pr_read_lpsinfo_32(),
        pr_read_map_32(), pr_read_rmap_32(), pr_read_xmap_32(),
        pr_read_sigact_32(), pr_read_auxv_32(),
        pr_read_usage_32(), pr_read_lusage_32(), pr_read_pagedata_32(),
        pr_read_watch_32(), pr_read_lwpstatus_32(), pr_read_lwpsinfo_32(),
        pr_read_lwpusage_32(), pr_read_spymaster_32(),
#if defined(__sparc)
        pr_read_gwindows_32(),
#endif
        pr_read_opagedata_32();

static int (*pr_read_function_32[PR_NFILES])() = {
        pr_read_inval,          /* /proc                                */
        pr_read_inval,          /* /proc/self                           */
        pr_read_piddir,         /* /proc/<pid> (old /proc read())       */
        pr_read_as,             /* /proc/<pid>/as                       */
        pr_read_inval,          /* /proc/<pid>/ctl                      */
        pr_read_status_32,      /* /proc/<pid>/status                   */
        pr_read_lstatus_32,     /* /proc/<pid>/lstatus                  */
        pr_read_psinfo_32,      /* /proc/<pid>/psinfo                   */
        pr_read_lpsinfo_32,     /* /proc/<pid>/lpsinfo                  */
        pr_read_map_32,         /* /proc/<pid>/map                      */
        pr_read_rmap_32,        /* /proc/<pid>/rmap                     */
        pr_read_xmap_32,        /* /proc/<pid>/xmap                     */
        pr_read_cred,           /* /proc/<pid>/cred                     */
        pr_read_sigact_32,      /* /proc/<pid>/sigact                   */
        pr_read_auxv_32,        /* /proc/<pid>/auxv                     */
#if defined(__x86)
        pr_read_ldt,            /* /proc/<pid>/ldt                      */
#endif
        pr_read_usage_32,       /* /proc/<pid>/usage                    */
        pr_read_lusage_32,      /* /proc/<pid>/lusage                   */
        pr_read_pagedata_32,    /* /proc/<pid>/pagedata                 */
        pr_read_watch_32,       /* /proc/<pid>/watch                    */
        pr_read_inval,          /* /proc/<pid>/cwd                      */
        pr_read_inval,          /* /proc/<pid>/root                     */
        pr_read_inval,          /* /proc/<pid>/fd                       */
        pr_read_inval,          /* /proc/<pid>/fd/nn                    */
        pr_read_inval,          /* /proc/<pid>/fdinfo                   */
        pr_read_fdinfo,         /* /proc/<pid>/fdinfo/nn                */
        pr_read_inval,          /* /proc/<pid>/object                   */
        pr_read_inval,          /* /proc/<pid>/object/xxx               */
        pr_read_inval,          /* /proc/<pid>/lwp                      */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>              */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>/lwpctl       */
        pr_read_lwpname,        /* /proc/<pid>/lwp/<lwpid>/lwpname      */
        pr_read_lwpstatus_32,   /* /proc/<pid>/lwp/<lwpid>/lwpstatus    */
        pr_read_lwpsinfo_32,    /* /proc/<pid>/lwp/<lwpid>/lwpsinfo     */
        pr_read_lwpusage_32,    /* /proc/<pid>/lwp/<lwpid>/lwpusage     */
        pr_read_xregs,          /* /proc/<pid>/lwp/<lwpid>/xregs        */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>/templates    */
        pr_read_inval,          /* /proc/<pid>/lwp/<lwpid>/templates/<id> */
        pr_read_spymaster_32,   /* /proc/<pid>/lwp/<lwpid>/spymaster    */
#if defined(__sparc)
        pr_read_gwindows_32,    /* /proc/<pid>/lwp/<lwpid>/gwindows     */
        pr_read_asrs,           /* /proc/<pid>/lwp/<lwpid>/asrs         */
#endif
        pr_read_priv,           /* /proc/<pid>/priv                     */
        pr_read_inval,          /* /proc/<pid>/path                     */
        pr_read_inval,          /* /proc/<pid>/path/xxx                 */
        pr_read_inval,          /* /proc/<pid>/contracts                */
        pr_read_inval,          /* /proc/<pid>/contracts/<ctid>         */
        pr_read_secflags,       /* /proc/<pid>/secflags                 */
        pr_read_pidfile,        /* old process file                     */
        pr_read_pidfile,        /* old lwp file                         */
        pr_read_opagedata_32,   /* old pagedata file                    */
};

static int
pr_read_status_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        pstatus32_t *sp;
        proc_t *p;
        int error;

        ASSERT(pnp->pr_type == PR_STATUS);

        /*
         * We kmem_alloc() the pstatus structure because
         * it is so big it might blow the kernel stack.
         */
        sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
        if ((error = prlock(pnp, ZNO)) == 0) {
                /*
                 * A 32-bit process cannot get the status of a 64-bit process.
                 * The fields for the 64-bit quantities are not large enough.
                 */
                p = pnp->pr_common->prc_proc;
                if (PROCESS_NOT_32BIT(p)) {
                        prunlock(pnp);
                        error = EOVERFLOW;
                } else {
                        prgetstatus32(pnp->pr_common->prc_proc, sp,
                            VTOZONE(PTOV(pnp)));
                        prunlock(pnp);
                        error = pr_uioread(sp, sizeof (*sp), uiop);
                }
        }
        kmem_free((caddr_t)sp, sizeof (*sp));
        return (error);
}

static int
pr_read_lstatus_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        kthread_t *t;
        lwpdir_t *ldp;
        size_t size;
        prheader32_t *php;
        lwpstatus32_t *sp;
        int error;
        int nlwp;
        int i;

        ASSERT(pnp->pr_type == PR_LSTATUS);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;
        /*
         * A 32-bit process cannot get the status of a 64-bit process.
         * The fields for the 64-bit quantities are not large enough.
         */
        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                return (EOVERFLOW);
        }
        nlwp = p->p_lwpcnt;
        size = sizeof (prheader32_t) + nlwp * LSPAN32(lwpstatus32_t);

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        php = kmem_zalloc(size, KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_lwpcnt can't change while process is locked */
        ASSERT(nlwp == p->p_lwpcnt);

        php->pr_nent = nlwp;
        php->pr_entsize = LSPAN32(lwpstatus32_t);

        sp = (lwpstatus32_t *)(php + 1);
        for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
                if (ldp->ld_entry == NULL ||
                    (t = ldp->ld_entry->le_thread) == NULL)
                        continue;
                prgetlwpstatus32(t, sp, VTOZONE(PTOV(pnp)));
                sp = (lwpstatus32_t *)((caddr_t)sp + LSPAN32(lwpstatus32_t));
        }
        prunlock(pnp);

        error = pr_uioread(php, size, uiop);
        kmem_free(php, size);
        return (error);
}

static int
pr_read_psinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        psinfo32_t psinfo;
        proc_t *p;
        int error = 0;

        ASSERT(pnp->pr_type == PR_PSINFO);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                error = ENOENT;
        else {
                ASSERT(p == pnp->pr_common->prc_proc);
                prgetpsinfo32(p, &psinfo);
                prunlock(pnp);
                error = pr_uioread(&psinfo, sizeof (psinfo), uiop);
        }
        return (error);
}

static int
pr_read_lpsinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        kthread_t *t;
        lwpdir_t *ldp;
        lwpent_t *lep;
        size_t size;
        prheader32_t *php;
        lwpsinfo32_t *sp;
        int error;
        int nlwp;
        int i;

        ASSERT(pnp->pr_type == PR_LPSINFO);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        ASSERT(p == pnp->pr_common->prc_proc);
        if ((nlwp = p->p_lwpcnt + p->p_zombcnt) == 0) {
                prunlock(pnp);
                return (ENOENT);
        }
        size = sizeof (prheader32_t) + nlwp * LSPAN32(lwpsinfo32_t);

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        php = kmem_zalloc(size, KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_lwpcnt can't change while process is locked */
        ASSERT(nlwp == p->p_lwpcnt + p->p_zombcnt);

        php->pr_nent = nlwp;
        php->pr_entsize = LSPAN32(lwpsinfo32_t);

        sp = (lwpsinfo32_t *)(php + 1);
        for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
                if ((lep = ldp->ld_entry) == NULL)
                        continue;
                if ((t = lep->le_thread) != NULL)
                        prgetlwpsinfo32(t, sp);
                else {
                        bzero(sp, sizeof (*sp));
                        sp->pr_lwpid = lep->le_lwpid;
                        sp->pr_state = SZOMB;
                        sp->pr_sname = 'Z';
                        sp->pr_start.tv_sec = (time32_t)lep->le_start;
                }
                sp = (lwpsinfo32_t *)((caddr_t)sp + LSPAN32(lwpsinfo32_t));
        }
        prunlock(pnp);

        error = pr_uioread(php, size, uiop);
        kmem_free(php, size);
        return (error);
}

static int
pr_read_map_common_32(prnode_t *pnp, uio_t *uiop, prnodetype_t type)
{
        proc_t *p;
        struct as *as;
        list_t  iolhead;
        int error;

readmap32_common:
        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        as = p->p_as;

        if ((p->p_flag & SSYS) || as == &kas) {
                prunlock(pnp);
                return (0);
        }

        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                return (EOVERFLOW);
        }

        if (!AS_LOCK_TRYENTER(as, RW_WRITER)) {
                prunlock(pnp);
                delay(1);
                goto readmap32_common;
        }
        mutex_exit(&p->p_lock);

        switch (type) {
        case PR_XMAP:
                error = prgetxmap32(p, &iolhead);
                break;
        case PR_RMAP:
                error = prgetmap32(p, 1, &iolhead);
                break;
        case PR_MAP:
                error = prgetmap32(p, 0, &iolhead);
                break;
        }
        AS_LOCK_EXIT(as);
        mutex_enter(&p->p_lock);
        prunlock(pnp);

        error = pr_iol_uiomove_and_free(&iolhead, uiop, error);

        return (error);
}

static int
pr_read_map_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        ASSERT(pnp->pr_type == PR_MAP);
        return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
}

static int
pr_read_rmap_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        ASSERT(pnp->pr_type == PR_RMAP);
        return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
}

static int
pr_read_xmap_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        ASSERT(pnp->pr_type == PR_XMAP);
        return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
}

static int
pr_read_sigact_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
        proc_t *p;
        struct sigaction32 *sap;
        int sig;
        int error;
        user_t *up;

        ASSERT(pnp->pr_type == PR_SIGACT);

        /*
         * We kmem_alloc() the sigaction32 array because
         * it is so big it might blow the kernel stack.
         */
        sap = kmem_alloc((nsig-1) * sizeof (struct sigaction32), KM_SLEEP);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;
        p = pnp->pr_common->prc_proc;

        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                error = EOVERFLOW;
                goto out;
        }

        if (uiop->uio_offset >= (nsig-1) * sizeof (struct sigaction32)) {
                prunlock(pnp);
                goto out;
        }

        up = PTOU(p);
        for (sig = 1; sig < nsig; sig++)
                prgetaction32(p, up, sig, &sap[sig-1]);
        prunlock(pnp);

        error = pr_uioread(sap, (nsig - 1) * sizeof (struct sigaction32), uiop);
out:
        kmem_free(sap, (nsig-1) * sizeof (struct sigaction32));
        return (error);
}

static int
pr_read_auxv_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        auxv32_t auxv[__KERN_NAUXV_IMPL];
        proc_t *p;
        user_t *up;
        int error;
        int i;

        ASSERT(pnp->pr_type == PR_AUXV);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;

        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                return (EOVERFLOW);
        }

        if (uiop->uio_offset >= sizeof (auxv)) {
                prunlock(pnp);
                return (0);
        }

        up = PTOU(p);
        for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
                auxv[i].a_type = (int32_t)up->u_auxv[i].a_type;
                auxv[i].a_un.a_val = (int32_t)up->u_auxv[i].a_un.a_val;
        }
        prunlock(pnp);

        return (pr_uioread(auxv, sizeof (auxv), uiop));
}

static int
pr_read_usage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        prhusage_t *pup;
        prusage32_t *upup;
        proc_t *p;
        kthread_t *t;
        int error;

        ASSERT(pnp->pr_type == PR_USAGE);

        /* allocate now, before locking the process */
        pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
        upup = kmem_alloc(sizeof (*upup), KM_SLEEP);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                error = ENOENT;
                goto out;
        }
        ASSERT(p == pnp->pr_common->prc_proc);

        if (uiop->uio_offset >= sizeof (prusage32_t)) {
                prunlock(pnp);
                error = 0;
                goto out;
        }

        pup->pr_tstamp = gethrtime();

        pup->pr_count  = p->p_defunct;
        pup->pr_create = p->p_mstart;
        pup->pr_term   = p->p_mterm;

        pup->pr_rtime    = p->p_mlreal;
        pup->pr_utime    = p->p_acct[LMS_USER];
        pup->pr_stime    = p->p_acct[LMS_SYSTEM];
        pup->pr_ttime    = p->p_acct[LMS_TRAP];
        pup->pr_tftime   = p->p_acct[LMS_TFAULT];
        pup->pr_dftime   = p->p_acct[LMS_DFAULT];
        pup->pr_kftime   = p->p_acct[LMS_KFAULT];
        pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
        pup->pr_slptime  = p->p_acct[LMS_SLEEP];
        pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
        pup->pr_stoptime = p->p_acct[LMS_STOPPED];

        pup->pr_minf  = p->p_ru.minflt;
        pup->pr_majf  = p->p_ru.majflt;
        pup->pr_nswap = p->p_ru.nswap;
        pup->pr_inblk = p->p_ru.inblock;
        pup->pr_oublk = p->p_ru.oublock;
        pup->pr_msnd  = p->p_ru.msgsnd;
        pup->pr_mrcv  = p->p_ru.msgrcv;
        pup->pr_sigs  = p->p_ru.nsignals;
        pup->pr_vctx  = p->p_ru.nvcsw;
        pup->pr_ictx  = p->p_ru.nivcsw;
        pup->pr_sysc  = p->p_ru.sysc;
        pup->pr_ioch  = p->p_ru.ioch;

        /*
         * Add the usage information for each active lwp.
         */
        if ((t = p->p_tlist) != NULL &&
            !(pnp->pr_pcommon->prc_flags & PRC_DESTROY)) {
                do {
                        if (t->t_proc_flag & TP_LWPEXIT)
                                continue;
                        pup->pr_count++;
                        praddusage(t, pup);
                } while ((t = t->t_forw) != p->p_tlist);
        }

        prunlock(pnp);

        prcvtusage32(pup, upup);

        error = pr_uioread(upup, sizeof (prusage32_t), uiop);
out:
        kmem_free(pup, sizeof (*pup));
        kmem_free(upup, sizeof (*upup));
        return (error);
}

static int
pr_read_lusage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        int nlwp;
        prhusage_t *pup;
        prheader32_t *php;
        prusage32_t *upup;
        size_t size;
        hrtime_t curtime;
        proc_t *p;
        kthread_t *t;
        lwpdir_t *ldp;
        int error;
        int i;

        ASSERT(pnp->pr_type == PR_LUSAGE);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        ASSERT(p == pnp->pr_common->prc_proc);
        if ((nlwp = p->p_lwpcnt) == 0) {
                prunlock(pnp);
                return (ENOENT);
        }

        size = sizeof (prheader32_t) + (nlwp + 1) * LSPAN32(prusage32_t);
        if (uiop->uio_offset >= size) {
                prunlock(pnp);
                return (0);
        }

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        pup = kmem_zalloc(size + sizeof (prhusage_t), KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_lwpcnt can't change while process is locked */
        ASSERT(nlwp == p->p_lwpcnt);

        php = (prheader32_t *)(pup + 1);
        upup = (prusage32_t *)(php + 1);

        php->pr_nent = nlwp + 1;
        php->pr_entsize = LSPAN32(prusage32_t);

        curtime = gethrtime();

        /*
         * First the summation over defunct lwps.
         */
        pup->pr_count  = p->p_defunct;
        pup->pr_tstamp = curtime;
        pup->pr_create = p->p_mstart;
        pup->pr_term   = p->p_mterm;

        pup->pr_rtime    = p->p_mlreal;
        pup->pr_utime    = p->p_acct[LMS_USER];
        pup->pr_stime    = p->p_acct[LMS_SYSTEM];
        pup->pr_ttime    = p->p_acct[LMS_TRAP];
        pup->pr_tftime   = p->p_acct[LMS_TFAULT];
        pup->pr_dftime   = p->p_acct[LMS_DFAULT];
        pup->pr_kftime   = p->p_acct[LMS_KFAULT];
        pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
        pup->pr_slptime  = p->p_acct[LMS_SLEEP];
        pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
        pup->pr_stoptime = p->p_acct[LMS_STOPPED];

        pup->pr_minf  = p->p_ru.minflt;
        pup->pr_majf  = p->p_ru.majflt;
        pup->pr_nswap = p->p_ru.nswap;
        pup->pr_inblk = p->p_ru.inblock;
        pup->pr_oublk = p->p_ru.oublock;
        pup->pr_msnd  = p->p_ru.msgsnd;
        pup->pr_mrcv  = p->p_ru.msgrcv;
        pup->pr_sigs  = p->p_ru.nsignals;
        pup->pr_vctx  = p->p_ru.nvcsw;
        pup->pr_ictx  = p->p_ru.nivcsw;
        pup->pr_sysc  = p->p_ru.sysc;
        pup->pr_ioch  = p->p_ru.ioch;

        prcvtusage32(pup, upup);

        /*
         * Fill one prusage struct for each active lwp.
         */
        for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
                if (ldp->ld_entry == NULL ||
                    (t = ldp->ld_entry->le_thread) == NULL)
                        continue;
                ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
                ASSERT(nlwp > 0);
                --nlwp;
                upup = (prusage32_t *)
                    ((caddr_t)upup + LSPAN32(prusage32_t));
                prgetusage(t, pup);
                prcvtusage32(pup, upup);
        }
        ASSERT(nlwp == 0);

        prunlock(pnp);

        error = pr_uioread(php, size, uiop);
        kmem_free(pup, size + sizeof (prhusage_t));
        return (error);
}

static int
pr_read_pagedata_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        int error;

        ASSERT(pnp->pr_type == PR_PAGEDATA);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        if ((p->p_flag & SSYS) || p->p_as == &kas) {
                prunlock(pnp);
                return (0);
        }

        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                return (EOVERFLOW);
        }

        mutex_exit(&p->p_lock);
        error = prpdread32(p, pnp->pr_hatid, uiop);
        mutex_enter(&p->p_lock);

        prunlock(pnp);
        return (error);
}

static int
pr_read_opagedata_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        struct as *as;
        int error;

        ASSERT(pnp->pr_type == PR_OPAGEDATA);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        as = p->p_as;

        if ((p->p_flag & SSYS) || as == &kas) {
                prunlock(pnp);
                return (0);
        }

        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                return (EOVERFLOW);
        }

        mutex_exit(&p->p_lock);
        error = oprpdread32(as, pnp->pr_hatid, uiop);
        mutex_enter(&p->p_lock);

        prunlock(pnp);
        return (error);
}

static int
pr_read_watch_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        int error;
        prwatch32_t *Bpwp;
        size_t size;
        prwatch32_t *pwp;
        int nwarea;
        struct watched_area *pwarea;

        ASSERT(pnp->pr_type == PR_WATCH);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        p = pnp->pr_common->prc_proc;
        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                return (EOVERFLOW);
        }
        nwarea = avl_numnodes(&p->p_warea);
        size = nwarea * sizeof (prwatch32_t);
        if (uiop->uio_offset >= size) {
                prunlock(pnp);
                return (0);
        }

        /* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
        mutex_exit(&p->p_lock);
        Bpwp = pwp = kmem_zalloc(size, KM_SLEEP);
        mutex_enter(&p->p_lock);
        /* p->p_nwarea can't change while process is locked */
        ASSERT(nwarea == avl_numnodes(&p->p_warea));

        /* gather the watched areas */
        for (pwarea = avl_first(&p->p_warea); pwarea != NULL;
            pwarea = AVL_NEXT(&p->p_warea, pwarea), pwp++) {
                pwp->pr_vaddr = (caddr32_t)(uintptr_t)pwarea->wa_vaddr;
                pwp->pr_size = (size32_t)(pwarea->wa_eaddr - pwarea->wa_vaddr);
                pwp->pr_wflags = (int)pwarea->wa_flags;
        }

        prunlock(pnp);

        error = pr_uioread(Bpwp, size, uiop);
        kmem_free(Bpwp, size);
        return (error);
}

static int
pr_read_lwpstatus_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        lwpstatus32_t *sp;
        proc_t *p;
        int error;

        ASSERT(pnp->pr_type == PR_LWPSTATUS);

        /*
         * We kmem_alloc() the lwpstatus structure because
         * it is so big it might blow the kernel stack.
         */
        sp = kmem_alloc(sizeof (*sp), KM_SLEEP);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;

        /*
         * A 32-bit process cannot get the status of a 64-bit process.
         * The fields for the 64-bit quantities are not large enough.
         */
        p = pnp->pr_common->prc_proc;
        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                error = EOVERFLOW;
                goto out;
        }

        if (uiop->uio_offset >= sizeof (*sp)) {
                prunlock(pnp);
                goto out;
        }

        prgetlwpstatus32(pnp->pr_common->prc_thread, sp, VTOZONE(PTOV(pnp)));
        prunlock(pnp);

        error = pr_uioread(sp, sizeof (*sp), uiop);
out:
        kmem_free(sp, sizeof (*sp));
        return (error);
}

static int
pr_read_lwpsinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        lwpsinfo32_t lwpsinfo;
        proc_t *p;
        kthread_t *t;
        lwpent_t *lep;

        ASSERT(pnp->pr_type == PR_LWPSINFO);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        ASSERT(p == pnp->pr_common->prc_proc);
        if (pnp->pr_common->prc_tslot == -1) {
                prunlock(pnp);
                return (ENOENT);
        }

        if (uiop->uio_offset >= sizeof (lwpsinfo)) {
                prunlock(pnp);
                return (0);
        }

        if ((t = pnp->pr_common->prc_thread) != NULL)
                prgetlwpsinfo32(t, &lwpsinfo);
        else {
                lep = p->p_lwpdir[pnp->pr_common->prc_tslot].ld_entry;
                bzero(&lwpsinfo, sizeof (lwpsinfo));
                lwpsinfo.pr_lwpid = lep->le_lwpid;
                lwpsinfo.pr_state = SZOMB;
                lwpsinfo.pr_sname = 'Z';
                lwpsinfo.pr_start.tv_sec = (time32_t)lep->le_start;
        }
        prunlock(pnp);

        return (pr_uioread(&lwpsinfo, sizeof (lwpsinfo), uiop));
}

static int
pr_read_lwpusage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        prhusage_t *pup;
        prusage32_t *upup;
        proc_t *p;
        int error;

        ASSERT(pnp->pr_type == PR_LWPUSAGE);

        /* allocate now, before locking the process */
        pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
        upup = kmem_alloc(sizeof (*upup), KM_SLEEP);

        /*
         * We don't want the full treatment of prlock(pnp) here.
         * This file is world-readable and never goes invalid.
         * It doesn't matter if we are in the middle of an exec().
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                error = ENOENT;
                goto out;
        }
        ASSERT(p == pnp->pr_common->prc_proc);
        if (pnp->pr_common->prc_thread == NULL) {
                prunlock(pnp);
                error = ENOENT;
                goto out;
        }
        if (uiop->uio_offset >= sizeof (prusage32_t)) {
                prunlock(pnp);
                error = 0;
                goto out;
        }

        pup->pr_tstamp = gethrtime();
        prgetusage(pnp->pr_common->prc_thread, pup);

        prunlock(pnp);

        prcvtusage32(pup, upup);

        error = pr_uioread(upup, sizeof (prusage32_t), uiop);
out:
        kmem_free(pup, sizeof (*pup));
        kmem_free(upup, sizeof (*upup));
        return (error);
}

static int
pr_read_spymaster_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        psinfo32_t psinfo;
        int error;
        klwp_t *lwp;

        ASSERT(pnp->pr_type == PR_SPYMASTER);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);

        if (pnp->pr_common->prc_thread == NULL) {
                prunlock(pnp);
                return (0);
        }

        lwp = pnp->pr_common->prc_thread->t_lwp;

        if (lwp->lwp_spymaster == NULL) {
                prunlock(pnp);
                return (0);
        }

        psinfo_kto32(lwp->lwp_spymaster, &psinfo);
        prunlock(pnp);

        return (pr_uioread(&psinfo, sizeof (psinfo), uiop));
}

#if defined(__sparc)
static int
pr_read_gwindows_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
        proc_t *p;
        kthread_t *t;
        gwindows32_t *gwp;
        int error;
        size_t size;

        ASSERT(pnp->pr_type == PR_GWINDOWS);

        gwp = kmem_zalloc(sizeof (gwindows32_t), KM_SLEEP);

        if ((error = prlock(pnp, ZNO)) != 0)
                goto out;

        p = pnp->pr_common->prc_proc;
        t = pnp->pr_common->prc_thread;

        if (PROCESS_NOT_32BIT(p)) {
                prunlock(pnp);
                error = EOVERFLOW;
                goto out;
        }

        /*
         * Drop p->p_lock while touching the stack.
         * The P_PR_LOCK flag prevents the lwp from
         * disappearing while we do this.
         */
        mutex_exit(&p->p_lock);
        if ((size = prnwindows(ttolwp(t))) != 0)
                size = sizeof (gwindows32_t) -
                    (SPARC_MAXREGWINDOW - size) * sizeof (struct rwindow32);
        if (uiop->uio_offset >= size) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                goto out;
        }
        prgetwindows32(ttolwp(t), gwp);
        mutex_enter(&p->p_lock);
        prunlock(pnp);

        error = pr_uioread(gwp, size, uiop);
out:
        kmem_free(gwp, sizeof (gwindows32_t));
        return (error);
}
#endif  /* __sparc */

#endif  /* _SYSCALL32_IMPL */

/* ARGSUSED */
static int
prread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
{
        prnode_t *pnp = VTOP(vp);

        ASSERT(pnp->pr_type < PR_NFILES);

#ifdef _SYSCALL32_IMPL
        /*
         * What is read from the /proc files depends on the data
         * model of the caller.  An LP64 process will see LP64
         * data.  An ILP32 process will see ILP32 data.
         */
        if (curproc->p_model == DATAMODEL_LP64)
                return (pr_read_function[pnp->pr_type](pnp, uiop, cr));
        else
                return (pr_read_function_32[pnp->pr_type](pnp, uiop, cr));
#else
        return (pr_read_function[pnp->pr_type](pnp, uiop, cr));
#endif
}

/* Note we intentionally don't handle partial writes/updates. */
static int
pr_write_lwpname(prnode_t *pnp, uio_t *uiop)
{
        kthread_t *t = NULL;
        char *lwpname;
        int error;

        lwpname = kmem_zalloc(THREAD_NAME_MAX, KM_SLEEP);

        if ((error = uiomove(lwpname, THREAD_NAME_MAX, UIO_WRITE, uiop)) != 0) {
                kmem_free(lwpname, THREAD_NAME_MAX);
                return (error);
        }

        /* Somebody tried to write too long a thread name... */
        if (lwpname[THREAD_NAME_MAX - 1] != '\0' || uiop->uio_resid > 0) {
                kmem_free(lwpname, THREAD_NAME_MAX);
                return (EIO);
        }

        VERIFY3U(lwpname[THREAD_NAME_MAX - 1], ==, '\0');

        for (size_t i = 0; lwpname[i] != '\0'; i++) {
                if (!ISPRINT(lwpname[i])) {
                        kmem_free(lwpname, THREAD_NAME_MAX);
                        return (EINVAL);
                }
        }

        /* Equivalent of thread_setname(), but with the ZNO magic. */
        if ((error = prlock(pnp, ZNO)) != 0) {
                kmem_free(lwpname, THREAD_NAME_MAX);
                return (error);
        }

        t = pnp->pr_common->prc_thread;
        if (t->t_name == NULL) {
                t->t_name = lwpname;
        } else {
                (void) strlcpy(t->t_name, lwpname, THREAD_NAME_MAX);
                kmem_free(lwpname, THREAD_NAME_MAX);
        }

        prunlock(pnp);
        return (0);
}

/* ARGSUSED */
static int
prwrite(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
{
        prnode_t *pnp = VTOP(vp);
        int old = 0;
        int error;
        ssize_t resid;

        ASSERT(pnp->pr_type < PR_NFILES);

        /*
         * Only a handful of /proc files are writable, enumerate them here.
         */
        switch (pnp->pr_type) {
        case PR_PIDDIR:         /* directory write()s: visceral revulsion. */
                ASSERT(pnp->pr_pidfile != NULL);
                /* use the underlying PR_PIDFILE to write the process */
                vp = pnp->pr_pidfile;
                pnp = VTOP(vp);
                ASSERT(pnp->pr_type == PR_PIDFILE);
                /* FALLTHROUGH */
        case PR_PIDFILE:
        case PR_LWPIDFILE:
                old = 1;
                /* FALLTHROUGH */
        case PR_AS:
                if ((error = prlock(pnp, ZNO)) == 0) {
                        proc_t *p = pnp->pr_common->prc_proc;
                        struct as *as = p->p_as;

                        if ((p->p_flag & SSYS) || as == &kas) {
                                /*
                                 * /proc I/O cannot be done to a system process.
                                 */
                                error = EIO;
#ifdef _SYSCALL32_IMPL
                        } else if (curproc->p_model == DATAMODEL_ILP32 &&
                            PROCESS_NOT_32BIT(p)) {
                                error = EOVERFLOW;
#endif
                        } else {
                                /*
                                 * See comments above (pr_read_pidfile)
                                 * about this locking dance.
                                 */
                                mutex_exit(&p->p_lock);
                                error = prusrio(p, UIO_WRITE, uiop, old);
                                mutex_enter(&p->p_lock);
                        }
                        prunlock(pnp);
                }
                return (error);

        case PR_CTL:
        case PR_LWPCTL:
                resid = uiop->uio_resid;
                /*
                 * Perform the action on the control file
                 * by passing curthreads credentials
                 * and not target process's credentials.
                 */
#ifdef _SYSCALL32_IMPL
                if (curproc->p_model == DATAMODEL_ILP32)
                        error = prwritectl32(vp, uiop, CRED());
                else
                        error = prwritectl(vp, uiop, CRED());
#else
                error = prwritectl(vp, uiop, CRED());
#endif
                /*
                 * This hack makes sure that the EINTR is passed
                 * all the way back to the caller's write() call.
                 */
                if (error == EINTR)
                        uiop->uio_resid = resid;
                return (error);

        case PR_LWPNAME:
                return (pr_write_lwpname(pnp, uiop));

        default:
                return ((vp->v_type == VDIR)? EISDIR : EBADF);
        }
        /* NOTREACHED */
}

static int
prgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
        prnode_t *pnp = VTOP(vp);
        prnodetype_t type = pnp->pr_type;
        prcommon_t *pcp;
        proc_t *p;
        struct as *as;
        int error;
        vnode_t *rvp;
        timestruc_t now;
        extern uint_t nproc;
        int ngroups;
        int nsig;

        /*
         * This ugly bit of code allows us to keep both versions of this
         * function from the same source.
         */
#ifdef _LP64
        int iam32bit = (curproc->p_model == DATAMODEL_ILP32);
#define PR_OBJSIZE(obj32, obj64)        \
        (iam32bit ? sizeof (obj32) : sizeof (obj64))
#define PR_OBJSPAN(obj32, obj64)        \
        (iam32bit ? LSPAN32(obj32) : LSPAN(obj64))
#else
#define PR_OBJSIZE(obj32, obj64)        \
        (sizeof (obj64))
#define PR_OBJSPAN(obj32, obj64)        \
        (LSPAN(obj64))
#endif

        /*
         * Return all the attributes.  Should be refined
         * so that it returns only those asked for.
         * Most of this is complete fakery anyway.
         */

        /*
         * For files in the /proc/<pid>/object directory,
         * return the attributes of the underlying object.
         * For files in the /proc/<pid>/fd directory,
         * return the attributes of the underlying file, but
         * make it look inaccessible if it is not a regular file.
         * Make directories look like symlinks.
         */
        switch (type) {
        case PR_CURDIR:
        case PR_ROOTDIR:
                if (!(flags & ATTR_REAL))
                        break;
                /* restrict full knowledge of the attributes to owner or root */
                if ((error = praccess(vp, 0, 0, cr, ct)) != 0)
                        return (error);
                /* FALLTHROUGH */
        case PR_OBJECT:
        case PR_FD:
                rvp = pnp->pr_realvp;
                error = VOP_GETATTR(rvp, vap, flags, cr, ct);
                if (error)
                        return (error);
                if (type == PR_FD) {
                        if (rvp->v_type != VREG && rvp->v_type != VDIR)
                                vap->va_mode = 0;
                        else
                                vap->va_mode &= pnp->pr_mode;
                }
                if (type == PR_OBJECT)
                        vap->va_mode &= 07555;
                if (rvp->v_type == VDIR && !(flags & ATTR_REAL)) {
                        vap->va_type = VLNK;
                        vap->va_size = 0;
                        vap->va_nlink = 1;
                }
                return (0);
        default:
                break;
        }

        bzero(vap, sizeof (*vap));
        /*
         * Large Files: Internally proc now uses VPROC to indicate
         * a proc file. Since we have been returning VREG through
         * VOP_GETATTR() until now, we continue to do this so as
         * not to break apps depending on this return value.
         */
        vap->va_type = (vp->v_type == VPROC) ? VREG : vp->v_type;
        vap->va_mode = pnp->pr_mode;
        vap->va_fsid = vp->v_vfsp->vfs_dev;
        vap->va_blksize = DEV_BSIZE;
        vap->va_rdev = 0;
        vap->va_seq = 0;

        if (type == PR_PROCDIR) {
                vap->va_uid = 0;
                vap->va_gid = 0;
                vap->va_nlink = nproc + 2;
                vap->va_nodeid = (ino64_t)PRROOTINO;
                gethrestime(&now);
                vap->va_atime = vap->va_mtime = vap->va_ctime = now;
                vap->va_size = (v.v_proc + 2) * PRSDSIZE;
                vap->va_nblocks = btod(vap->va_size);
                return (0);
        }

        /*
         * /proc/<pid>/self is a symbolic link, and has no prcommon member
         */
        if (type == PR_SELF) {
                vap->va_uid = crgetruid(CRED());
                vap->va_gid = crgetrgid(CRED());
                vap->va_nodeid = (ino64_t)PR_SELF;
                gethrestime(&now);
                vap->va_atime = vap->va_mtime = vap->va_ctime = now;
                vap->va_nlink = 1;
                vap->va_type = VLNK;
                vap->va_size = 0;
                return (0);
        }

        /* A subset of prlock(pnp...) */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        pcp = pnp->pr_common;

        /*
         * Because we're performing a subset of prlock() inline here, we must
         * follow prlock's semantics when encountering a zombie process
         * (PRC_DESTROY flag is set) or an exiting process (SEXITING flag is
         * set). Those semantics indicate acting as if the process is no
         * longer there (return ENOENT).
         *
         * If we chose to proceed here regardless, we may encounter issues
         * when we drop the p_lock (see PR_OBJECTDIR, PR_PATHDIR, PR_*MAP,
         * PR_LDT, and PR_*PAGEDATA below). A process-cleanup which was
         * blocked on p_lock may ignore the P_PR_LOCK flag we set above, since
         * it set one of PRC_DESTROY or SEXITING. If the process then gets
         * destroyed our "p" will be useless, as will its p_lock.
         *
         * It may be desirable to move this check to only places further down
         * prior to actual droppages of p->p_lock, but for now, we're playing
         * it safe and checking here immediately, like prlock() does..
         */
        if (((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
                prunlock(pnp);
                return (ENOENT);
        }

        mutex_enter(&p->p_crlock);
        vap->va_uid = crgetruid(p->p_cred);
        vap->va_gid = crgetrgid(p->p_cred);
        mutex_exit(&p->p_crlock);

        vap->va_nlink = 1;
        vap->va_nodeid = pnp->pr_ino? pnp->pr_ino :
            pmkino(pcp->prc_tslot, pcp->prc_slot, pnp->pr_type);
        if ((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot != -1) {
                vap->va_atime.tv_sec = vap->va_mtime.tv_sec =
                    vap->va_ctime.tv_sec =
                    p->p_lwpdir[pcp->prc_tslot].ld_entry->le_start;
                vap->va_atime.tv_nsec = vap->va_mtime.tv_nsec =
                    vap->va_ctime.tv_nsec = 0;
        } else {
                user_t *up = PTOU(p);
                vap->va_atime.tv_sec = vap->va_mtime.tv_sec =
                    vap->va_ctime.tv_sec = up->u_start.tv_sec;
                vap->va_atime.tv_nsec = vap->va_mtime.tv_nsec =
                    vap->va_ctime.tv_nsec = up->u_start.tv_nsec;
        }

        switch (type) {
        case PR_PIDDIR:
                /* va_nlink: count 'lwp', 'object' and 'fd' directory links */
                vap->va_nlink = 5;
                vap->va_size = sizeof (piddir);
                break;
        case PR_OBJECTDIR:
                if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
                        vap->va_size = 2 * PRSDSIZE;
                else {
                        mutex_exit(&p->p_lock);
                        AS_LOCK_ENTER(as, RW_WRITER);
                        if (as->a_updatedir)
                                rebuild_objdir(as);
                        vap->va_size = (as->a_sizedir + 2) * PRSDSIZE;
                        AS_LOCK_EXIT(as);
                        mutex_enter(&p->p_lock);
                }
                vap->va_nlink = 2;
                break;
        case PR_PATHDIR:
                if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
                        vap->va_size = (P_FINFO(p)->fi_nfiles + 4) * PRSDSIZE;
                else {
                        mutex_exit(&p->p_lock);
                        AS_LOCK_ENTER(as, RW_WRITER);
                        if (as->a_updatedir)
                                rebuild_objdir(as);
                        vap->va_size = (as->a_sizedir + 4 +
                            P_FINFO(p)->fi_nfiles) * PRSDSIZE;
                        AS_LOCK_EXIT(as);
                        mutex_enter(&p->p_lock);
                }
                vap->va_nlink = 2;
                break;
        case PR_PATH:
        case PR_CURDIR:
        case PR_ROOTDIR:
        case PR_CT:
                vap->va_type = VLNK;
                vap->va_size = 0;
                break;
        case PR_FDDIR:
        case PR_FDINFODIR:
                vap->va_nlink = 2;
                vap->va_size = (P_FINFO(p)->fi_nfiles + 2) * PRSDSIZE;
                break;
        case PR_FDINFO: {
                file_t *fp;
                int fd = pnp->pr_index;

                fp = pr_getf(p, fd, NULL);
                if (fp == NULL) {
                        prunlock(pnp);
                        return (ENOENT);
                }
                prunlock(pnp);
                vap->va_size = prgetfdinfosize(p, fp->f_vnode, cr);
                vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
                pr_releasef(fp);
                return (0);
        }
        case PR_LWPDIR:
                /*
                 * va_nlink: count each lwp as a directory link.
                 * va_size: size of p_lwpdir + 2
                 */
                vap->va_nlink = p->p_lwpcnt + p->p_zombcnt + 2;
                vap->va_size = (p->p_lwpdir_sz + 2) * PRSDSIZE;
                break;
        case PR_LWPIDDIR:
                vap->va_nlink = 2;
                vap->va_size = sizeof (lwpiddir);
                break;
        case PR_CTDIR:
                vap->va_nlink = 2;
                vap->va_size = (avl_numnodes(&p->p_ct_held) + 2) * PRSDSIZE;
                break;
        case PR_TMPLDIR:
                vap->va_nlink = 2;
                vap->va_size = (ct_ntypes + 2) * PRSDSIZE;
                break;
        case PR_AS:
        case PR_PIDFILE:
        case PR_LWPIDFILE:
                if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
                        vap->va_size = 0;
                else
                        vap->va_size = as->a_resvsize;
                break;
        case PR_STATUS:
                vap->va_size = PR_OBJSIZE(pstatus32_t, pstatus_t);
                break;
        case PR_LSTATUS:
                vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
                    p->p_lwpcnt * PR_OBJSPAN(lwpstatus32_t, lwpstatus_t);
                break;
        case PR_PSINFO:
                vap->va_size = PR_OBJSIZE(psinfo32_t, psinfo_t);
                break;
        case PR_LPSINFO:
                vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
                    (p->p_lwpcnt + p->p_zombcnt) *
                    PR_OBJSPAN(lwpsinfo32_t, lwpsinfo_t);
                break;
        case PR_MAP:
        case PR_RMAP:
        case PR_XMAP:
                if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
                        vap->va_size = 0;
                else {
                        mutex_exit(&p->p_lock);
                        AS_LOCK_ENTER(as, RW_WRITER);
                        if (type == PR_MAP)
                                vap->va_mtime = as->a_updatetime;
                        if (type == PR_XMAP)
                                vap->va_size = prnsegs(as, 0) *
                                    PR_OBJSIZE(prxmap32_t, prxmap_t);
                        else
                                vap->va_size = prnsegs(as, type == PR_RMAP) *
                                    PR_OBJSIZE(prmap32_t, prmap_t);
                        AS_LOCK_EXIT(as);
                        mutex_enter(&p->p_lock);
                }
                break;
        case PR_CRED:
                mutex_enter(&p->p_crlock);
                vap->va_size = sizeof (prcred_t);
                ngroups = crgetngroups(p->p_cred);
                if (ngroups > 1)
                        vap->va_size += (ngroups - 1) * sizeof (gid_t);
                mutex_exit(&p->p_crlock);
                break;
        case PR_PRIV:
                vap->va_size = prgetprivsize();
                break;
        case PR_SECFLAGS:
                vap->va_size = sizeof (prsecflags_t);
                break;
        case PR_SIGACT:
                nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
                vap->va_size = (nsig-1) *
                    PR_OBJSIZE(struct sigaction32, struct sigaction);
                break;
        case PR_AUXV:
                vap->va_size = __KERN_NAUXV_IMPL * PR_OBJSIZE(auxv32_t, auxv_t);
                break;
#if defined(__x86)
        case PR_LDT:
                mutex_exit(&p->p_lock);
                mutex_enter(&p->p_ldtlock);
                vap->va_size = prnldt(p) * sizeof (struct ssd);
                mutex_exit(&p->p_ldtlock);
                mutex_enter(&p->p_lock);
                break;
#endif
        case PR_USAGE:
                vap->va_size = PR_OBJSIZE(prusage32_t, prusage_t);
                break;
        case PR_LUSAGE:
                vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
                    (p->p_lwpcnt + 1) * PR_OBJSPAN(prusage32_t, prusage_t);
                break;
        case PR_PAGEDATA:
                if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
                        vap->va_size = 0;
                else {
                        /*
                         * We can drop p->p_lock before grabbing the
                         * address space lock because p->p_as will not
                         * change while the process is marked P_PR_LOCK.
                         */
                        mutex_exit(&p->p_lock);
                        AS_LOCK_ENTER(as, RW_WRITER);
#ifdef _LP64
                        vap->va_size = iam32bit?
                            prpdsize32(as) : prpdsize(as);
#else
                        vap->va_size = prpdsize(as);
#endif
                        AS_LOCK_EXIT(as);
                        mutex_enter(&p->p_lock);
                }
                break;
        case PR_OPAGEDATA:
                if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
                        vap->va_size = 0;
                else {
                        mutex_exit(&p->p_lock);
                        AS_LOCK_ENTER(as, RW_WRITER);
#ifdef _LP64
                        vap->va_size = iam32bit?
                            oprpdsize32(as) : oprpdsize(as);
#else
                        vap->va_size = oprpdsize(as);
#endif
                        AS_LOCK_EXIT(as);
                        mutex_enter(&p->p_lock);
                }
                break;
        case PR_WATCH:
                vap->va_size = avl_numnodes(&p->p_warea) *
                    PR_OBJSIZE(prwatch32_t, prwatch_t);
                break;
        case PR_LWPSTATUS:
                vap->va_size = PR_OBJSIZE(lwpstatus32_t, lwpstatus_t);
                break;
        case PR_LWPSINFO:
                vap->va_size = PR_OBJSIZE(lwpsinfo32_t, lwpsinfo_t);
                break;
        case PR_LWPUSAGE:
                vap->va_size = PR_OBJSIZE(prusage32_t, prusage_t);
                break;
        case PR_XREGS:
                if (prhasx(p))
                        vap->va_size = prgetprxregsize(p);
                else
                        vap->va_size = 0;
                break;
        case PR_SPYMASTER:
                if (pnp->pr_common->prc_thread != NULL &&
                    pnp->pr_common->prc_thread->t_lwp->lwp_spymaster != NULL) {
                        vap->va_size = PR_OBJSIZE(psinfo32_t, psinfo_t);
                } else {
                        vap->va_size = 0;
                }
                break;
#if defined(__sparc)
        case PR_GWINDOWS:
        {
                kthread_t *t;
                int n;

                /*
                 * If there is no lwp then just make the size zero.
                 * This can happen if the lwp exits between the VOP_LOOKUP()
                 * of the /proc/<pid>/lwp/<lwpid>/gwindows file and the
                 * VOP_GETATTR() of the resulting vnode.
                 */
                if ((t = pcp->prc_thread) == NULL) {
                        vap->va_size = 0;
                        break;
                }
                /*
                 * Drop p->p_lock while touching the stack.
                 * The P_PR_LOCK flag prevents the lwp from
                 * disappearing while we do this.
                 */
                mutex_exit(&p->p_lock);
                if ((n = prnwindows(ttolwp(t))) == 0)
                        vap->va_size = 0;
                else
                        vap->va_size = PR_OBJSIZE(gwindows32_t, gwindows_t) -
                            (SPARC_MAXREGWINDOW - n) *
                            PR_OBJSIZE(struct rwindow32, struct rwindow);
                mutex_enter(&p->p_lock);
                break;
        }
        case PR_ASRS:
#ifdef _LP64
                if (p->p_model == DATAMODEL_LP64)
                        vap->va_size = sizeof (asrset_t);
                else
#endif
                        vap->va_size = 0;
                break;
#endif
        case PR_CTL:
        case PR_LWPCTL:
        default:
                vap->va_size = 0;
                break;
        }

        prunlock(pnp);
        vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
        return (0);
}

static int
praccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
{
        prnode_t *pnp = VTOP(vp);
        prnodetype_t type = pnp->pr_type;
        int vmode;
        vtype_t vtype;
        proc_t *p;
        int error = 0;
        vnode_t *rvp;
        vnode_t *xvp;

        if ((mode & VWRITE) && vn_is_readonly(vp))
                return (EROFS);

        switch (type) {
        case PR_PROCDIR:
                break;

        case PR_OBJECT:
        case PR_FD:
                /*
                 * Disallow write access to the underlying objects.
                 * Disallow access to underlying non-regular-file fds.
                 * Disallow access to fds with other than existing open modes.
                 */
                rvp = pnp->pr_realvp;
                vtype = rvp->v_type;
                vmode = pnp->pr_mode;
                if ((type == PR_OBJECT && (mode & VWRITE)) ||
                    (type == PR_FD && vtype != VREG && vtype != VDIR) ||
                    (type == PR_FD && (vmode & mode) != mode &&
                    secpolicy_proc_access(cr) != 0))
                        return (EACCES);
                return (VOP_ACCESS(rvp, mode, flags, cr, ct));

        case PR_PSINFO:         /* these files can be read by anyone */
        case PR_LPSINFO:
        case PR_LWPSINFO:
        case PR_LWPDIR:
        case PR_LWPIDDIR:
        case PR_USAGE:
        case PR_LUSAGE:
        case PR_LWPUSAGE:
                p = pr_p_lock(pnp);
                mutex_exit(&pr_pidlock);
                if (p == NULL)
                        return (ENOENT);
                prunlock(pnp);
                break;

        default:
                /*
                 * Except for the world-readable files above,
                 * only /proc/pid exists if the process is a zombie.
                 */
                if ((error = prlock(pnp,
                    (type == PR_PIDDIR)? ZYES : ZNO)) != 0)
                        return (error);
                p = pnp->pr_common->prc_proc;
                if (p != curproc)
                        error = priv_proc_cred_perm(cr, p, NULL, mode);

                if (error != 0 || p == curproc || (p->p_flag & SSYS) ||
                    p->p_as == &kas || (xvp = p->p_exec) == NULL) {
                        prunlock(pnp);
                } else {
                        /*
                         * Determine if the process's executable is readable.
                         * We have to drop p->p_lock before the secpolicy
                         * and VOP operation.
                         */
                        VN_HOLD(xvp);
                        prunlock(pnp);
                        if (secpolicy_proc_access(cr) != 0)
                                error = VOP_ACCESS(xvp, VREAD, 0, cr, ct);
                        VN_RELE(xvp);
                }
                if (error)
                        return (error);
                break;
        }

        if (type == PR_CURDIR || type == PR_ROOTDIR) {
                /*
                 * Final access check on the underlying directory vnode.
                 */
                return (VOP_ACCESS(pnp->pr_realvp, mode, flags, cr, ct));
        }

        /*
         * Visceral revulsion:  For compatibility with old /proc,
         * allow the /proc/<pid> directory to be opened for writing.
         */
        vmode = pnp->pr_mode;
        if (type == PR_PIDDIR)
                vmode |= VWRITE;
        if ((vmode & mode) != mode)
                error = secpolicy_proc_access(cr);
        return (error);
}

/*
 * Array of lookup functions, indexed by /proc file type.
 */
static vnode_t *pr_lookup_notdir(), *pr_lookup_procdir(), *pr_lookup_piddir(),
        *pr_lookup_objectdir(), *pr_lookup_lwpdir(), *pr_lookup_lwpiddir(),
        *pr_lookup_fddir(), *pr_lookup_fdinfodir(), *pr_lookup_pathdir(),
        *pr_lookup_tmpldir(), *pr_lookup_ctdir();

static vnode_t *(*pr_lookup_function[PR_NFILES])() = {
        pr_lookup_procdir,      /* /proc                                */
        pr_lookup_notdir,       /* /proc/self                           */
        pr_lookup_piddir,       /* /proc/<pid>                          */
        pr_lookup_notdir,       /* /proc/<pid>/as                       */
        pr_lookup_notdir,       /* /proc/<pid>/ctl                      */
        pr_lookup_notdir,       /* /proc/<pid>/status                   */
        pr_lookup_notdir,       /* /proc/<pid>/lstatus                  */
        pr_lookup_notdir,       /* /proc/<pid>/psinfo                   */
        pr_lookup_notdir,       /* /proc/<pid>/lpsinfo                  */
        pr_lookup_notdir,       /* /proc/<pid>/map                      */
        pr_lookup_notdir,       /* /proc/<pid>/rmap                     */
        pr_lookup_notdir,       /* /proc/<pid>/xmap                     */
        pr_lookup_notdir,       /* /proc/<pid>/cred                     */
        pr_lookup_notdir,       /* /proc/<pid>/sigact                   */
        pr_lookup_notdir,       /* /proc/<pid>/auxv                     */
#if defined(__x86)
        pr_lookup_notdir,       /* /proc/<pid>/ldt                      */
#endif
        pr_lookup_notdir,       /* /proc/<pid>/usage                    */
        pr_lookup_notdir,       /* /proc/<pid>/lusage                   */
        pr_lookup_notdir,       /* /proc/<pid>/pagedata                 */
        pr_lookup_notdir,       /* /proc/<pid>/watch                    */
        pr_lookup_notdir,       /* /proc/<pid>/cwd                      */
        pr_lookup_notdir,       /* /proc/<pid>/root                     */
        pr_lookup_fddir,        /* /proc/<pid>/fd                       */
        pr_lookup_notdir,       /* /proc/<pid>/fd/nn                    */
        pr_lookup_fdinfodir,    /* /proc/<pid>/fdinfo                   */
        pr_lookup_notdir,       /* /proc/<pid>/fdinfo/nn                */
        pr_lookup_objectdir,    /* /proc/<pid>/object                   */
        pr_lookup_notdir,       /* /proc/<pid>/object/xxx               */
        pr_lookup_lwpdir,       /* /proc/<pid>/lwp                      */
        pr_lookup_lwpiddir,     /* /proc/<pid>/lwp/<lwpid>              */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/lwpctl       */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/lwpname      */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/lwpstatus    */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/lwpsinfo     */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/lwpusage     */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/xregs        */
        pr_lookup_tmpldir,      /* /proc/<pid>/lwp/<lwpid>/templates    */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/templates/<id> */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/spymaster    */
#if defined(__sparc)
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/gwindows     */
        pr_lookup_notdir,       /* /proc/<pid>/lwp/<lwpid>/asrs         */
#endif
        pr_lookup_notdir,       /* /proc/<pid>/priv                     */
        pr_lookup_pathdir,      /* /proc/<pid>/path                     */
        pr_lookup_notdir,       /* /proc/<pid>/path/xxx                 */
        pr_lookup_ctdir,        /* /proc/<pid>/contracts                */
        pr_lookup_notdir,       /* /proc/<pid>/contracts/<ctid>         */
        pr_lookup_notdir,       /* /proc/<pid>/secflags                 */
        pr_lookup_notdir,       /* old process file                     */
        pr_lookup_notdir,       /* old lwp file                         */
        pr_lookup_notdir,       /* old pagedata file                    */
};

static int
prlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
        prnode_t *pnp = VTOP(dp);
        prnodetype_t type = pnp->pr_type;
        int error;

        ASSERT(dp->v_type == VDIR);
        ASSERT(type < PR_NFILES);

        if (type != PR_PROCDIR && strcmp(comp, "..") == 0) {
                VN_HOLD(pnp->pr_parent);
                *vpp = pnp->pr_parent;
                return (0);
        }

        if (*comp == '\0' ||
            strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) {
                VN_HOLD(dp);
                *vpp = dp;
                return (0);
        }

        switch (type) {
        case PR_CURDIR:
        case PR_ROOTDIR:
                /* restrict lookup permission to owner or root */
                if ((error = praccess(dp, VEXEC, 0, cr, ct)) != 0)
                        return (error);
                /* FALLTHROUGH */
        case PR_FD:
                /*
                 * Performing a VOP_LOOKUP on the underlying vnode and emitting
                 * the resulting vnode, without encapsulation, as our own is a
                 * very special case when it comes to the assumptions built
                 * into VFS.
                 *
                 * Since the resulting vnode is highly likely to be at some
                 * abitrary position in another filesystem, we insist that the
                 * VTRAVERSE flag is set on the parent.  This prevents things
                 * such as the v_path freshness logic from mistaking the
                 * resulting vnode as a "real" child of the parent, rather than
                 * a consequence of this "procfs wormhole".
                 *
                 * Failure to establish such protections can lead to
                 * incorrectly calculated v_paths being set on nodes reached
                 * through these lookups.
                 */
                ASSERT((dp->v_flag & VTRAVERSE) != 0);

                dp = pnp->pr_realvp;
                return (VOP_LOOKUP(dp, comp, vpp, pathp, flags, rdir, cr, ct,
                    direntflags, realpnp));
        default:
                break;
        }

        if ((type == PR_OBJECTDIR || type == PR_FDDIR ||
            type == PR_FDINFODIR || type == PR_PATHDIR) &&
            (error = praccess(dp, VEXEC, 0, cr, ct)) != 0)
                return (error);

        /* XXX - Do we need to pass ct, direntflags, or realpnp? */
        *vpp = (pr_lookup_function[type](dp, comp));

        return ((*vpp == NULL) ? ENOENT : 0);
}

/* ARGSUSED */
static int
prcreate(vnode_t *dp, char *comp, vattr_t *vap, vcexcl_t excl,
    int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
    vsecattr_t *vsecp)
{
        int error;

        if ((error = prlookup(dp, comp, vpp, NULL, 0, NULL, cr,
            ct, NULL, NULL)) != 0) {
                if (error == ENOENT) {
                        /* One can't O_CREAT nonexistent files in /proc. */
                        error = EACCES;
                }
                return (error);
        }

        if (excl == EXCL) {
                /* Disallow the O_EXCL case */
                error = EEXIST;
        } else if ((error = praccess(*vpp, mode, 0, cr, ct)) == 0) {
                /* Before proceeding, handle O_TRUNC if necessary. */
                if (vap->va_mask & AT_SIZE) {
                        vnode_t *vp = *vpp;

                        if (vp->v_type == VDIR) {
                                /* Only allow O_TRUNC on files */
                                error = EISDIR;
                        } else if (vp->v_type != VPROC ||
                            VTOP(vp)->pr_type != PR_FD) {
                                /*
                                 * Disallow for files outside of the
                                 * /proc/<pid>/fd/<n> entries
                                 */
                                error = EACCES;
                        } else {
                                uint_t mask;

                                vp = VTOP(vp)->pr_realvp;
                                mask = vap->va_mask;
                                vap->va_mask = AT_SIZE;
                                error = VOP_SETATTR(vp, vap, 0, cr, ct);
                                vap->va_mask = mask;
                        }
                }
        }

        if (error) {
                VN_RELE(*vpp);
                *vpp = NULL;
        }
        return (error);
}

/* ARGSUSED */
static vnode_t *
pr_lookup_notdir(vnode_t *dp, char *comp)
{
        return (NULL);
}

/*
 * Find or construct a process vnode for the given pid.
 */
static vnode_t *
pr_lookup_procdir(vnode_t *dp, char *comp)
{
        pid_t pid;
        prnode_t *pnp;
        prcommon_t *pcp;
        vnode_t *vp;
        proc_t *p;
        int c;

        ASSERT(VTOP(dp)->pr_type == PR_PROCDIR);

        if (strcmp(comp, "self") == 0) {
                pnp = prgetnode(dp, PR_SELF);
                return (PTOV(pnp));
        } else {
                pid = 0;
                while ((c = *comp++) != '\0') {
                        if (c < '0' || c > '9')
                                return (NULL);
                        pid = 10*pid + c - '0';
                        if (pid > maxpid)
                                return (NULL);
                }
        }

        pnp = prgetnode(dp, PR_PIDDIR);

        mutex_enter(&pidlock);
        if ((p = prfind(pid)) == NULL || p->p_stat == SIDL) {
                mutex_exit(&pidlock);
                prfreenode(pnp);
                return (NULL);
        }
        ASSERT(p->p_stat != 0);

        /* NOTE: we're holding pidlock across the policy call. */
        if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
                mutex_exit(&pidlock);
                prfreenode(pnp);
                return (NULL);
        }

        mutex_enter(&p->p_lock);
        mutex_exit(&pidlock);

        /*
         * If a process vnode already exists and it is not invalid
         * and it was created by the current process and it belongs
         * to the same /proc mount point as our parent vnode, then
         * just use it and discard the newly-allocated prnode.
         */
        for (vp = p->p_trace; vp != NULL; vp = VTOP(vp)->pr_next) {
                if (!(VTOP(VTOP(vp)->pr_pidfile)->pr_flags & PR_INVAL) &&
                    VTOP(vp)->pr_owner == curproc &&
                    vp->v_vfsp == dp->v_vfsp) {
                        ASSERT(!(VTOP(vp)->pr_flags & PR_INVAL));
                        VN_HOLD(vp);
                        prfreenode(pnp);
                        mutex_exit(&p->p_lock);
                        return (vp);
                }
        }
        pnp->pr_owner = curproc;

        /*
         * prgetnode() initialized most of the prnode.
         * Finish the job.
         */
        pcp = pnp->pr_common;   /* the newly-allocated prcommon struct */
        if ((vp = p->p_trace) != NULL) {
                /* discard the new prcommon and use the existing prcommon */
                prfreecommon(pcp);
                pcp = VTOP(vp)->pr_common;
                mutex_enter(&pcp->prc_mutex);
                ASSERT(pcp->prc_refcnt > 0);
                pcp->prc_refcnt++;
                mutex_exit(&pcp->prc_mutex);
                pnp->pr_common = pcp;
        } else {
                /* initialize the new prcommon struct */
                if ((p->p_flag & SSYS) || p->p_as == &kas)
                        pcp->prc_flags |= PRC_SYS;
                if (p->p_stat == SZOMB || (p->p_flag & SEXITING) != 0)
                        pcp->prc_flags |= PRC_DESTROY;
                pcp->prc_proc = p;
                pcp->prc_datamodel = p->p_model;
                pcp->prc_pid = p->p_pid;
                pcp->prc_slot = p->p_slot;
        }
        pnp->pr_pcommon = pcp;
        pnp->pr_parent = dp;
        VN_HOLD(dp);
        /*
         * Link in the old, invalid directory vnode so we
         * can later determine the last close of the file.
         */
        pnp->pr_next = p->p_trace;
        p->p_trace = dp = PTOV(pnp);

        /*
         * Kludge for old /proc: initialize the PR_PIDFILE as well.
         */
        vp = pnp->pr_pidfile;
        pnp = VTOP(vp);
        pnp->pr_ino = ptoi(pcp->prc_pid);
        pnp->pr_common = pcp;
        pnp->pr_pcommon = pcp;
        pnp->pr_parent = dp;
        pnp->pr_next = p->p_plist;
        p->p_plist = vp;

        mutex_exit(&p->p_lock);
        return (dp);
}

static vnode_t *
pr_lookup_piddir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        vnode_t *vp;
        prnode_t *pnp;
        proc_t *p;
        user_t *up;
        prdirent_t *dirp;
        int i;
        enum prnodetype type;

        ASSERT(dpnp->pr_type == PR_PIDDIR);

        for (i = 0; i < NPIDDIRFILES; i++) {
                /* Skip "." and ".." */
                dirp = &piddir[i+2];
                if (strcmp(comp, dirp->d_name) == 0)
                        break;
        }

        if (i >= NPIDDIRFILES)
                return (NULL);

        type = (int)dirp->d_ino;
        pnp = prgetnode(dp, type);

        p = pr_p_lock(dpnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                prfreenode(pnp);
                return (NULL);
        }
        if (dpnp->pr_pcommon->prc_flags & PRC_DESTROY) {
                switch (type) {
                case PR_PSINFO:
                case PR_USAGE:
                        break;
                default:
                        prunlock(dpnp);
                        prfreenode(pnp);
                        return (NULL);
                }
        }

        switch (type) {
        case PR_CURDIR:
        case PR_ROOTDIR:
                up = PTOU(p);
                vp = (type == PR_CURDIR)? up->u_cdir :
                    (up->u_rdir? up->u_rdir : rootdir);

                if (vp == NULL) {
                        /* can't happen(?) */
                        prunlock(dpnp);
                        prfreenode(pnp);
                        return (NULL);
                }
                /*
                 * Fill in the prnode so future references will
                 * be able to find the underlying object's vnode.
                 */
                VN_HOLD(vp);
                pnp->pr_realvp = vp;
                PTOV(pnp)->v_flag |= VTRAVERSE;
                break;
        default:
                break;
        }

        mutex_enter(&dpnp->pr_mutex);

        if ((vp = dpnp->pr_files[i]) != NULL &&
            !(VTOP(vp)->pr_flags & PR_INVAL)) {
                VN_HOLD(vp);
                mutex_exit(&dpnp->pr_mutex);
                prunlock(dpnp);
                prfreenode(pnp);
                return (vp);
        }

        /*
         * prgetnode() initialized most of the prnode.
         * Finish the job.
         */
        pnp->pr_common = dpnp->pr_common;
        pnp->pr_pcommon = dpnp->pr_pcommon;
        pnp->pr_parent = dp;
        VN_HOLD(dp);
        pnp->pr_index = i;

        dpnp->pr_files[i] = vp = PTOV(pnp);

        /*
         * Link new vnode into list of all /proc vnodes for the process.
         */
        if (vp->v_type == VPROC) {
                pnp->pr_next = p->p_plist;
                p->p_plist = vp;
        }
        mutex_exit(&dpnp->pr_mutex);
        prunlock(dpnp);
        return (vp);
}

static vnode_t *
pr_lookup_objectdir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        prnode_t *pnp;
        proc_t *p;
        struct seg *seg;
        struct as *as;
        vnode_t *vp;
        vattr_t vattr;

        ASSERT(dpnp->pr_type == PR_OBJECTDIR);

        pnp = prgetnode(dp, PR_OBJECT);

        if (prlock(dpnp, ZNO) != 0) {
                prfreenode(pnp);
                return (NULL);
        }
        p = dpnp->pr_common->prc_proc;
        if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
                prunlock(dpnp);
                prfreenode(pnp);
                return (NULL);
        }

        /*
         * We drop p_lock before grabbing the address space lock
         * in order to avoid a deadlock with the clock thread.
         * The process will not disappear and its address space
         * will not change because it is marked P_PR_LOCK.
         */
        mutex_exit(&p->p_lock);
        AS_LOCK_ENTER(as, RW_READER);
        if ((seg = AS_SEGFIRST(as)) == NULL) {
                vp = NULL;
                goto out;
        }
        if (strcmp(comp, "a.out") == 0) {
                vp = p->p_exec;
                goto out;
        }
        do {
                /*
                 * Manufacture a filename for the "object" directory.
                 */
                vattr.va_mask = AT_FSID|AT_NODEID;
                if (seg->s_ops == &segvn_ops &&
                    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
                    vp != NULL && vp->v_type == VREG &&
                    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
                        char name[64];

                        if (vp == p->p_exec)    /* "a.out" */
                                continue;
                        pr_object_name(name, vp, &vattr);
                        if (strcmp(name, comp) == 0)
                                goto out;
                }
        } while ((seg = AS_SEGNEXT(as, seg)) != NULL);

        vp = NULL;
out:
        if (vp != NULL) {
                VN_HOLD(vp);
        }
        AS_LOCK_EXIT(as);
        mutex_enter(&p->p_lock);
        prunlock(dpnp);

        if (vp == NULL)
                prfreenode(pnp);
        else {
                /*
                 * Fill in the prnode so future references will
                 * be able to find the underlying object's vnode.
                 * Don't link this prnode into the list of all
                 * prnodes for the process; this is a one-use node.
                 * Its use is entirely to catch and fail opens for writing.
                 */
                pnp->pr_realvp = vp;
                vp = PTOV(pnp);
        }

        return (vp);
}

/*
 * Find or construct an lwp vnode for the given lwpid.
 */
static vnode_t *
pr_lookup_lwpdir(vnode_t *dp, char *comp)
{
        id_t tid;       /* same type as t->t_tid */
        int want_agent;
        prnode_t *dpnp = VTOP(dp);
        prnode_t *pnp;
        prcommon_t *pcp;
        vnode_t *vp;
        proc_t *p;
        kthread_t *t;
        lwpdir_t *ldp;
        lwpent_t *lep;
        int tslot;
        int c;

        ASSERT(dpnp->pr_type == PR_LWPDIR);

        tid = 0;
        if (strcmp(comp, "agent") == 0)
                want_agent = 1;
        else {
                want_agent = 0;
                while ((c = *comp++) != '\0') {
                        id_t otid;

                        if (c < '0' || c > '9')
                                return (NULL);
                        otid = tid;
                        tid = 10*tid + c - '0';
                        if (tid/10 != otid)     /* integer overflow */
                                return (NULL);
                }
        }

        pnp = prgetnode(dp, PR_LWPIDDIR);

        p = pr_p_lock(dpnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                prfreenode(pnp);
                return (NULL);
        }

        if (want_agent) {
                if ((t = p->p_agenttp) == NULL)
                        lep = NULL;
                else {
                        tid = t->t_tid;
                        tslot = t->t_dslot;
                        lep = p->p_lwpdir[tslot].ld_entry;
                }
        } else {
                if ((ldp = lwp_hash_lookup(p, tid)) == NULL)
                        lep = NULL;
                else {
                        tslot = (int)(ldp - p->p_lwpdir);
                        lep = ldp->ld_entry;
                }
        }

        if (lep == NULL) {
                prunlock(dpnp);
                prfreenode(pnp);
                return (NULL);
        }

        /*
         * If an lwp vnode already exists and it is not invalid
         * and it was created by the current process and it belongs
         * to the same /proc mount point as our parent vnode, then
         * just use it and discard the newly-allocated prnode.
         */
        for (vp = lep->le_trace; vp != NULL; vp = VTOP(vp)->pr_next) {
                if (!(VTOP(vp)->pr_flags & PR_INVAL) &&
                    VTOP(vp)->pr_owner == curproc &&
                    vp->v_vfsp == dp->v_vfsp) {
                        VN_HOLD(vp);
                        prunlock(dpnp);
                        prfreenode(pnp);
                        return (vp);
                }
        }
        pnp->pr_owner = curproc;

        /*
         * prgetnode() initialized most of the prnode.
         * Finish the job.
         */
        pcp = pnp->pr_common;   /* the newly-allocated prcommon struct */
        if ((vp = lep->le_trace) != NULL) {
                /* discard the new prcommon and use the existing prcommon */
                prfreecommon(pcp);
                pcp = VTOP(vp)->pr_common;
                mutex_enter(&pcp->prc_mutex);
                ASSERT(pcp->prc_refcnt > 0);
                pcp->prc_refcnt++;
                mutex_exit(&pcp->prc_mutex);
                pnp->pr_common = pcp;
        } else {
                /* initialize the new prcommon struct */
                pcp->prc_flags |= PRC_LWP;
                if ((p->p_flag & SSYS) || p->p_as == &kas)
                        pcp->prc_flags |= PRC_SYS;
                if ((t = lep->le_thread) == NULL)
                        pcp->prc_flags |= PRC_DESTROY;
                pcp->prc_proc = p;
                pcp->prc_datamodel = dpnp->pr_pcommon->prc_datamodel;
                pcp->prc_pid = p->p_pid;
                pcp->prc_slot = p->p_slot;
                pcp->prc_thread = t;
                pcp->prc_tid = tid;
                pcp->prc_tslot = tslot;
        }
        pnp->pr_pcommon = dpnp->pr_pcommon;
        pnp->pr_parent = dp;
        VN_HOLD(dp);
        /*
         * Link in the old, invalid directory vnode so we
         * can later determine the last close of the file.
         */
        pnp->pr_next = lep->le_trace;
        lep->le_trace = vp = PTOV(pnp);
        prunlock(dpnp);
        return (vp);
}

static vnode_t *
pr_lookup_lwpiddir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        vnode_t *vp;
        prnode_t *pnp;
        proc_t *p;
        prdirent_t *dirp;
        int i;
        enum prnodetype type;

        ASSERT(dpnp->pr_type == PR_LWPIDDIR);

        for (i = 0; i < NLWPIDDIRFILES; i++) {
                /* Skip "." and ".." */
                dirp = &lwpiddir[i+2];
                if (strcmp(comp, dirp->d_name) == 0)
                        break;
        }

        if (i >= NLWPIDDIRFILES)
                return (NULL);

        type = (int)dirp->d_ino;
        pnp = prgetnode(dp, type);

        p = pr_p_lock(dpnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                prfreenode(pnp);
                return (NULL);
        }
        if (dpnp->pr_common->prc_flags & PRC_DESTROY) {
                /*
                 * Only the lwpsinfo file is present for zombie lwps.
                 * Nothing is present if the lwp has been reaped.
                 */
                if (dpnp->pr_common->prc_tslot == -1 ||
                    type != PR_LWPSINFO) {
                        prunlock(dpnp);
                        prfreenode(pnp);
                        return (NULL);
                }
        }

#if defined(__sparc)
        /* the asrs file exists only for sparc v9 _LP64 processes */
        if (type == PR_ASRS && p->p_model != DATAMODEL_LP64) {
                prunlock(dpnp);
                prfreenode(pnp);
                return (NULL);
        }
#endif

        mutex_enter(&dpnp->pr_mutex);

        if ((vp = dpnp->pr_files[i]) != NULL &&
            !(VTOP(vp)->pr_flags & PR_INVAL)) {
                VN_HOLD(vp);
                mutex_exit(&dpnp->pr_mutex);
                prunlock(dpnp);
                prfreenode(pnp);
                return (vp);
        }

        /*
         * prgetnode() initialized most of the prnode.
         * Finish the job.
         */
        pnp->pr_common = dpnp->pr_common;
        pnp->pr_pcommon = dpnp->pr_pcommon;
        pnp->pr_parent = dp;
        VN_HOLD(dp);
        pnp->pr_index = i;

        dpnp->pr_files[i] = vp = PTOV(pnp);

        /*
         * Link new vnode into list of all /proc vnodes for the process.
         */
        if (vp->v_type == VPROC) {
                pnp->pr_next = p->p_plist;
                p->p_plist = vp;
        }
        mutex_exit(&dpnp->pr_mutex);
        prunlock(dpnp);
        return (vp);
}

/*
 * Lookup one of the process's file vnodes.
 */
static vnode_t *
pr_lookup_fddir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        prnode_t *pnp;
        vnode_t *vp = NULL;
        proc_t *p;
        file_t *fp;
        uint_t fd;
        int c;

        ASSERT(dpnp->pr_type == PR_FDDIR);

        fd = 0;
        while ((c = *comp++) != '\0') {
                int ofd;
                if (c < '0' || c > '9')
                        return (NULL);
                ofd = fd;
                fd = 10 * fd + c - '0';
                if (fd / 10 != ofd)     /* integer overflow */
                        return (NULL);
        }

        pnp = prgetnode(dp, PR_FD);

        if (prlock(dpnp, ZNO) != 0) {
                prfreenode(pnp);
                return (NULL);
        }
        p = dpnp->pr_common->prc_proc;
        if ((p->p_flag & SSYS) || p->p_as == &kas) {
                prunlock(dpnp);
                prfreenode(pnp);
                return (NULL);
        }

        if ((fp = pr_getf(p, fd, NULL)) != NULL) {
                pnp->pr_mode = 07111;
                if (fp->f_flag & FREAD)
                        pnp->pr_mode |= 0444;
                if (fp->f_flag & FWRITE)
                        pnp->pr_mode |= 0222;
                vp = fp->f_vnode;
                VN_HOLD(vp);
        }

        prunlock(dpnp);
        if (fp != NULL) {
                pr_releasef(fp);
        }

        if (vp == NULL) {
                prfreenode(pnp);
                return (NULL);
        }

        /*
         * Fill in the prnode so future references will
         * be able to find the underlying object's vnode.
         * Don't link this prnode into the list of all
         * prnodes for the process; this is a one-use node.
         */
        pnp->pr_realvp = vp;
        pnp->pr_parent = dp;            /* needed for prlookup */
        VN_HOLD(dp);
        vp = PTOV(pnp);
        if (pnp->pr_realvp->v_type == VDIR) {
                vp->v_type = VDIR;
                vp->v_flag |= VTRAVERSE;
        }

        return (vp);
}

static vnode_t *
pr_lookup_fdinfodir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        prnode_t *pnp;
        vnode_t *vp = NULL;
        proc_t *p;
        uint_t fd;
        int c;

        ASSERT(dpnp->pr_type == PR_FDINFODIR);

        fd = 0;
        while ((c = *comp++) != '\0') {
                int ofd;
                if (c < '0' || c > '9')
                        return (NULL);
                ofd = fd;
                fd = 10 * fd + c - '0';
                if (fd / 10 != ofd)     /* integer overflow */
                        return (NULL);
        }

        pnp = prgetnode(dp, PR_FDINFO);

        if (prlock(dpnp, ZNO) != 0) {
                prfreenode(pnp);
                return (NULL);
        }
        p = dpnp->pr_common->prc_proc;
        if ((p->p_flag & SSYS) || p->p_as == &kas) {
                prunlock(dpnp);
                prfreenode(pnp);
                return (NULL);
        }

        /*
         * Don't link this prnode into the list of all
         * prnodes for the process; this is a one-use node.
         * Unlike the FDDIR case, the underlying vnode is not stored in
         * pnp->pr_realvp. Instead, the fd number is stored in pnp->pr_index
         * and used by pr_read_fdinfo() to return information for the right
         * file descriptor.
         */
        pnp->pr_common = dpnp->pr_common;
        pnp->pr_pcommon = dpnp->pr_pcommon;
        pnp->pr_parent = dp;
        pnp->pr_index = fd;
        VN_HOLD(dp);
        prunlock(dpnp);
        vp = PTOV(pnp);

        return (vp);
}

static vnode_t *
pr_lookup_pathdir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        prnode_t *pnp;
        vnode_t *vp = NULL;
        proc_t *p;
        uint_t fd, flags = 0;
        int c;
        uf_entry_t *ufp;
        uf_info_t *fip;
        enum { NAME_FD, NAME_OBJECT, NAME_ROOT, NAME_CWD, NAME_UNKNOWN } type;
        char *tmp;
        int idx;
        struct seg *seg;
        struct as *as = NULL;
        vattr_t vattr;

        ASSERT(dpnp->pr_type == PR_PATHDIR);

        /*
         * First, check if this is a numeric entry, in which case we have a
         * file descriptor.
         */
        fd = 0;
        type = NAME_FD;
        tmp = comp;
        while ((c = *tmp++) != '\0') {
                int ofd;
                if (c < '0' || c > '9') {
                        type = NAME_UNKNOWN;
                        break;
                }
                ofd = fd;
                fd = 10*fd + c - '0';
                if (fd/10 != ofd) {     /* integer overflow */
                        type = NAME_UNKNOWN;
                        break;
                }
        }

        /*
         * Next, see if it is one of the special values {root, cwd}.
         */
        if (type == NAME_UNKNOWN) {
                if (strcmp(comp, "root") == 0)
                        type = NAME_ROOT;
                else if (strcmp(comp, "cwd") == 0)
                        type = NAME_CWD;
        }

        /*
         * Grab the necessary data from the process
         */
        if (prlock(dpnp, ZNO) != 0)
                return (NULL);
        p = dpnp->pr_common->prc_proc;

        fip = P_FINFO(p);

        switch (type) {
        case NAME_ROOT:
                if ((vp = PTOU(p)->u_rdir) == NULL)
                        vp = p->p_zone->zone_rootvp;
                VN_HOLD(vp);
                break;
        case NAME_CWD:
                vp = PTOU(p)->u_cdir;
                VN_HOLD(vp);
                break;
        default:
                if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
                        prunlock(dpnp);
                        return (NULL);
                }
        }
        mutex_exit(&p->p_lock);

        /*
         * Determine if this is an object entry
         */
        if (type == NAME_UNKNOWN) {
                /*
                 * Start with the inode index immediately after the number of
                 * files.
                 */
                mutex_enter(&fip->fi_lock);
                idx = fip->fi_nfiles + 4;
                mutex_exit(&fip->fi_lock);

                if (strcmp(comp, "a.out") == 0) {
                        if (p->p_execdir != NULL) {
                                vp = p->p_execdir;
                                VN_HOLD(vp);
                                type = NAME_OBJECT;
                                flags |= PR_AOUT;
                        } else {
                                vp = p->p_exec;
                                VN_HOLD(vp);
                                type = NAME_OBJECT;
                        }
                } else {
                        AS_LOCK_ENTER(as, RW_READER);
                        if ((seg = AS_SEGFIRST(as)) != NULL) {
                                do {
                                        /*
                                         * Manufacture a filename for the
                                         * "object" directory.
                                         */
                                        vattr.va_mask = AT_FSID|AT_NODEID;
                                        if (seg->s_ops == &segvn_ops &&
                                            SEGOP_GETVP(seg, seg->s_base, &vp)
                                            == 0 &&
                                            vp != NULL && vp->v_type == VREG &&
                                            VOP_GETATTR(vp, &vattr, 0, CRED(),
                                            NULL) == 0) {
                                                char name[64];

                                                if (vp == p->p_exec)
                                                        continue;
                                                idx++;
                                                pr_object_name(name, vp,
                                                    &vattr);
                                                if (strcmp(name, comp) == 0)
                                                        break;
                                        }
                                } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
                        }

                        if (seg == NULL) {
                                vp = NULL;
                        } else {
                                VN_HOLD(vp);
                                type = NAME_OBJECT;
                        }

                        AS_LOCK_EXIT(as);
                }
        }


        switch (type) {
        case NAME_FD:
                mutex_enter(&fip->fi_lock);
                if (fd < fip->fi_nfiles) {
                        UF_ENTER(ufp, fip, fd);
                        if (ufp->uf_file != NULL) {
                                vp = ufp->uf_file->f_vnode;
                                VN_HOLD(vp);
                        }
                        UF_EXIT(ufp);
                }
                mutex_exit(&fip->fi_lock);
                idx = fd + 4;
                break;
        case NAME_ROOT:
                idx = 2;
                break;
        case NAME_CWD:
                idx = 3;
                break;
        case NAME_OBJECT:
        case NAME_UNKNOWN:
                /* Nothing to do */
                break;
        }

        mutex_enter(&p->p_lock);
        prunlock(dpnp);

        if (vp != NULL) {
                pnp = prgetnode(dp, PR_PATH);

                pnp->pr_flags |= flags;
                pnp->pr_common = dpnp->pr_common;
                pnp->pr_pcommon = dpnp->pr_pcommon;
                pnp->pr_realvp = vp;
                pnp->pr_parent = dp;            /* needed for prlookup */
                pnp->pr_ino = pmkino(idx, dpnp->pr_common->prc_slot, PR_PATH);
                VN_HOLD(dp);
                vp = PTOV(pnp);
                vp->v_type = VLNK;
        }

        return (vp);
}

/*
 * Look up one of the process's active templates.
 */
static vnode_t *
pr_lookup_tmpldir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        prnode_t *pnp;
        vnode_t *vp = NULL;
        proc_t *p;
        int i;

        ASSERT(dpnp->pr_type == PR_TMPLDIR);

        for (i = 0; i < ct_ntypes; i++)
                if (strcmp(comp, ct_types[i]->ct_type_name) == 0)
                        break;
        if (i == ct_ntypes)
                return (NULL);

        pnp = prgetnode(dp, PR_TMPL);

        if (prlock(dpnp, ZNO) != 0) {
                prfreenode(pnp);
                return (NULL);
        }
        p = dpnp->pr_common->prc_proc;
        if ((p->p_flag & SSYS) || p->p_as == &kas ||
            (dpnp->pr_common->prc_flags & (PRC_DESTROY | PRC_LWP)) != PRC_LWP) {
                prunlock(dpnp);
                prfreenode(pnp);
                return (NULL);
        }
        if (ttolwp(dpnp->pr_common->prc_thread)->lwp_ct_active[i] != NULL) {
                pnp->pr_common = dpnp->pr_common;
                pnp->pr_pcommon = dpnp->pr_pcommon;
                pnp->pr_parent = dp;
                pnp->pr_cttype = i;
                VN_HOLD(dp);
                vp = PTOV(pnp);
        } else {
                prfreenode(pnp);
        }
        prunlock(dpnp);

        return (vp);
}

/*
 * Look up one of the contracts owned by the process.
 */
static vnode_t *
pr_lookup_ctdir(vnode_t *dp, char *comp)
{
        prnode_t *dpnp = VTOP(dp);
        prnode_t *pnp;
        vnode_t *vp = NULL;
        proc_t *p;
        id_t id = 0;
        contract_t *ct;
        int c;

        ASSERT(dpnp->pr_type == PR_CTDIR);

        while ((c = *comp++) != '\0') {
                id_t oid;
                if (c < '0' || c > '9')
                        return (NULL);
                oid = id;
                id = 10 * id + c - '0';
                if (id / 10 != oid)     /* integer overflow */
                        return (NULL);
        }

        /*
         * Search all contracts; we'll filter below.
         */
        ct = contract_ptr(id, GLOBAL_ZONEUNIQID);
        if (ct == NULL)
                return (NULL);

        pnp = prgetnode(dp, PR_CT);

        if (prlock(dpnp, ZNO) != 0) {
                prfreenode(pnp);
                contract_rele(ct);
                return (NULL);
        }
        p = dpnp->pr_common->prc_proc;
        /*
         * We only allow lookups of contracts owned by this process, or,
         * if we are zsched and this is a zone's procfs, contracts on
         * stuff in the zone which are held by processes or contracts
         * outside the zone.  (see logic in contract_status_common)
         */
        if ((ct->ct_owner != p) &&
            !(p == VTOZONE(dp)->zone_zsched && ct->ct_state < CTS_ORPHAN &&
            VTOZONE(dp)->zone_uniqid == contract_getzuniqid(ct) &&
            VTOZONE(dp)->zone_uniqid != GLOBAL_ZONEUNIQID &&
            ct->ct_czuniqid == GLOBAL_ZONEUNIQID)) {
                prunlock(dpnp);
                prfreenode(pnp);
                contract_rele(ct);
                return (NULL);
        }
        pnp->pr_common = dpnp->pr_common;
        pnp->pr_pcommon = dpnp->pr_pcommon;
        pnp->pr_contract = ct;
        pnp->pr_parent = dp;
        pnp->pr_ino = pmkino(id, pnp->pr_common->prc_slot, PR_CT);
        VN_HOLD(dp);
        prunlock(dpnp);
        vp = PTOV(pnp);

        return (vp);
}

/*
 * Construct an lwp vnode for the old /proc interface.
 * We stand on our head to make the /proc plumbing correct.
 */
vnode_t *
prlwpnode(prnode_t *pnp, uint_t tid)
{
        char comp[12];
        vnode_t *dp;
        vnode_t *vp;
        prcommon_t *pcp;
        proc_t *p;

        /*
         * Lookup the /proc/<pid>/lwp/<lwpid> directory vnode.
         */
        if (pnp->pr_type == PR_PIDFILE) {
                dp = pnp->pr_parent;            /* /proc/<pid> */
                VN_HOLD(dp);
                vp = pr_lookup_piddir(dp, "lwp");
                VN_RELE(dp);
                if ((dp = vp) == NULL)          /* /proc/<pid>/lwp */
                        return (NULL);
        } else if (pnp->pr_type == PR_LWPIDFILE) {
                dp = pnp->pr_parent;            /* /proc/<pid>/lwp/<lwpid> */
                dp = VTOP(dp)->pr_parent;       /* /proc/<pid>/lwp */
                VN_HOLD(dp);
        } else {
                return (NULL);
        }

        (void) pr_u32tos(tid, comp, sizeof (comp));
        vp = pr_lookup_lwpdir(dp, comp);
        VN_RELE(dp);
        if ((dp = vp) == NULL)
                return (NULL);

        pnp = prgetnode(dp, PR_LWPIDFILE);
        vp = PTOV(pnp);

        /*
         * prgetnode() initialized most of the prnode.
         * Finish the job.
         */
        pcp = VTOP(dp)->pr_common;
        pnp->pr_ino = ptoi(pcp->prc_pid);
        pnp->pr_common = pcp;
        pnp->pr_pcommon = VTOP(dp)->pr_pcommon;
        pnp->pr_parent = dp;
        /*
         * Link new vnode into list of all /proc vnodes for the process.
         */
        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL) {
                VN_RELE(dp);
                prfreenode(pnp);
                vp = NULL;
        } else if (pcp->prc_thread == NULL) {
                prunlock(pnp);
                VN_RELE(dp);
                prfreenode(pnp);
                vp = NULL;
        } else {
                pnp->pr_next = p->p_plist;
                p->p_plist = vp;
                prunlock(pnp);
        }

        return (vp);
}

#if defined(DEBUG)

static  uint32_t nprnode;
static  uint32_t nprcommon;

#define INCREMENT(x)    atomic_inc_32(&x);
#define DECREMENT(x)    atomic_dec_32(&x);

#else

#define INCREMENT(x)
#define DECREMENT(x)

#endif  /* DEBUG */

/*
 * New /proc vnode required; allocate it and fill in most of the fields.
 */
prnode_t *
prgetnode(vnode_t *dp, prnodetype_t type)
{
        prnode_t *pnp;
        prcommon_t *pcp;
        vnode_t *vp;
        ulong_t nfiles;

        INCREMENT(nprnode);
        pnp = kmem_zalloc(sizeof (prnode_t), KM_SLEEP);

        mutex_init(&pnp->pr_mutex, NULL, MUTEX_DEFAULT, NULL);
        pnp->pr_type = type;

        pnp->pr_vnode = vn_alloc(KM_SLEEP);

        vp = PTOV(pnp);
        vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
        vn_setops(vp, prvnodeops);
        vp->v_vfsp = dp->v_vfsp;
        vp->v_type = VPROC;
        vp->v_data = (caddr_t)pnp;

        switch (type) {
        case PR_PIDDIR:
        case PR_LWPIDDIR:
                /*
                 * We need a prcommon and a files array for each of these.
                 */
                INCREMENT(nprcommon);

                pcp = kmem_zalloc(sizeof (prcommon_t), KM_SLEEP);
                pcp->prc_refcnt = 1;
                pnp->pr_common = pcp;
                mutex_init(&pcp->prc_mutex, NULL, MUTEX_DEFAULT, NULL);
                cv_init(&pcp->prc_wait, NULL, CV_DEFAULT, NULL);

                nfiles = (type == PR_PIDDIR)? NPIDDIRFILES : NLWPIDDIRFILES;
                pnp->pr_files =
                    kmem_zalloc(nfiles * sizeof (vnode_t *), KM_SLEEP);

                vp->v_type = VDIR;
                /*
                 * Mode should be read-search by all, but we cannot so long
                 * as we must support compatibility mode with old /proc.
                 * Make /proc/<pid> be read by owner only, search by all.
                 * Make /proc/<pid>/lwp/<lwpid> read-search by all.  Also,
                 * set VDIROPEN on /proc/<pid> so it can be opened for writing.
                 */
                if (type == PR_PIDDIR) {
                        /* kludge for old /proc interface */
                        prnode_t *xpnp = prgetnode(dp, PR_PIDFILE);
                        pnp->pr_pidfile = PTOV(xpnp);
                        pnp->pr_mode = 0511;
                        vp->v_flag |= VDIROPEN;
                } else {
                        pnp->pr_mode = 0555;
                }

                break;

        case PR_CURDIR:
        case PR_ROOTDIR:
        case PR_FDDIR:
        case PR_FDINFODIR:
        case PR_OBJECTDIR:
        case PR_PATHDIR:
        case PR_CTDIR:
        case PR_TMPLDIR:
                vp->v_type = VDIR;
                pnp->pr_mode = 0500;    /* read-search by owner only */
                break;

        case PR_CT:
                vp->v_type = VLNK;
                pnp->pr_mode = 0500;    /* read-search by owner only */
                break;

        case PR_PATH:
        case PR_SELF:
                vp->v_type = VLNK;
                pnp->pr_mode = 0777;
                break;

        case PR_LWPDIR:
                vp->v_type = VDIR;
                pnp->pr_mode = 0555;    /* read-search by all */
                break;

        case PR_AS:
        case PR_TMPL:
                pnp->pr_mode = 0600;    /* read-write by owner only */
                break;

        case PR_CTL:
        case PR_LWPCTL:
                pnp->pr_mode = 0200;    /* write-only by owner only */
                break;

        case PR_PIDFILE:
        case PR_LWPIDFILE:
                pnp->pr_mode = 0600;    /* read-write by owner only */
                break;

        case PR_LWPNAME:
                pnp->pr_mode = 0644;    /* readable by all + owner can write */
                break;

        case PR_PSINFO:
        case PR_LPSINFO:
        case PR_LWPSINFO:
        case PR_USAGE:
        case PR_LUSAGE:
        case PR_LWPUSAGE:
                pnp->pr_mode = 0444;    /* read-only by all */
                break;

        default:
                pnp->pr_mode = 0400;    /* read-only by owner only */
                break;
        }
        vn_exists(vp);
        return (pnp);
}

/*
 * Free the storage obtained from prgetnode().
 */
void
prfreenode(prnode_t *pnp)
{
        vnode_t *vp;
        ulong_t nfiles;

        vn_invalid(PTOV(pnp));
        vn_free(PTOV(pnp));
        mutex_destroy(&pnp->pr_mutex);

        switch (pnp->pr_type) {
        case PR_PIDDIR:
                /* kludge for old /proc interface */
                if (pnp->pr_pidfile != NULL) {
                        prfreenode(VTOP(pnp->pr_pidfile));
                        pnp->pr_pidfile = NULL;
                }
                /* FALLTHROUGH */
        case PR_LWPIDDIR:
                /*
                 * We allocated a prcommon and a files array for each of these.
                 */
                prfreecommon(pnp->pr_common);
                nfiles = (pnp->pr_type == PR_PIDDIR)?
                    NPIDDIRFILES : NLWPIDDIRFILES;
                kmem_free(pnp->pr_files, nfiles * sizeof (vnode_t *));
                break;
        default:
                break;
        }
        /*
         * If there is an underlying vnode, be sure
         * to release it after freeing the prnode.
         */
        vp = pnp->pr_realvp;
        kmem_free(pnp, sizeof (*pnp));
        DECREMENT(nprnode);
        if (vp != NULL) {
                VN_RELE(vp);
        }
}

/*
 * Free a prcommon structure, if the reference count reaches zero.
 */
static void
prfreecommon(prcommon_t *pcp)
{
        mutex_enter(&pcp->prc_mutex);
        ASSERT(pcp->prc_refcnt > 0);
        if (--pcp->prc_refcnt != 0)
                mutex_exit(&pcp->prc_mutex);
        else {
                mutex_exit(&pcp->prc_mutex);

                ASSERT(pcp->prc_refcnt == 0);
                ASSERT(pcp->prc_selfopens == 0 && pcp->prc_writers == 0);

                pollhead_clean(&pcp->prc_pollhead);
                mutex_destroy(&pcp->prc_mutex);
                cv_destroy(&pcp->prc_wait);
                kmem_free(pcp, sizeof (prcommon_t));
                DECREMENT(nprcommon);
        }
}

/*
 * Array of readdir functions, indexed by /proc file type.
 */
static int pr_readdir_notdir(), pr_readdir_procdir(), pr_readdir_piddir(),
        pr_readdir_objectdir(), pr_readdir_lwpdir(), pr_readdir_lwpiddir(),
        pr_readdir_fddir(), pr_readdir_fdinfodir(), pr_readdir_pathdir(),
        pr_readdir_tmpldir(), pr_readdir_ctdir();

static int (*pr_readdir_function[PR_NFILES])() = {
        pr_readdir_procdir,     /* /proc                                */
        pr_readdir_notdir,      /* /proc/self                           */
        pr_readdir_piddir,      /* /proc/<pid>                          */
        pr_readdir_notdir,      /* /proc/<pid>/as                       */
        pr_readdir_notdir,      /* /proc/<pid>/ctl                      */
        pr_readdir_notdir,      /* /proc/<pid>/status                   */
        pr_readdir_notdir,      /* /proc/<pid>/lstatus                  */
        pr_readdir_notdir,      /* /proc/<pid>/psinfo                   */
        pr_readdir_notdir,      /* /proc/<pid>/lpsinfo                  */
        pr_readdir_notdir,      /* /proc/<pid>/map                      */
        pr_readdir_notdir,      /* /proc/<pid>/rmap                     */
        pr_readdir_notdir,      /* /proc/<pid>/xmap                     */
        pr_readdir_notdir,      /* /proc/<pid>/cred                     */
        pr_readdir_notdir,      /* /proc/<pid>/sigact                   */
        pr_readdir_notdir,      /* /proc/<pid>/auxv                     */
#if defined(__x86)
        pr_readdir_notdir,      /* /proc/<pid>/ldt                      */
#endif
        pr_readdir_notdir,      /* /proc/<pid>/usage                    */
        pr_readdir_notdir,      /* /proc/<pid>/lusage                   */
        pr_readdir_notdir,      /* /proc/<pid>/pagedata                 */
        pr_readdir_notdir,      /* /proc/<pid>/watch                    */
        pr_readdir_notdir,      /* /proc/<pid>/cwd                      */
        pr_readdir_notdir,      /* /proc/<pid>/root                     */
        pr_readdir_fddir,       /* /proc/<pid>/fd                       */
        pr_readdir_notdir,      /* /proc/<pid>/fd/nn                    */
        pr_readdir_fdinfodir,   /* /proc/<pid>/fdinfo                   */
        pr_readdir_notdir,      /* /proc/<pid>/fdinfo/nn                */
        pr_readdir_objectdir,   /* /proc/<pid>/object                   */
        pr_readdir_notdir,      /* /proc/<pid>/object/xxx               */
        pr_readdir_lwpdir,      /* /proc/<pid>/lwp                      */
        pr_readdir_lwpiddir,    /* /proc/<pid>/lwp/<lwpid>              */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/lwpctl       */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/lwpname      */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/lwpstatus    */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/lwpsinfo     */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/lwpusage     */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/xregs        */
        pr_readdir_tmpldir,     /* /proc/<pid>/lwp/<lwpid>/templates    */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/templates/<id> */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/spymaster    */
#if defined(__sparc)
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/gwindows     */
        pr_readdir_notdir,      /* /proc/<pid>/lwp/<lwpid>/asrs         */
#endif
        pr_readdir_notdir,      /* /proc/<pid>/priv                     */
        pr_readdir_pathdir,     /* /proc/<pid>/path                     */
        pr_readdir_notdir,      /* /proc/<pid>/path/xxx                 */
        pr_readdir_ctdir,       /* /proc/<pid>/contracts                */
        pr_readdir_notdir,      /* /proc/<pid>/contracts/<ctid>         */
        pr_readdir_notdir,      /* /proc/<pid>/secflags                 */
        pr_readdir_notdir,      /* old process file                     */
        pr_readdir_notdir,      /* old lwp file                         */
        pr_readdir_notdir,      /* old pagedata file                    */
};

/* ARGSUSED */
static int
prreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
        prnode_t *pnp = VTOP(vp);

        ASSERT(pnp->pr_type < PR_NFILES);

        /* XXX - Do we need to pass ct and flags? */
        return (pr_readdir_function[pnp->pr_type](pnp, uiop, eofp));
}

/* ARGSUSED */
static int
pr_readdir_notdir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        return (ENOTDIR);
}

/* ARGSUSED */
static int
pr_readdir_procdir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        zoneid_t zoneid;
        gfs_readdir_state_t gstate;
        int error, eof = 0;
        offset_t n;

        ASSERT(pnp->pr_type == PR_PROCDIR);

        zoneid = VTOZONE(PTOV(pnp))->zone_id;

        if ((error = gfs_readdir_init(&gstate, PNSIZ, PRSDSIZE, uiop,
            PRROOTINO, PRROOTINO, 0)) != 0)
                return (error);

        /*
         * Loop until user's request is satisfied or until all processes
         * have been examined.
         */
        while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
                uint_t pid;
                int pslot;
                proc_t *p;

                /*
                 * Find next entry.  Skip processes not visible where
                 * this /proc was mounted.
                 */
                mutex_enter(&pidlock);
                while (n < v.v_proc &&
                    ((p = pid_entry(n)) == NULL || p->p_stat == SIDL ||
                    (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) ||
                    secpolicy_basic_procinfo(CRED(), p, curproc) != 0))
                        n++;

                /*
                 * Stop when entire proc table has been examined.
                 */
                if (n >= v.v_proc) {
                        mutex_exit(&pidlock);
                        eof = 1;
                        break;
                }

                ASSERT(p->p_stat != 0);
                pid = p->p_pid;
                pslot = p->p_slot;
                mutex_exit(&pidlock);
                error = gfs_readdir_emitn(&gstate, uiop, n,
                    pmkino(0, pslot, PR_PIDDIR), pid);
                if (error)
                        break;
        }

        return (gfs_readdir_fini(&gstate, error, eofp, eof));
}

/* ARGSUSED */
static int
pr_readdir_piddir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        int zombie = ((pnp->pr_pcommon->prc_flags & PRC_DESTROY) != 0);
        prdirent_t dirent;
        prdirent_t *dirp;
        offset_t off;
        int error;

        ASSERT(pnp->pr_type == PR_PIDDIR);

        if (uiop->uio_offset < 0 ||
            uiop->uio_offset % sizeof (prdirent_t) != 0 ||
            uiop->uio_resid < sizeof (prdirent_t))
                return (EINVAL);
        if (pnp->pr_pcommon->prc_proc == NULL)
                return (ENOENT);
        if (uiop->uio_offset >= sizeof (piddir))
                goto out;

        /*
         * Loop until user's request is satisfied, omitting some
         * files along the way if the process is a zombie.
         */
        for (dirp = &piddir[uiop->uio_offset / sizeof (prdirent_t)];
            uiop->uio_resid >= sizeof (prdirent_t) &&
            dirp < &piddir[NPIDDIRFILES+2];
            uiop->uio_offset = off + sizeof (prdirent_t), dirp++) {
                off = uiop->uio_offset;
                if (zombie) {
                        switch (dirp->d_ino) {
                        case PR_PIDDIR:
                        case PR_PROCDIR:
                        case PR_PSINFO:
                        case PR_USAGE:
                                break;
                        default:
                                continue;
                        }
                }
                bcopy(dirp, &dirent, sizeof (prdirent_t));
                if (dirent.d_ino == PR_PROCDIR)
                        dirent.d_ino = PRROOTINO;
                else
                        dirent.d_ino = pmkino(0, pnp->pr_pcommon->prc_slot,
                            dirent.d_ino);
                if ((error = uiomove((caddr_t)&dirent, sizeof (prdirent_t),
                    UIO_READ, uiop)) != 0)
                        return (error);
        }
out:
        if (eofp)
                *eofp = (uiop->uio_offset >= sizeof (piddir));
        return (0);
}

static void
rebuild_objdir(struct as *as)
{
        struct seg *seg;
        vnode_t *vp;
        vattr_t vattr;
        vnode_t **dir;
        ulong_t nalloc;
        ulong_t nentries;
        int i, j;
        ulong_t nold, nnew;

        ASSERT(AS_WRITE_HELD(as));

        if (as->a_updatedir == 0 && as->a_objectdir != NULL)
                return;
        as->a_updatedir = 0;

        if ((nalloc = avl_numnodes(&as->a_segtree)) == 0 ||
            (seg = AS_SEGFIRST(as)) == NULL)    /* can't happen? */
                return;

        /*
         * Allocate space for the new object directory.
         * (This is usually about two times too many entries.)
         */
        nalloc = (nalloc + 0xf) & ~0xf;         /* multiple of 16 */
        dir = kmem_zalloc(nalloc * sizeof (vnode_t *), KM_SLEEP);

        /* fill in the new directory with desired entries */
        nentries = 0;
        do {
                vattr.va_mask = AT_FSID|AT_NODEID;
                if (seg->s_ops == &segvn_ops &&
                    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
                    vp != NULL && vp->v_type == VREG &&
                    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
                        for (i = 0; i < nentries; i++)
                                if (vp == dir[i])
                                        break;
                        if (i == nentries) {
                                ASSERT(nentries < nalloc);
                                dir[nentries++] = vp;
                        }
                }
        } while ((seg = AS_SEGNEXT(as, seg)) != NULL);

        if (as->a_objectdir == NULL) {  /* first time */
                as->a_objectdir = dir;
                as->a_sizedir = nalloc;
                return;
        }

        /*
         * Null out all of the defunct entries in the old directory.
         */
        nold = 0;
        nnew = nentries;
        for (i = 0; i < as->a_sizedir; i++) {
                if ((vp = as->a_objectdir[i]) != NULL) {
                        for (j = 0; j < nentries; j++) {
                                if (vp == dir[j]) {
                                        dir[j] = NULL;
                                        nnew--;
                                        break;
                                }
                        }
                        if (j == nentries)
                                as->a_objectdir[i] = NULL;
                        else
                                nold++;
                }
        }

        if (nold + nnew > as->a_sizedir) {
                /*
                 * Reallocate the old directory to have enough
                 * space for the old and new entries combined.
                 * Round up to the next multiple of 16.
                 */
                ulong_t newsize = (nold + nnew + 0xf) & ~0xf;
                vnode_t **newdir = kmem_zalloc(newsize * sizeof (vnode_t *),
                    KM_SLEEP);
                bcopy(as->a_objectdir, newdir,
                    as->a_sizedir * sizeof (vnode_t *));
                kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
                as->a_objectdir = newdir;
                as->a_sizedir = newsize;
        }

        /*
         * Move all new entries to the old directory and
         * deallocate the space used by the new directory.
         */
        if (nnew) {
                for (i = 0, j = 0; i < nentries; i++) {
                        if ((vp = dir[i]) == NULL)
                                continue;
                        for (; j < as->a_sizedir; j++) {
                                if (as->a_objectdir[j] != NULL)
                                        continue;
                                as->a_objectdir[j++] = vp;
                                break;
                        }
                }
        }
        kmem_free(dir, nalloc * sizeof (vnode_t *));
}

/*
 * Return the vnode from a slot in the process's object directory.
 * The caller must have locked the process's address space.
 * The only caller is below, in pr_readdir_objectdir().
 */
static vnode_t *
obj_entry(struct as *as, int slot)
{
        ASSERT(AS_LOCK_HELD(as));
        if (as->a_objectdir == NULL)
                return (NULL);
        ASSERT(slot < as->a_sizedir);
        return (as->a_objectdir[slot]);
}

/* ARGSUSED */
static int
pr_readdir_objectdir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        gfs_readdir_state_t gstate;
        int error, eof = 0;
        offset_t n;
        int pslot;
        size_t objdirsize;
        proc_t *p;
        struct as *as;
        vnode_t *vp;

        ASSERT(pnp->pr_type == PR_OBJECTDIR);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;
        pslot = p->p_slot;

        /*
         * We drop p_lock before grabbing the address space lock
         * in order to avoid a deadlock with the clock thread.
         * The process will not disappear and its address space
         * will not change because it is marked P_PR_LOCK.
         */
        mutex_exit(&p->p_lock);

        if ((error = gfs_readdir_init(&gstate, 64, PRSDSIZE, uiop,
            pmkino(0, pslot, PR_PIDDIR),
            pmkino(0, pslot, PR_OBJECTDIR), 0)) != 0) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                return (error);
        }

        if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
                as = NULL;
                objdirsize = 0;
        }

        /*
         * Loop until user's request is satisfied or until
         * all mapped objects have been examined. Cannot hold
         * the address space lock for the following call as
         * gfs_readdir_pred() utimately causes a call to uiomove().
         */
        while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
                vattr_t vattr;
                char str[64];

                /*
                 * Set the correct size of the directory just
                 * in case the process has changed it's address
                 * space via mmap/munmap calls.
                 */
                if (as != NULL) {
                        AS_LOCK_ENTER(as, RW_WRITER);
                        if (as->a_updatedir)
                                rebuild_objdir(as);
                        objdirsize = as->a_sizedir;
                }

                /*
                 * Find next object.
                 */
                vattr.va_mask = AT_FSID | AT_NODEID;
                while (n < objdirsize && (((vp = obj_entry(as, n)) == NULL) ||
                    (VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)
                    != 0))) {
                        vattr.va_mask = AT_FSID | AT_NODEID;
                        n++;
                }

                if (as != NULL)
                        AS_LOCK_EXIT(as);

                /*
                 * Stop when all objects have been reported.
                 */
                if (n >= objdirsize) {
                        eof = 1;
                        break;
                }

                if (vp == p->p_exec)
                        (void) strcpy(str, "a.out");
                else
                        pr_object_name(str, vp, &vattr);

                error = gfs_readdir_emit(&gstate, uiop, n, vattr.va_nodeid,
                    str, 0);

                if (error)
                        break;
        }

        mutex_enter(&p->p_lock);
        prunlock(pnp);

        return (gfs_readdir_fini(&gstate, error, eofp, eof));
}

/* ARGSUSED */
static int
pr_readdir_lwpdir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        gfs_readdir_state_t gstate;
        int error, eof = 0;
        offset_t tslot;
        proc_t *p;
        int pslot;
        lwpdir_t *lwpdir;
        int lwpdirsize;

        ASSERT(pnp->pr_type == PR_LWPDIR);

        p = pr_p_lock(pnp);
        mutex_exit(&pr_pidlock);
        if (p == NULL)
                return (ENOENT);
        ASSERT(p == pnp->pr_common->prc_proc);
        pslot = p->p_slot;
        lwpdir = p->p_lwpdir;
        lwpdirsize = p->p_lwpdir_sz;

        /*
         * Drop p->p_lock so we can safely do uiomove().
         * The lwp directory will not change because
         * we have the process locked with P_PR_LOCK.
         */
        mutex_exit(&p->p_lock);


        if ((error = gfs_readdir_init(&gstate, PLNSIZ, PRSDSIZE, uiop,
            pmkino(0, pslot, PR_PIDDIR),
            pmkino(0, pslot, PR_LWPDIR), 0)) != 0) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                return (error);
        }

        /*
         * Loop until user's request is satisfied or until all lwps
         * have been examined.
         */
        while ((error = gfs_readdir_pred(&gstate, uiop, &tslot)) == 0) {
                lwpent_t *lep;
                uint_t tid;

                /*
                 * Find next LWP.
                 */
                while (tslot < lwpdirsize &&
                    ((lep = lwpdir[tslot].ld_entry) == NULL))
                        tslot++;
                /*
                 * Stop when all lwps have been reported.
                 */
                if (tslot >= lwpdirsize) {
                        eof = 1;
                        break;
                }

                tid = lep->le_lwpid;
                error = gfs_readdir_emitn(&gstate, uiop, tslot,
                    pmkino(tslot, pslot, PR_LWPIDDIR), tid);
                if (error)
                        break;
        }

        mutex_enter(&p->p_lock);
        prunlock(pnp);

        return (gfs_readdir_fini(&gstate, error, eofp, eof));
}

/* ARGSUSED */
static int
pr_readdir_lwpiddir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        prcommon_t *pcp = pnp->pr_common;
        int zombie = ((pcp->prc_flags & PRC_DESTROY) != 0);
        prdirent_t dirent;
        prdirent_t *dirp;
        offset_t off;
        int error;
        int pslot;
        int tslot;

        ASSERT(pnp->pr_type == PR_LWPIDDIR);

        if (uiop->uio_offset < 0 ||
            uiop->uio_offset % sizeof (prdirent_t) != 0 ||
            uiop->uio_resid < sizeof (prdirent_t))
                return (EINVAL);
        if (pcp->prc_proc == NULL || pcp->prc_tslot == -1)
                return (ENOENT);
        if (uiop->uio_offset >= sizeof (lwpiddir))
                goto out;

        /*
         * Loop until user's request is satisfied, omitting some files
         * along the way if the lwp is a zombie and also depending
         * on the data model of the process.
         */
        pslot = pcp->prc_slot;
        tslot = pcp->prc_tslot;
        for (dirp = &lwpiddir[uiop->uio_offset / sizeof (prdirent_t)];
            uiop->uio_resid >= sizeof (prdirent_t) &&
            dirp < &lwpiddir[NLWPIDDIRFILES+2];
            uiop->uio_offset = off + sizeof (prdirent_t), dirp++) {
                off = uiop->uio_offset;
                if (zombie) {
                        switch (dirp->d_ino) {
                        case PR_LWPIDDIR:
                        case PR_LWPDIR:
                        case PR_LWPSINFO:
                                break;
                        default:
                                continue;
                        }
                }
#if defined(__sparc)
                /* the asrs file exists only for sparc v9 _LP64 processes */
                if (dirp->d_ino == PR_ASRS &&
                    pcp->prc_datamodel != DATAMODEL_LP64)
                        continue;
#endif
                bcopy(dirp, &dirent, sizeof (prdirent_t));
                if (dirent.d_ino == PR_LWPDIR)
                        dirent.d_ino = pmkino(0, pslot, dirp->d_ino);
                else
                        dirent.d_ino = pmkino(tslot, pslot, dirp->d_ino);
                if ((error = uiomove((caddr_t)&dirent, sizeof (prdirent_t),
                    UIO_READ, uiop)) != 0)
                        return (error);
        }
out:
        if (eofp)
                *eofp = (uiop->uio_offset >= sizeof (lwpiddir));
        return (0);
}

/*
 * Helper function for reading a directory which lists open file desciptors
 */
static int
pr_readdir_fdlist(prnode_t *pnp, uio_t *uiop, int *eofp,
    prnodetype_t dirtype, prnodetype_t entrytype)
{
        gfs_readdir_state_t gstate;
        int error, eof = 0;
        offset_t n;
        proc_t *p;
        int pslot;
        int fddirsize;
        uf_info_t *fip;

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;
        pslot = p->p_slot;
        fip = P_FINFO(p);
        mutex_exit(&p->p_lock);

        if ((error = gfs_readdir_init(&gstate, PLNSIZ, PRSDSIZE, uiop,
            pmkino(0, pslot, PR_PIDDIR), pmkino(0, pslot, dirtype), 0)) != 0) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                return (error);
        }

        mutex_enter(&fip->fi_lock);
        if ((p->p_flag & SSYS) || p->p_as == &kas)
                fddirsize = 0;
        else
                fddirsize = fip->fi_nfiles;

        /*
         * Loop until user's request is satisfied or until
         * all file descriptors have been examined.
         */
        while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
                /*
                 * Find next fd.
                 */
                while (n < fddirsize && fip->fi_list[n].uf_file == NULL)
                        n++;
                /*
                 * Stop when all fds have been reported.
                 */
                if (n >= fddirsize) {
                        eof = 1;
                        break;
                }

                error = gfs_readdir_emitn(&gstate, uiop, n,
                    pmkino(n, pslot, entrytype), n);
                if (error)
                        break;
        }

        mutex_exit(&fip->fi_lock);
        mutex_enter(&p->p_lock);
        prunlock(pnp);

        return (gfs_readdir_fini(&gstate, error, eofp, eof));
}

static int
pr_readdir_fddir(prnode_t *pnp, uio_t *uiop, int *eofp)
{

        ASSERT(pnp->pr_type == PR_FDDIR);

        return (pr_readdir_fdlist(pnp, uiop, eofp, pnp->pr_type, PR_FD));
}

static int
pr_readdir_fdinfodir(prnode_t *pnp, uio_t *uiop, int *eofp)
{

        ASSERT(pnp->pr_type == PR_FDINFODIR);

        return (pr_readdir_fdlist(pnp, uiop, eofp, pnp->pr_type, PR_FDINFO));
}

/* ARGSUSED */
static int
pr_readdir_pathdir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        longlong_t bp[DIRENT64_RECLEN(64) / sizeof (longlong_t)];
        dirent64_t *dirent = (dirent64_t *)bp;
        int reclen;
        ssize_t oresid;
        offset_t off, idx;
        int error = 0;
        proc_t *p;
        int fd, obj;
        int pslot;
        int fddirsize;
        uf_info_t *fip;
        struct as *as = NULL;
        size_t objdirsize;
        vattr_t vattr;
        vnode_t *vp;

        ASSERT(pnp->pr_type == PR_PATHDIR);

        if (uiop->uio_offset < 0 ||
            uiop->uio_resid <= 0 ||
            (uiop->uio_offset % PRSDSIZE) != 0)
                return (EINVAL);
        oresid = uiop->uio_resid;
        bzero(bp, sizeof (bp));

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;
        fip = P_FINFO(p);
        pslot = p->p_slot;
        mutex_exit(&p->p_lock);

        if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
                as = NULL;
                objdirsize = 0;
        } else {
                AS_LOCK_ENTER(as, RW_WRITER);
                if (as->a_updatedir)
                        rebuild_objdir(as);
                objdirsize = as->a_sizedir;
                AS_LOCK_EXIT(as);
                as = NULL;
        }

        mutex_enter(&fip->fi_lock);
        if ((p->p_flag & SSYS) || p->p_as == &kas)
                fddirsize = 0;
        else
                fddirsize = fip->fi_nfiles;

        for (; uiop->uio_resid > 0; uiop->uio_offset = off + PRSDSIZE) {
                /*
                 * There are 4 special files in the path directory: ".", "..",
                 * "root", and "cwd".  We handle those specially here.
                 */
                off = uiop->uio_offset;
                idx = off / PRSDSIZE;
                if (off == 0) {                         /* "." */
                        dirent->d_ino = pmkino(0, pslot, PR_PATHDIR);
                        dirent->d_name[0] = '.';
                        dirent->d_name[1] = '\0';
                        reclen = DIRENT64_RECLEN(1);
                } else if (idx == 1) {                  /* ".." */
                        dirent->d_ino = pmkino(0, pslot, PR_PIDDIR);
                        dirent->d_name[0] = '.';
                        dirent->d_name[1] = '.';
                        dirent->d_name[2] = '\0';
                        reclen = DIRENT64_RECLEN(2);
                } else if (idx == 2) {                  /* "root" */
                        dirent->d_ino = pmkino(idx, pslot, PR_PATH);
                        (void) strcpy(dirent->d_name, "root");
                        reclen = DIRENT64_RECLEN(4);
                } else if (idx == 3) {                  /* "cwd" */
                        dirent->d_ino = pmkino(idx, pslot, PR_PATH);
                        (void) strcpy(dirent->d_name, "cwd");
                        reclen = DIRENT64_RECLEN(3);
                } else if (idx < 4 + fddirsize) {
                        /*
                         * In this case, we have one of the file descriptors.
                         */
                        fd = idx - 4;
                        if (fip->fi_list[fd].uf_file == NULL)
                                continue;
                        dirent->d_ino = pmkino(idx, pslot, PR_PATH);
                        (void) pr_u32tos(fd, dirent->d_name, PLNSIZ+1);
                        reclen = DIRENT64_RECLEN(PLNSIZ);
                } else if (idx < 4 + fddirsize + objdirsize) {
                        if (fip != NULL) {
                                mutex_exit(&fip->fi_lock);
                                fip = NULL;
                        }

                        /*
                         * We drop p_lock before grabbing the address space lock
                         * in order to avoid a deadlock with the clock thread.
                         * The process will not disappear and its address space
                         * will not change because it is marked P_PR_LOCK.
                         */
                        if (as == NULL) {
                                as = p->p_as;
                                AS_LOCK_ENTER(as, RW_WRITER);
                        }

                        if (as->a_updatedir) {
                                rebuild_objdir(as);
                                objdirsize = as->a_sizedir;
                        }

                        obj = idx - 4 - fddirsize;
                        if ((vp = obj_entry(as, obj)) == NULL)
                                continue;
                        vattr.va_mask = AT_FSID|AT_NODEID;
                        if (VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) != 0)
                                continue;
                        if (vp == p->p_exec)
                                (void) strcpy(dirent->d_name, "a.out");
                        else
                                pr_object_name(dirent->d_name, vp, &vattr);
                        dirent->d_ino = pmkino(idx, pslot, PR_PATH);
                        reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
                } else {
                        break;
                }

                dirent->d_off = uiop->uio_offset + PRSDSIZE;
                dirent->d_reclen = (ushort_t)reclen;
                if (reclen > uiop->uio_resid) {
                        /*
                         * Error if no entries have been returned yet.
                         */
                        if (uiop->uio_resid == oresid)
                                error = EINVAL;
                        break;
                }
                /*
                 * Drop the address space lock to do the uiomove().
                 */
                if (as != NULL)
                        AS_LOCK_EXIT(as);

                error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop);
                if (as != NULL)
                        AS_LOCK_ENTER(as, RW_WRITER);

                if (error)
                        break;
        }

        if (error == 0 && eofp)
                *eofp = (uiop->uio_offset >= (fddirsize + 2) * PRSDSIZE);

        if (fip != NULL)
                mutex_exit(&fip->fi_lock);
        if (as != NULL)
                AS_LOCK_EXIT(as);
        mutex_enter(&p->p_lock);
        prunlock(pnp);
        return (error);
}

static int
pr_readdir_tmpldir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        proc_t *p;
        int pslot, tslot;
        gfs_readdir_state_t gstate;
        int error, eof = 0;
        offset_t n;

        ASSERT(pnp->pr_type == PR_TMPLDIR);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;
        pslot = pnp->pr_common->prc_slot;
        tslot = pnp->pr_common->prc_tslot;
        mutex_exit(&p->p_lock);

        if ((error = gfs_readdir_init(&gstate, PRDIRSIZE, PRSDSIZE, uiop,
            pmkino(tslot, pslot, PR_LWPDIR),
            pmkino(tslot, pslot, PR_TMPLDIR), 0)) != 0) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                return (error);
        }

        while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
                /*
                 * Check for an active template.  Reading a directory's
                 * contents is already racy, so we don't bother taking
                 * any locks.
                 */
                while (n < ct_ntypes &&
                    pnp->pr_common->prc_thread->t_lwp->lwp_ct_active[n] == NULL)
                        n++;
                /*
                 * Stop when all types have been reported.
                 */
                if (n >= ct_ntypes) {
                        eof = 1;
                        break;
                }
                /*
                 * The pmkino invocation below will need to be updated
                 * when we create our fifth contract type.
                 */
                ASSERT(ct_ntypes <= 4);
                error = gfs_readdir_emit(&gstate, uiop, n,
                    pmkino((tslot << 2) | n, pslot, PR_TMPL),
                    ct_types[n]->ct_type_name, 0);
                if (error)
                        break;
        }

        mutex_enter(&p->p_lock);
        prunlock(pnp);

        return (gfs_readdir_fini(&gstate, error, eofp, eof));
}

static int
pr_readdir_ctdir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
        proc_t *p;
        int pslot;
        gfs_readdir_state_t gstate;
        int error, eof = 0;
        offset_t n;
        uint64_t zid;

        ASSERT(pnp->pr_type == PR_CTDIR);

        if ((error = prlock(pnp, ZNO)) != 0)
                return (error);
        p = pnp->pr_common->prc_proc;
        pslot = p->p_slot;
        mutex_exit(&p->p_lock);

        if ((error = gfs_readdir_init(&gstate, PRDIRSIZE, PRSDSIZE, uiop,
            pmkino(0, pslot, PR_PIDDIR), pmkino(0, pslot, PR_CTDIR), 0)) != 0) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                return (error);
        }

        zid = VTOZONE(pnp->pr_vnode)->zone_uniqid;
        while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
                id_t next = contract_plookup(p, n, zid);
                if (next == -1) {
                        eof = 1;
                        break;
                }
                error = gfs_readdir_emitn(&gstate, uiop, next,
                    pmkino(next, pslot, PR_CT), next);
                if (error)
                        break;
        }

        mutex_enter(&p->p_lock);
        prunlock(pnp);

        return (gfs_readdir_fini(&gstate, error, eofp, eof));
}

/* ARGSUSED */
static int
prfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
        return (0);
}

/*
 * Utility: remove a /proc vnode from a linked list, threaded through pr_next.
 */
static void
pr_list_unlink(vnode_t *pvp, vnode_t **listp)
{
        vnode_t *vp;
        prnode_t *pnp;

        while ((vp = *listp) != NULL) {
                pnp = VTOP(vp);
                if (vp == pvp) {
                        *listp = pnp->pr_next;
                        pnp->pr_next = NULL;
                        break;
                }
                listp = &pnp->pr_next;
        }
}

/* ARGSUSED */
static void
prinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
        prnode_t *pnp = VTOP(vp);
        prnodetype_t type = pnp->pr_type;
        proc_t *p;
        vnode_t *dp;
        vnode_t *ovp = NULL;
        prnode_t *opnp = NULL;

        switch (type) {
        case PR_OBJECT:
        case PR_FD:
        case PR_FDINFO:
        case PR_SELF:
        case PR_PATH:
                /* These are not linked into the usual lists */
                ASSERT(vp->v_count == 1);
                if ((dp = pnp->pr_parent) != NULL)
                        VN_RELE(dp);
                prfreenode(pnp);
                return;
        default:
                break;
        }

        mutex_enter(&pr_pidlock);
        if (pnp->pr_pcommon == NULL)
                p = NULL;
        else if ((p = pnp->pr_pcommon->prc_proc) != NULL)
                mutex_enter(&p->p_lock);
        mutex_enter(&vp->v_lock);

        if (type == PR_PROCDIR || vp->v_count > 1) {
                VN_RELE_LOCKED(vp);
                mutex_exit(&vp->v_lock);
                if (p != NULL)
                        mutex_exit(&p->p_lock);
                mutex_exit(&pr_pidlock);
                return;
        }

        if ((dp = pnp->pr_parent) != NULL) {
                prnode_t *dpnp;

                switch (type) {
                case PR_PIDFILE:
                case PR_LWPIDFILE:
                case PR_OPAGEDATA:
                        break;
                default:
                        dpnp = VTOP(dp);
                        mutex_enter(&dpnp->pr_mutex);
                        if (dpnp->pr_files != NULL &&
                            dpnp->pr_files[pnp->pr_index] == vp)
                                dpnp->pr_files[pnp->pr_index] = NULL;
                        mutex_exit(&dpnp->pr_mutex);
                        break;
                }
                pnp->pr_parent = NULL;
        }

        ASSERT(vp->v_count == 1);

        /*
         * If we allocated an old /proc/pid node, free it too.
         */
        if (pnp->pr_pidfile != NULL) {
                ASSERT(type == PR_PIDDIR);
                ovp = pnp->pr_pidfile;
                opnp = VTOP(ovp);
                ASSERT(opnp->pr_type == PR_PIDFILE);
                pnp->pr_pidfile = NULL;
        }

        mutex_exit(&pr_pidlock);

        if (p != NULL) {
                /*
                 * Remove the vnodes from the lists of
                 * /proc vnodes for the process.
                 */
                int slot;

                switch (type) {
                case PR_PIDDIR:
                        pr_list_unlink(vp, &p->p_trace);
                        break;
                case PR_LWPIDDIR:
                        if ((slot = pnp->pr_common->prc_tslot) != -1) {
                                lwpent_t *lep = p->p_lwpdir[slot].ld_entry;
                                pr_list_unlink(vp, &lep->le_trace);
                        }
                        break;
                default:
                        pr_list_unlink(vp, &p->p_plist);
                        break;
                }
                if (ovp != NULL)
                        pr_list_unlink(ovp, &p->p_plist);
                mutex_exit(&p->p_lock);
        }

        mutex_exit(&vp->v_lock);

        if (type == PR_CT && pnp->pr_contract != NULL) {
                contract_rele(pnp->pr_contract);
                pnp->pr_contract = NULL;
        }

        if (opnp != NULL)
                prfreenode(opnp);
        prfreenode(pnp);
        if (dp != NULL) {
                VN_RELE(dp);
        }
}

/* ARGSUSED */
static int
prseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
{
        return (0);
}

/*
 * We use the p_execdir member of proc_t to expand the %d token in core file
 * paths (the directory path for the executable that dumped core; see
 * coreadm(8) for details). We'd like gcore(1) to be able to expand %d in
 * the same way as core dumping from the kernel, but there's no convenient
 * and comprehensible way to export the path name for p_execdir. To solve
 * this, we try to find the actual path to the executable that was used. In
 * pr_lookup_pathdir(), we mark the a.out path name vnode with the PR_AOUT
 * flag, and use that here to indicate that more work is needed beyond the
 * call to vnodetopath().
 */
static int
prreadlink_lookup(prnode_t *pnp, char *buf, size_t size, cred_t *cr)
{
        proc_t *p;
        vnode_t *vp, *execvp, *vrootp;
        int ret;
        size_t len;
        dirent64_t *dp;
        size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
        char *dbuf;

        p = curproc;
        mutex_enter(&p->p_lock);
        if ((vrootp = PTOU(p)->u_rdir) == NULL)
                vrootp = rootdir;
        VN_HOLD(vrootp);
        mutex_exit(&p->p_lock);

        ret = vnodetopath(vrootp, pnp->pr_realvp, buf, size, cr);

        /*
         * If PR_AOUT isn't set, then we looked up the path for the vnode;
         * otherwise, we looked up the path for (what we believe to be) the
         * containing directory.
         */
        if ((pnp->pr_flags & PR_AOUT) == 0) {
                VN_RELE(vrootp);
                return (ret);
        }

        /*
         * Fail if there's a problem locking the process. This will only
         * occur if the process is changing so the information we would
         * report would already be invalid.
         */
        if (prlock(pnp, ZNO) != 0) {
                VN_RELE(vrootp);
                return (EIO);
        }

        p = pnp->pr_common->prc_proc;
        mutex_exit(&p->p_lock);

        execvp = p->p_exec;
        VN_HOLD(execvp);

        /*
         * If our initial lookup of the directory failed, fall back to
         * the path name information for p_exec.
         */
        if (ret != 0) {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
                ret = vnodetopath(vrootp, execvp, buf, size, cr);
                VN_RELE(execvp);
                VN_RELE(vrootp);
                return (ret);
        }

        len = strlen(buf);

        /*
         * We use u_comm as a guess for the last component of the full
         * executable path name. If there isn't going to be enough space
         * we fall back to using the p_exec so that we can have _an_
         * answer even if it's not perfect.
         */
        if (strlen(PTOU(p)->u_comm) + len + 1 < size) {
                buf[len] = '/';
                (void) strcpy(buf + len + 1, PTOU(p)->u_comm);
                mutex_enter(&p->p_lock);
                prunlock(pnp);

                /*
                 * Do a forward lookup of our u_comm guess.
                 */
                if (lookupnameat(buf + len + 1, UIO_SYSSPACE, FOLLOW, NULLVPP,
                    &vp, pnp->pr_realvp) == 0) {
                        if (vn_compare(vp, execvp)) {
                                VN_RELE(vp);
                                VN_RELE(execvp);
                                VN_RELE(vrootp);
                                return (0);
                        }

                        VN_RELE(vp);
                }
        } else {
                mutex_enter(&p->p_lock);
                prunlock(pnp);
        }

        dbuf = kmem_alloc(dlen, KM_SLEEP);

        /*
         * Try to find a matching vnode by iterating through the directory's
         * entries. If that fails, fall back to the path information for
         * p_exec.
         */
        if ((ret = dirfindvp(vrootp, pnp->pr_realvp, execvp, cr, dbuf,
            dlen, &dp)) == 0 && strlen(dp->d_name) + len + 1 < size) {
                buf[len] = '/';
                (void) strcpy(buf + len + 1, dp->d_name);
        } else {
                ret = vnodetopath(vrootp, execvp, buf, size, cr);
        }

        kmem_free(dbuf, dlen);
        VN_RELE(execvp);
        VN_RELE(vrootp);

        return (ret);
}

/* ARGSUSED */
static int
prreadlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ctp)
{
        prnode_t *pnp = VTOP(vp);
        char *buf;
        int ret = EINVAL;
        char idbuf[16];
        int length, rlength;
        contract_t *ct;

        switch (pnp->pr_type) {
        case PR_SELF:
                (void) snprintf(idbuf, sizeof (idbuf), "%d", curproc->p_pid);
                ret = uiomove(idbuf, strlen(idbuf), UIO_READ, uiop);
                break;
        case PR_OBJECT:
        case PR_FD:
        case PR_CURDIR:
        case PR_ROOTDIR:
                if (pnp->pr_realvp->v_type == VDIR)
                        ret = 0;
                break;
        case PR_PATH:
                buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

                if ((ret = prreadlink_lookup(pnp, buf, MAXPATHLEN, cr)) == 0)
                        ret = uiomove(buf, strlen(buf), UIO_READ, uiop);

                kmem_free(buf, MAXPATHLEN);
                break;
        case PR_CT:
                ASSERT(pnp->pr_contract != NULL);
                ct = pnp->pr_contract;
                length = sizeof (CTFS_ROOT "//") + sizeof (idbuf) +
                    strlen(ct->ct_type->ct_type_name);
                buf = kmem_alloc(length, KM_SLEEP);
                rlength = snprintf(buf, length, CTFS_ROOT "/%s/%d",
                    ct->ct_type->ct_type_name, ct->ct_id);
                ASSERT(rlength < length);
                ret = uiomove(buf, rlength, UIO_READ, uiop);
                kmem_free(buf, length);
                break;
        default:
                break;
        }

        return (ret);
}

/*ARGSUSED2*/
static int
prcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
{
        prnode_t *pp1, *pp2;

        if (vp1 == vp2)
                return (1);

        if (!vn_matchops(vp1, prvnodeops) || !vn_matchops(vp2, prvnodeops))
                return (0);

        pp1 = VTOP(vp1);
        pp2 = VTOP(vp2);

        if (pp1->pr_type != pp2->pr_type)
                return (0);
        if (pp1->pr_type == PR_PROCDIR)
                return (1);
        if (pp1->pr_ino || pp2->pr_ino)
                return (pp2->pr_ino == pp1->pr_ino);

        if (pp1->pr_common == NULL || pp2->pr_common == NULL)
                return (0);

        return (pp1->pr_common->prc_slot == pp2->pr_common->prc_slot &&
            pp1->pr_common->prc_tslot == pp2->pr_common->prc_tslot);
}

static int
prrealvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
{
        vnode_t *rvp;

        if ((rvp = VTOP(vp)->pr_realvp) != NULL) {
                vp = rvp;
                if (VOP_REALVP(vp, &rvp, ct) == 0)
                        vp = rvp;
        }

        *vpp = vp;
        return (0);
}

/*
 * Return the answer requested to poll().
 * POLLIN, POLLRDNORM, and POLLOUT are recognized as in fs_poll().
 * In addition, these have special meaning for /proc files:
 *      POLLPRI         process or lwp stopped on an event of interest
 *      POLLERR         /proc file descriptor is invalid
 *      POLLHUP         process or lwp has terminated
 */
/*ARGSUSED5*/
static int
prpoll(vnode_t *vp, short events, int anyyet, short *reventsp,
    pollhead_t **phpp, caller_context_t *ct)
{
        prnode_t *pnp = VTOP(vp);
        prcommon_t *pcp = pnp->pr_common;
        pollhead_t *php = &pcp->prc_pollhead;
        proc_t *p;
        short revents;
        int error;
        int lockstate;

        ASSERT(pnp->pr_type < PR_NFILES);

        /*
         * Support for old /proc interface.
         */
        if (pnp->pr_pidfile != NULL) {
                vp = pnp->pr_pidfile;
                pnp = VTOP(vp);
                ASSERT(pnp->pr_type == PR_PIDFILE);
                ASSERT(pnp->pr_common == pcp);
        }

        *reventsp = revents = 0;
        *phpp = (pollhead_t *)NULL;

        if (vp->v_type == VDIR) {
                *reventsp |= POLLNVAL;
                return (0);
        }

        /* avoid deadlock with prnotify() */
        if (pollunlock(&lockstate) != 0) {
                *reventsp = POLLNVAL;
                return (0);
        }

        if ((error = prlock(pnp, ZNO)) != 0) {
                pollrelock(lockstate);
                switch (error) {
                case ENOENT:            /* process or lwp died */
                        *reventsp = POLLHUP;
                        error = 0;
                        break;
                case EAGAIN:            /* invalidated */
                        *reventsp = POLLERR;
                        error = 0;
                        break;
                }
                return (error);
        }

        /*
         * We have the process marked locked (P_PR_LOCK) and we are holding
         * its p->p_lock.  We want to unmark the process but retain
         * exclusive control w.r.t. other /proc controlling processes
         * before reacquiring the polling locks.
         *
         * prunmark() does this for us.  It unmarks the process
         * but retains p->p_lock so we still have exclusive control.
         * We will drop p->p_lock at the end to relinquish control.
         *
         * We cannot call prunlock() at the end to relinquish control
         * because prunlock(), like prunmark(), may drop and reacquire
         * p->p_lock and that would lead to a lock order violation
         * w.r.t. the polling locks we are about to reacquire.
         */
        p = pcp->prc_proc;
        ASSERT(p != NULL);
        prunmark(p);

        pollrelock(lockstate);          /* reacquire dropped poll locks */

        if ((p->p_flag & SSYS) || p->p_as == &kas)
                revents = POLLNVAL;
        else {
                short ev;

                if ((ev = (events & (POLLIN|POLLRDNORM))) != 0)
                        revents |= ev;
                /*
                 * POLLWRNORM (same as POLLOUT) really should not be
                 * used to indicate that the process or lwp stopped.
                 * However, USL chose to use POLLWRNORM rather than
                 * POLLPRI to indicate this, so we just accept either
                 * requested event to indicate stopped.  (grr...)
                 */
                if ((ev = (events & (POLLPRI|POLLOUT|POLLWRNORM))) != 0) {
                        kthread_t *t;

                        if (pcp->prc_flags & PRC_LWP) {
                                t = pcp->prc_thread;
                                ASSERT(t != NULL);
                                thread_lock(t);
                        } else {
                                t = prchoose(p);        /* returns locked t */
                                ASSERT(t != NULL);
                        }

                        if (ISTOPPED(t) || VSTOPPED(t))
                                revents |= ev;
                        thread_unlock(t);
                }
        }

        *reventsp = revents;
        if ((!anyyet && revents == 0) || (events & POLLET)) {
                /*
                 * Arrange to wake up the polling lwp when
                 * the target process/lwp stops or terminates
                 * or when the file descriptor becomes invalid.
                 */
                pcp->prc_flags |= PRC_POLL;
                *phpp = php;
        }
        mutex_exit(&p->p_lock);
        return (0);
}

/* in prioctl.c */
extern int prioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
        caller_context_t *);

/*
 * /proc vnode operations vector
 */
const fs_operation_def_t pr_vnodeops_template[] = {
        VOPNAME_OPEN,           { .vop_open = propen },
        VOPNAME_CLOSE,          { .vop_close = prclose },
        VOPNAME_READ,           { .vop_read = prread },
        VOPNAME_WRITE,          { .vop_write = prwrite },
        VOPNAME_IOCTL,          { .vop_ioctl = prioctl },
        VOPNAME_GETATTR,        { .vop_getattr = prgetattr },
        VOPNAME_ACCESS,         { .vop_access = praccess },
        VOPNAME_LOOKUP,         { .vop_lookup = prlookup },
        VOPNAME_CREATE,         { .vop_create = prcreate },
        VOPNAME_READDIR,        { .vop_readdir = prreaddir },
        VOPNAME_READLINK,       { .vop_readlink = prreadlink },
        VOPNAME_FSYNC,          { .vop_fsync = prfsync },
        VOPNAME_INACTIVE,       { .vop_inactive = prinactive },
        VOPNAME_SEEK,           { .vop_seek = prseek },
        VOPNAME_CMP,            { .vop_cmp = prcmp },
        VOPNAME_FRLOCK,         { .error = fs_error },
        VOPNAME_REALVP,         { .vop_realvp = prrealvp },
        VOPNAME_POLL,           { .vop_poll = prpoll },
        VOPNAME_DISPOSE,        { .error = fs_error },
        VOPNAME_SHRLOCK,        { .error = fs_error },
        NULL,                   NULL
};