root/sys/kern/kern_prot.c
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
 *      The Regents of the University of California.
 * (c) UNIX System Laboratories, Inc.
 * Copyright (c) 2000-2001 Robert N. M. Watson.
 * All rights reserved.
 *
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * System calls related to processes and protection
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/abi_compat.h>
#include <sys/acct.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/loginclass.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/ptrace.h>
#include <sys/refcount.h>
#include <sys/sx.h>
#include <sys/priv.h>
#include <sys/proc.h>
#ifdef COMPAT_43
#include <sys/sysent.h>
#endif
#include <sys/sysproto.h>
#include <sys/jail.h>
#include <sys/racct.h>
#include <sys/rctl.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>

#ifdef MAC
#include <security/mac/mac_syscalls.h>
#endif

#include <vm/uma.h>

/*
 * Only when the kernel is configured with REGRESSION: declare a "regression"
 * feature carrying the description string below.
 */
#ifdef REGRESSION
FEATURE(regression,
    "Kernel support for interfaces necessary for regression testing (SECURITY RISK!)");
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

/* File-local malloc type, short name "cred", long description "credentials". */
static MALLOC_DEFINE(M_CRED, "cred", "credentials");

/* Declares the "bsd" node below the "security" sysctl node. */
SYSCTL_NODE(_security, OID_AUTO, bsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "BSD security policy");

/* Forward declaration of a file-local helper. */
static void crfree_final(struct ucred *cr);

/*
 * Assert, through MPASS2(), that the group count 'ngrp' is not negative.
 */
static inline void
groups_check_positive_len(int ngrp)
{
        MPASS2(ngrp >= 0, "negative number of groups");
}
/*
 * Assert, through MPASS2(), that the group count 'ngrp' does not exceed
 * 'ngroups_max'.
 */
static inline void
groups_check_max_len(int ngrp)
{
        MPASS2(ngrp <= ngroups_max, "too many supplementary groups");
}

/* Forward declarations of the group-list helpers called by kern_setcred(). */
static void groups_normalize(int *ngrp, gid_t *groups);
static void crsetgroups_internal(struct ucred *cr, int ngrp,
    const gid_t *groups);

/* Forward declarations of the file-local cr_cansee*() helpers. */
static int cr_canseeotheruids(struct ucred *u1, struct ucred *u2);
static int cr_canseeothergids(struct ucred *u1, struct ucred *u2);
static int cr_canseejailproc(struct ucred *u1, struct ucred *u2);

#ifndef _SYS_SYSPROTO_H_
struct getpid_args {
        int     dummy;
};
#endif
/*
 * Return the calling process' ID in td_retval[0].  On COMPAT_43 kernels, a
 * process flagged SV_AOUT additionally gets the result of kern_getppid() in
 * td_retval[1].  Always succeeds.
 */
/* ARGSUSED */
int
sys_getpid(struct thread *td, struct getpid_args *uap)
{
        struct proc *self;

        self = td->td_proc;
        td->td_retval[0] = self->p_pid;
#ifdef COMPAT_43
        if (SV_PROC_FLAG(self, SV_AOUT))
                td->td_retval[1] = kern_getppid(td);
#endif
        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct getppid_args {
        int     dummy;
};
#endif
/*
 * System call wrapper storing the result of kern_getppid() in td_retval[0].
 * Always succeeds.
 */
/* ARGSUSED */
int
sys_getppid(struct thread *td, struct getppid_args *uap)
{
        int ppid;

        ppid = kern_getppid(td);
        td->td_retval[0] = ppid;
        return (0);
}

/*
 * Return the 'p_oppid' field of the process owning thread 'td'.
 */
int
kern_getppid(struct thread *td)
{

        return (td->td_proc->p_oppid);
}

/*
 * Return the ID of the calling process' process group in td_retval[0].  The
 * POSIX getpgrp() interface has no parameter.
 */
#ifndef _SYS_SYSPROTO_H_
struct getpgrp_args {
        int     dummy;
};
#endif
int
sys_getpgrp(struct thread *td, struct getpgrp_args *uap)
{
        struct proc *self;

        self = td->td_proc;

        /* The group pointer is read with the process lock held. */
        PROC_LOCK(self);
        td->td_retval[0] = self->p_pgrp->pg_id;
        PROC_UNLOCK(self);

        return (0);
}

/* Get an arbitrary pid's process group id */
#ifndef _SYS_SYSPROTO_H_
struct getpgid_args {
        pid_t   pid;
};
#endif
int
sys_getpgid(struct thread *td, struct getpgid_args *uap)
{
        struct proc *p;
        int error;

        if (uap->pid == 0) {
                p = td->td_proc;
                PROC_LOCK(p);
        } else {
                p = pfind(uap->pid);
                if (p == NULL)
                        return (ESRCH);
                error = p_cansee(td, p);
                if (error) {
                        PROC_UNLOCK(p);
                        return (error);
                }
        }
        td->td_retval[0] = p->p_pgrp->pg_id;
        PROC_UNLOCK(p);
        return (0);
}

/*
 * System call entry point for querying a session ID by process ID; all the
 * work is delegated to kern_getsid().
 */
#ifndef _SYS_SYSPROTO_H_
struct getsid_args {
        pid_t   pid;
};
#endif
int
sys_getsid(struct thread *td, struct getsid_args *uap)
{
        pid_t pid;

        pid = uap->pid;
        return (kern_getsid(td, pid));
}

/*
 * Return in td_retval[0] the session ID of the process designated by 'pid',
 * where 0 designates the caller.
 *
 * Fails with ESRCH if pfind() finds no such process, or with the error
 * reported by p_cansee() for the target.
 */
int
kern_getsid(struct thread *td, pid_t pid)
{
        struct proc *target;
        int error;

        if (pid != 0) {
                target = pfind(pid);
                if (target == NULL)
                        return (ESRCH);
                error = p_cansee(td, target);
                if (error != 0) {
                        PROC_UNLOCK(target);
                        return (error);
                }
        } else {
                target = td->td_proc;
                PROC_LOCK(target);
        }

        /* On both paths 'target' is unlocked exactly once from here on. */
        td->td_retval[0] = target->p_session->s_sid;
        PROC_UNLOCK(target);
        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct getuid_args {
        int     dummy;
};
#endif
/* ARGSUSED */
int
sys_getuid(struct thread *td, struct getuid_args *uap)
{

        td->td_retval[0] = td->td_ucred->cr_ruid;
#if defined(COMPAT_43)
        td->td_retval[1] = td->td_ucred->cr_uid;
#endif
        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct geteuid_args {
        int     dummy;
};
#endif
/* ARGSUSED */
int
sys_geteuid(struct thread *td, struct geteuid_args *uap)
{

        td->td_retval[0] = td->td_ucred->cr_uid;
        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct getgid_args {
        int     dummy;
};
#endif
/* ARGSUSED */
int
sys_getgid(struct thread *td, struct getgid_args *uap)
{

        td->td_retval[0] = td->td_ucred->cr_rgid;
#if defined(COMPAT_43)
        td->td_retval[1] = td->td_ucred->cr_gid;
#endif
        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct getegid_args {
        int     dummy;
};
#endif
/* ARGSUSED */
int
sys_getegid(struct thread *td, struct getegid_args *uap)
{

        td->td_retval[0] = td->td_ucred->cr_gid;
        return (0);
}

#ifdef COMPAT_FREEBSD14
/*
 * Compatibility variant of getgroups() for FreeBSD < 15.0 binaries, which
 * expect the effective GID at the start of the list, ahead of all
 * supplementary groups.
 *
 * The reported count is thus 'cr_ngroups + 1'.  A 'gidsetsize' of 0 only
 * queries that count; a non-zero one smaller than the count fails with
 * EINVAL.  A copyout() error is returned as is, with td_retval[0] still set.
 */
int
freebsd14_getgroups(struct thread *td, struct freebsd14_getgroups_args *uap)
{
        struct ucred *cred;
        int error, total;

        cred = td->td_ucred;
        total = cred->cr_ngroups + 1;

        if (uap->gidsetsize != 0) {
                if (uap->gidsetsize < total)
                        return (EINVAL);

                /* Effective GID first, then the supplementary groups. */
                error = copyout(&cred->cr_gid, uap->gidset, sizeof(gid_t));
                if (error == 0)
                        error = copyout(cred->cr_groups, uap->gidset + 1,
                            (total - 1) * sizeof(gid_t));
        } else
                error = 0;

        td->td_retval[0] = total;
        return (error);
}
#endif  /* COMPAT_FREEBSD14 */

#ifndef _SYS_SYSPROTO_H_
struct getgroups_args {
        int     gidsetsize;
        gid_t   *gidset;
};
#endif
int
sys_getgroups(struct thread *td, struct getgroups_args *uap)
{
        struct ucred *cred;
        int ngrp, error;

        cred = td->td_ucred;

        ngrp = cred->cr_ngroups;
        if (uap->gidsetsize == 0) {
                error = 0;
                goto out;
        }
        if (uap->gidsetsize < ngrp)
                return (EINVAL);

        error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t));
out:
        td->td_retval[0] = ngrp;
        return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setsid_args {
        int     dummy;
};
#endif
/* ARGSUSED */
int
sys_setsid(struct thread *td, struct setsid_args *uap)
{
        struct pgrp *pgrp;
        int error;
        struct proc *p = td->td_proc;
        struct pgrp *newpgrp;
        struct session *newsess;

        pgrp = NULL;

        newpgrp = uma_zalloc(pgrp_zone, M_WAITOK);
        newsess = malloc(sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO);

again:
        error = 0;
        sx_xlock(&proctree_lock);

        if (p->p_pgid == p->p_pid || (pgrp = pgfind(p->p_pid)) != NULL) {
                if (pgrp != NULL)
                        PGRP_UNLOCK(pgrp);
                error = EPERM;
        } else {
                error = enterpgrp(p, p->p_pid, newpgrp, newsess);
                if (error == ERESTART)
                        goto again;
                MPASS(error == 0);
                td->td_retval[0] = p->p_pid;
                newpgrp = NULL;
                newsess = NULL;
        }

        sx_xunlock(&proctree_lock);

        uma_zfree(pgrp_zone, newpgrp);
        free(newsess, M_SESSION);

        return (error);
}

/*
 * set process group (setpgid/old setpgrp)
 *
 * caller does setpgid(targpid, targpgid)
 *
 * pid must be caller or child of caller (ESRCH)
 * if a child
 *      pid must be in same session (EPERM)
 *      pid can't have done an exec (EACCES)
 * if pgid != pid
 *      there must exist some pid in same session having pgid (EPERM)
 * pid must not be session leader (EPERM)
 *
 * Additionally:
 * - A negative pgid is rejected with EINVAL.
 * - A pid of 0 designates the caller, and a pgid of 0 designates the target's
 *   pid.
 * - For a target other than the caller, an error from p_cansee() is returned
 *   as is.
 * - Requesting the process group the target is already in succeeds without
 *   doing anything.
 */
#ifndef _SYS_SYSPROTO_H_
struct setpgid_args {
        int     pid;            /* target process id */
        int     pgid;           /* target pgrp id */
};
#endif
/* ARGSUSED */
int
sys_setpgid(struct thread *td, struct setpgid_args *uap)
{
        struct proc *curp = td->td_proc;
        struct proc *targp;     /* target process */
        struct pgrp *pgrp;      /* target pgrp */
        int error;
        struct pgrp *newpgrp;

        if (uap->pgid < 0)
                return (EINVAL);

        /*
         * Preallocate a process group before taking 'proctree_lock'.  It is
         * handed to enterpgrp() if a new group must be created, and given
         * back to the zone at the end otherwise.
         */
        newpgrp = uma_zalloc(pgrp_zone, M_WAITOK);

again:
        error = 0;

        sx_xlock(&proctree_lock);
        if (uap->pid != 0 && uap->pid != curp->p_pid) {
                /*
                 * The target is another process.  It is unlocked on every
                 * path below, including success, so none of the code after
                 * this block runs with the target's process lock held.
                 */
                if ((targp = pfind(uap->pid)) == NULL) {
                        error = ESRCH;
                        goto done;
                }
                if (!inferior(targp)) {
                        PROC_UNLOCK(targp);
                        error = ESRCH;
                        goto done;
                }
                if ((error = p_cansee(td, targp))) {
                        PROC_UNLOCK(targp);
                        goto done;
                }
                if (targp->p_pgrp == NULL ||
                    targp->p_session != curp->p_session) {
                        PROC_UNLOCK(targp);
                        error = EPERM;
                        goto done;
                }
                if (targp->p_flag & P_EXEC) {
                        PROC_UNLOCK(targp);
                        error = EACCES;
                        goto done;
                }
                PROC_UNLOCK(targp);
        } else
                targp = curp;
        if (SESS_LEADER(targp)) {
                error = EPERM;
                goto done;
        }
        if (uap->pgid == 0)
                uap->pgid = targp->p_pid;
        if ((pgrp = pgfind(uap->pgid)) == NULL) {
                /*
                 * No such process group: one may only be created with the
                 * target's own pid as its ID.
                 */
                if (uap->pgid == targp->p_pid) {
                        error = enterpgrp(targp, uap->pgid, newpgrp,
                            NULL);
                        /* On success, 'newpgrp' must not be freed below. */
                        if (error == 0)
                                newpgrp = NULL;
                } else
                        error = EPERM;
        } else {
                /* Already a member: nothing to do, 'error' is still 0. */
                if (pgrp == targp->p_pgrp) {
                        PGRP_UNLOCK(pgrp);
                        goto done;
                }
                if (pgrp->pg_id != targp->p_pid &&
                    pgrp->pg_session != curp->p_session) {
                        PGRP_UNLOCK(pgrp);
                        error = EPERM;
                        goto done;
                }
                PGRP_UNLOCK(pgrp);
                error = enterthispgrp(targp, pgrp);
        }
done:
        KASSERT(error == 0 || newpgrp != NULL,
            ("setpgid failed and newpgrp is NULL"));
        /*
         * On ERESTART, start over from 'again', which takes 'proctree_lock'
         * anew.
         * NOTE(review): this presumes enterpgrp()/enterthispgrp() return
         * ERESTART with 'proctree_lock' already released -- confirm against
         * their definitions.
         */
        if (error == ERESTART)
                goto again;
        sx_xunlock(&proctree_lock);
        uma_zfree(pgrp_zone, newpgrp);
        return (error);
}

/*
 * Three-way comparison of the two 'gid_t' values pointed to by 'p1' and 'p2'.
 * Returns -1, 0 or 1 if the first is respectively lower than, equal to or
 * greater than the second.
 */
static int
gidp_cmp(const void *p1, const void *p2)
{
        const gid_t lhs = *(const gid_t *)p1;
        const gid_t rhs = *(const gid_t *)p2;

        if (lhs < rhs)
                return (-1);
        if (lhs > rhs)
                return (1);
        return (0);
}

/*
 * Bring the supplementary groups array designated by 'wcred' into kernel
 * space.
 *
 * 'smallgroups' must be an (uninitialized) array of length
 * CRED_SMALLGROUPS_NB; it serves as the destination when the number of groups
 * fits, else a buffer is allocated with malloc(M_TEMP).
 *
 * 'sc_supp_groups' is always overwritten, with:
 * - NULL if SETCREDF_SUPP_GROUPS is not in 'flags' (0 is returned) or if
 *   there are more than 'ngroups_max' groups (EINVAL is returned);
 * - the kernel-space array otherwise, whose content is garbage if copyin()
 *   failed (its error is then returned).
 * Except on a successful copy, 'sc_supp_groups_nb' is also reset to 0.
 *
 * Whatever the return value, the caller must free 'sc_supp_groups' (M_TEMP)
 * if it is not equal to 'smallgroups'.
 */
static int
user_setcred_copyin_supp_groups(struct setcred *const wcred,
    const u_int flags, gid_t *const smallgroups)
{
        gid_t *kgroups;
        int error;

        if ((flags & SETCREDF_SUPP_GROUPS) == 0) {
                wcred->sc_supp_groups_nb = 0;
                wcred->sc_supp_groups = NULL;
                return (0);
        }

        /*
         * Enforce the limit on the number of groups before anything else, so
         * that the amount of bytes to copy is bounded.
         */
        if (wcred->sc_supp_groups_nb > ngroups_max) {
                wcred->sc_supp_groups_nb = 0;
                wcred->sc_supp_groups = NULL;
                return (EINVAL);
        }

        if (wcred->sc_supp_groups_nb <= CRED_SMALLGROUPS_NB)
                kgroups = smallgroups;
        else
                kgroups = malloc(wcred->sc_supp_groups_nb * sizeof(gid_t),
                    M_TEMP, M_WAITOK);

        error = copyin(wcred->sc_supp_groups, kgroups,
            wcred->sc_supp_groups_nb * sizeof(gid_t));
        /* Set even on failure, so that the caller can free the buffer. */
        wcred->sc_supp_groups = kgroups;
        if (error != 0)
                wcred->sc_supp_groups_nb = 0;

        return (error);
}

/*
 * Wrapper around kern_setcred() for a 'wcred' structure that is itself in
 * kernel space but whose pointer fields still designate userland memory.
 *
 * Copies in the supplementary groups (if SETCREDF_SUPP_GROUPS) and, on MAC
 * kernels, the MAC label (if SETCREDF_MAC_LABEL), replaces the corresponding
 * pointers in 'wcred' with kernel ones, calls kern_setcred() and releases
 * what was copied in.
 *
 * Returns EINVAL on flags outside of SETCREDF_MASK, the error of a failed
 * copy-in, or else kern_setcred()'s return value.
 */
int
user_setcred(struct thread *td, const u_int flags, struct setcred *const wcred)
{
#ifdef MAC
        struct mac mac;
        /* Pointer to 'struct mac' or 'struct mac32'. */
        void *umac;
#endif
        gid_t smallgroups[CRED_SMALLGROUPS_NB];
        int error;

        /*
         * As the only point of this wrapper function is to copyin() from
         * userland, we only interpret the data pieces we need to perform this
         * operation and defer further sanity checks to kern_setcred(), except
         * that we redundantly check here that no unknown flags have been
         * passed.
         */
        if ((flags & ~SETCREDF_MASK) != 0)
                return (EINVAL);

#ifdef MAC
        /* Remember the userland label pointer before it is cleared below. */
        umac = wcred->sc_label;
#endif
        /* Also done on !MAC as a defensive measure. */
        wcred->sc_label = NULL;

        /*
         * Copy supplementary groups as needed.  There is no specific
         * alternative for 32-bit compatibility as 'gid_t' has the same size
         * everywhere.
         */
        error = user_setcred_copyin_supp_groups(wcred, flags, smallgroups);
        if (error != 0)
                goto free_groups;

#ifdef MAC
        if ((flags & SETCREDF_MAC_LABEL) != 0) {
                error = mac_label_copyin(umac, &mac, NULL);
                if (error != 0)
                        goto free_groups;
                wcred->sc_label = &mac;
        }
#endif

        error = kern_setcred(td, flags, wcred);

#ifdef MAC
        /* Non-NULL only if the label was successfully copied in above. */
        if (wcred->sc_label != NULL)
                free_copied_label(wcred->sc_label);
#endif

free_groups:
        /*
         * 'sc_supp_groups' was set by user_setcred_copyin_supp_groups() even
         * if it failed.  Only a buffer other than the on-stack 'smallgroups'
         * is passed to free().
         */
        if (wcred->sc_supp_groups != smallgroups)
                free(wcred->sc_supp_groups, M_TEMP);

        return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setcred_args {
        u_int                    flags; /* Flags. */
        const struct setcred    *wcred;
        size_t                   size;  /* Passed 'setcred' structure length. */
};
#endif
/* ARGSUSED */
int
sys_setcred(struct thread *td, struct setcred_args *uap)
{
        struct setcred wcred;
        int error;

        if (uap->size != sizeof(wcred))
                return (EINVAL);
        error = copyin(uap->wcred, &wcred, sizeof(wcred));
        if (error != 0)
                return (error);
        return (user_setcred(td, uap->flags, &wcred));
}

/*
 * Change the credentials of the process owning 'td' according to 'flags',
 * each SETCREDF_* flag selecting a field of 'wcred' to apply.  All pointers in
 * 'wcred' are used directly, without copyin().
 *
 * Returns:
 * - EINVAL on flags outside of SETCREDF_MASK, or if SETCREDF_SUPP_GROUPS is
 *   set and there are more than 'ngroups_max' groups.
 * - ENOTSUP if SETCREDF_MAC_LABEL is set on a kernel built without MAC.
 * - The error of the MAC operations or of the PRIV_CRED_SETCRED privilege
 *   check.
 * - EAGAIN if proc_set_cred_enforce_proc_lim() does not install the new
 *   credentials.
 * - 0 once the new credentials have been installed.
 *
 * CAUTION: This function normalizes groups in 'wcred'.
 */
int
kern_setcred(struct thread *const td, const u_int flags,
    struct setcred *const wcred)
{
        struct proc *const p = td->td_proc;
        struct ucred *new_cred, *old_cred, *to_free_cred = NULL;
        struct uidinfo *uip = NULL, *ruip = NULL;
#ifdef MAC
        void *mac_set_proc_data = NULL;
        bool proc_label_set = false;
#endif
        int error;
        bool cred_set = false;

        /* Bail out on unrecognized flags. */
        if (flags & ~SETCREDF_MASK)
                return (EINVAL);

        /*
         * Part 1: We allocate and perform preparatory operations with no locks.
         */

        if ((flags & SETCREDF_SUPP_GROUPS) != 0 &&
            wcred->sc_supp_groups_nb > ngroups_max)
                        return (EINVAL);

        if (flags & SETCREDF_MAC_LABEL) {
#ifdef MAC
                error = mac_set_proc_prepare(td, wcred->sc_label,
                    &mac_set_proc_data);
                if (error != 0)
                        return (error);
#else
                return (ENOTSUP);
#endif
        }

        /*
         * From this point on, there is no early return: all paths go through
         * the cleanup code of part 3.
         */

        if (flags & SETCREDF_UID) {
                AUDIT_ARG_EUID(wcred->sc_uid);
                uip = uifind(wcred->sc_uid);
        }
        if (flags & SETCREDF_RUID) {
                AUDIT_ARG_RUID(wcred->sc_ruid);
                ruip = uifind(wcred->sc_ruid);
        }
        if (flags & SETCREDF_SVUID)
                AUDIT_ARG_SUID(wcred->sc_svuid);

        if (flags & SETCREDF_GID)
                AUDIT_ARG_EGID(wcred->sc_gid);
        if (flags & SETCREDF_RGID)
                AUDIT_ARG_RGID(wcred->sc_rgid);
        if (flags & SETCREDF_SVGID)
                AUDIT_ARG_SGID(wcred->sc_svgid);
        if (flags & SETCREDF_SUPP_GROUPS) {
                /*
                 * Output the raw supplementary groups array for better
                 * traceability.
                 */
                AUDIT_ARG_GROUPSET(wcred->sc_supp_groups,
                    wcred->sc_supp_groups_nb);
                /* May change both the array and the count in 'wcred'. */
                groups_normalize(&wcred->sc_supp_groups_nb,
                    wcred->sc_supp_groups);
        }

        /*
         * We first completely build the new credentials and only then pass them
         * to MAC along with the old ones so that modules can check whether the
         * requested transition is allowed.
         */
        new_cred = crget();
        /*
         * 'to_free_cred' designates the credentials released at the end: the
         * new ones as long as they have not been installed, the old ones
         * afterwards.
         */
        to_free_cred = new_cred;
        if (flags & SETCREDF_SUPP_GROUPS)
                crextend(new_cred, wcred->sc_supp_groups_nb);

#ifdef MAC
        mac_cred_setcred_enter();
#endif

        /*
         * Part 2: We grab the process lock as to have a stable view of its
         * current credentials, and prepare a copy of them with the requested
         * changes applied under that lock.
         */

        PROC_LOCK(p);
        old_cred = crcopysafe(p, new_cred);

        /*
         * Change user IDs.
         */
        if (flags & SETCREDF_UID)
                change_euid(new_cred, uip);
        if (flags & SETCREDF_RUID)
                change_ruid(new_cred, ruip);
        if (flags & SETCREDF_SVUID)
                change_svuid(new_cred, wcred->sc_svuid);

        /*
         * Change groups.
         */
        if (flags & SETCREDF_SUPP_GROUPS)
                crsetgroups_internal(new_cred, wcred->sc_supp_groups_nb,
                    wcred->sc_supp_groups);
        if (flags & SETCREDF_GID)
                change_egid(new_cred, wcred->sc_gid);
        if (flags & SETCREDF_RGID)
                change_rgid(new_cred, wcred->sc_rgid);
        if (flags & SETCREDF_SVGID)
                change_svgid(new_cred, wcred->sc_svgid);

#ifdef MAC
        /*
         * Change the MAC label.
         */
        if (flags & SETCREDF_MAC_LABEL) {
                error = mac_set_proc_core(td, new_cred, mac_set_proc_data);
                if (error != 0)
                        goto unlock_finish;
                proc_label_set = true;
        }

        /*
         * MAC security modules checks.
         */
        error = mac_cred_check_setcred(flags, old_cred, new_cred);
        if (error != 0)
                goto unlock_finish;
#endif
        /*
         * Privilege check.
         */
        error = priv_check_cred(old_cred, PRIV_CRED_SETCRED);
        if (error != 0)
                goto unlock_finish;

#ifdef RACCT
        /*
         * Hold a reference to 'new_cred', as we need to call some functions on
         * it after proc_set_cred_enforce_proc_lim().
         */
        crhold(new_cred);
#endif

        /* Set the new credentials. */
        cred_set = proc_set_cred_enforce_proc_lim(p, new_cred);
        if (cred_set) {
                setsugid(p);
#ifdef RACCT
                /* Adjust RACCT counters. */
                racct_proc_ucred_changed(p, old_cred, new_cred);
#endif
                to_free_cred = old_cred;
                MPASS(error == 0);
        } else {
#ifdef RACCT
                /* Matches the crhold() just before the containing 'if'. */
                crfree(new_cred);
#endif
                error = EAGAIN;
        }

unlock_finish:
        PROC_UNLOCK(p);

        /*
         * Part 3: After releasing the process lock, we perform cleanups and
         * finishing operations.
         */

#ifdef RACCT
        /*
         * 'cred_set' is false when arriving here through an error 'goto', in
         * which case the extra reference was never taken.
         */
        if (cred_set) {
#ifdef RCTL
                rctl_proc_ucred_changed(p, new_cred);
#endif
                /* Paired with the crhold() above. */
                crfree(new_cred);
        }
#endif

#ifdef MAC
        if (mac_set_proc_data != NULL)
                mac_set_proc_finish(td, proc_label_set, mac_set_proc_data);
        mac_cred_setcred_exit();
#endif
        crfree(to_free_cred);
        /* Drop the references obtained from uifind() in part 1. */
        if (uip != NULL)
                uifree(uip);
        if (ruip != NULL)
                uifree(ruip);

        return (error);
}

/*
 * Use the clause in B.4.2.2 that allows setuid/setgid to be 4.2/4.3BSD
 * compatible.  It says that setting the uid/gid to euid/egid is a special
 * case of "appropriate privilege".  Once the rules are expanded out, this
 * basically means that setuid(nnn) sets all three id's, in all permitted
 * cases unless _POSIX_SAVED_IDS is enabled.  In that case, setuid(getuid())
 * does not set the saved id - this is dangerous for traditional BSD
 * programs.  For this reason, we *really* do not want to set
 * _POSIX_SAVED_IDS and do not want to clear POSIX_APPENDIX_B_4_2_2.
 */
#define POSIX_APPENDIX_B_4_2_2

/*
 * Set the user IDs of the calling process to 'uid'.
 *
 * The call is allowed without privilege if 'uid' is the current real uid, the
 * current effective uid (under POSIX_APPENDIX_B_4_2_2) or the current saved
 * uid (under _POSIX_SAVED_IDS); else PRIV_CRED_SETUID is required.  The
 * effective uid is always changed.  The real and saved uids are changed
 * unconditionally if _POSIX_SAVED_IDS is not defined, else only if the extra
 * test compiled in under that option passes.
 *
 * Returns 0 on success, or the error of the MAC check or of the privilege
 * check.
 */
#ifndef _SYS_SYSPROTO_H_
struct setuid_args {
        uid_t   uid;
};
#endif
/* ARGSUSED */
int
sys_setuid(struct thread *td, struct setuid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        uid_t uid;
        struct uidinfo *uip;
        int error;

        uid = uap->uid;
        AUDIT_ARG_UID(uid);
        /* Both obtained before taking the process lock. */
        newcred = crget();
        uip = uifind(uid);
        PROC_LOCK(p);
        /*
         * Copy credentials so other references do not see our changes.
         */
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_setuid(oldcred, uid);
        if (error)
                goto fail;
#endif

        /*
         * See if we have "permission" by POSIX 1003.1 rules.
         *
         * Note that setuid(geteuid()) is a special case of
         * "appropriate privileges" in appendix B.4.2.2.  We need
         * to use this clause to be compatible with traditional BSD
         * semantics.  Basically, it means that "setuid(xx)" sets all
         * three id's (assuming you have privs).
         *
         * Notes on the logic.  We do things in three steps.
         * 1: We determine if the euid is going to change, and do EPERM
         *    right away.  We unconditionally change the euid later if this
         *    test is satisfied, simplifying that part of the logic.
         * 2: We determine if the real and/or saved uids are going to
         *    change.  Determined by compile options.
         * 3: Change euid last. (after tests in #2 for "appropriate privs")
         */
        if (uid != oldcred->cr_ruid &&          /* allow setuid(getuid()) */
#ifdef _POSIX_SAVED_IDS
            uid != oldcred->cr_svuid &&         /* allow setuid(saved uid) */
#endif
#ifdef POSIX_APPENDIX_B_4_2_2   /* Use BSD-compat clause from B.4.2.2 */
            uid != oldcred->cr_uid &&           /* allow setuid(geteuid()) */
#endif
            (error = priv_check_cred(oldcred, PRIV_CRED_SETUID)) != 0)
                goto fail;

#ifdef _POSIX_SAVED_IDS
        /*
         * Do we have "appropriate privileges" (are we root or uid == euid)
         * If so, we are changing the real uid and/or saved uid.
         */
        if (
#ifdef POSIX_APPENDIX_B_4_2_2   /* Use the clause from B.4.2.2 */
            uid == oldcred->cr_uid ||
#endif
            /* We are using privs. */
            priv_check_cred(oldcred, PRIV_CRED_SETUID) == 0)
#endif
        /*
         * Without _POSIX_SAVED_IDS, the 'if' above is not compiled and the
         * following block is executed unconditionally.
         */
        {
                /*
                 * Set the real uid.
                 */
                if (uid != oldcred->cr_ruid) {
                        change_ruid(newcred, uip);
                        setsugid(p);
                }
                /*
                 * Set saved uid
                 *
                 * XXX always set saved uid even if not _POSIX_SAVED_IDS, as
                 * the security of seteuid() depends on it.  B.4.2.2 says it
                 * is important that we should do this.
                 */
                if (uid != oldcred->cr_svuid) {
                        change_svuid(newcred, uid);
                        setsugid(p);
                }
        }

        /*
         * In all permitted cases, we are changing the euid.
         */
        if (uid != oldcred->cr_uid) {
                change_euid(newcred, uip);
                setsugid(p);
        }

#ifdef RACCT
        racct_proc_ucred_changed(p, oldcred, newcred);
#endif
#ifdef RCTL
        crhold(newcred);
#endif
        /*
         * Takes over 'newcred''s reference, so 'newcred' must not be used
         * besides this point except on RCTL where we took an additional
         * reference above.
         */
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
#ifdef RCTL
        rctl_proc_ucred_changed(p, newcred);
        crfree(newcred);
#endif
        uifree(uip);
        crfree(oldcred);
        return (0);

fail:
        /* Nothing was installed: release the unused new credentials. */
        PROC_UNLOCK(p);
        uifree(uip);
        crfree(newcred);
        return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct seteuid_args {
        uid_t   euid;
};
#endif
/* ARGSUSED */
int
sys_seteuid(struct thread *td, struct seteuid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        uid_t euid;
        struct uidinfo *euip;
        int error;

        euid = uap->euid;
        AUDIT_ARG_EUID(euid);
        newcred = crget();
        euip = uifind(euid);
        PROC_LOCK(p);
        /*
         * Copy credentials so other references do not see our changes.
         */
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_seteuid(oldcred, euid);
        if (error)
                goto fail;
#endif

        if (euid != oldcred->cr_ruid &&         /* allow seteuid(getuid()) */
            euid != oldcred->cr_svuid &&        /* allow seteuid(saved uid) */
            (error = priv_check_cred(oldcred, PRIV_CRED_SETEUID)) != 0)
                goto fail;

        /*
         * Everything's okay, do it.
         */
        if (oldcred->cr_uid != euid) {
                change_euid(newcred, euip);
                setsugid(p);
        }
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
        uifree(euip);
        crfree(oldcred);
        return (0);

fail:
        PROC_UNLOCK(p);
        uifree(euip);
        crfree(newcred);
        return (error);
}

/*
 * Set the group IDs of the calling process to 'gid'.
 *
 * The call is allowed without privilege if 'gid' is the current real gid, the
 * current effective gid (under POSIX_APPENDIX_B_4_2_2) or the current saved
 * gid (under _POSIX_SAVED_IDS); else PRIV_CRED_SETGID is required.  The
 * effective gid is always changed.  The real and saved gids are changed
 * unconditionally if _POSIX_SAVED_IDS is not defined, else only if the extra
 * test compiled in under that option passes.
 *
 * Returns 0 on success, or the error of the MAC check or of the privilege
 * check.
 */
#ifndef _SYS_SYSPROTO_H_
struct setgid_args {
        gid_t   gid;
};
#endif
/* ARGSUSED */
int
sys_setgid(struct thread *td, struct setgid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        gid_t gid;
        int error;

        gid = uap->gid;
        AUDIT_ARG_GID(gid);
        newcred = crget();
        PROC_LOCK(p);
        /*
         * Copy credentials so other references do not see our changes.
         */
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_setgid(oldcred, gid);
        if (error)
                goto fail;
#endif

        /*
         * See if we have "permission" by POSIX 1003.1 rules.
         *
         * Note that setgid(getegid()) is a special case of
         * "appropriate privileges" in appendix B.4.2.2.  We need
         * to use this clause to be compatible with traditional BSD
         * semantics.  Basically, it means that "setgid(xx)" sets all
         * three id's (assuming you have privs).
         *
         * For notes on the logic here, see setuid() above.
         */
        if (gid != oldcred->cr_rgid &&          /* allow setgid(getgid()) */
#ifdef _POSIX_SAVED_IDS
            gid != oldcred->cr_svgid &&         /* allow setgid(saved gid) */
#endif
#ifdef POSIX_APPENDIX_B_4_2_2   /* Use BSD-compat clause from B.4.2.2 */
            gid != oldcred->cr_gid && /* allow setgid(getegid()) */
#endif
            (error = priv_check_cred(oldcred, PRIV_CRED_SETGID)) != 0)
                goto fail;

#ifdef _POSIX_SAVED_IDS
        /*
         * Do we have "appropriate privileges" (are we root or gid == egid)
         * If so, we are changing the real gid and saved gid.
         */
        if (
#ifdef POSIX_APPENDIX_B_4_2_2   /* use the clause from B.4.2.2 */
            gid == oldcred->cr_gid ||
#endif
            /* We are using privs. */
            priv_check_cred(oldcred, PRIV_CRED_SETGID) == 0)
#endif
        /*
         * Without _POSIX_SAVED_IDS, the 'if' above is not compiled and the
         * following block is executed unconditionally.
         */
        {
                /*
                 * Set real gid
                 */
                if (oldcred->cr_rgid != gid) {
                        change_rgid(newcred, gid);
                        setsugid(p);
                }
                /*
                 * Set saved gid
                 *
                 * XXX always set saved gid even if not _POSIX_SAVED_IDS, as
                 * the security of setegid() depends on it.  B.4.2.2 says it
                 * is important that we should do this.
                 */
                if (oldcred->cr_svgid != gid) {
                        change_svgid(newcred, gid);
                        setsugid(p);
                }
        }
        /*
         * In all permitted cases, we are changing the egid.
         */
        if (oldcred->cr_gid != gid) {
                change_egid(newcred, gid);
                setsugid(p);
        }
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
        crfree(oldcred);
        return (0);

fail:
        /* Nothing was installed: release the unused new credentials. */
        PROC_UNLOCK(p);
        crfree(newcred);
        return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setegid_args {
        gid_t   egid;
};
#endif
/* ARGSUSED */
int
sys_setegid(struct thread *td, struct setegid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        gid_t egid;
        int error;

        egid = uap->egid;
        AUDIT_ARG_EGID(egid);
        newcred = crget();
        PROC_LOCK(p);
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_setegid(oldcred, egid);
        if (error)
                goto fail;
#endif

        if (egid != oldcred->cr_rgid &&         /* allow setegid(getgid()) */
            egid != oldcred->cr_svgid &&        /* allow setegid(saved gid) */
            (error = priv_check_cred(oldcred, PRIV_CRED_SETEGID)) != 0)
                goto fail;

        if (oldcred->cr_gid != egid) {
                change_egid(newcred, egid);
                setsugid(p);
        }
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
        crfree(oldcred);
        return (0);

fail:
        PROC_UNLOCK(p);
        crfree(newcred);
        return (error);
}

#ifdef COMPAT_FREEBSD14
/*
 * Compatibility setgroups(2) for the pre-FreeBSD 15.0 ABI, in which the
 * first element of the supplied array is the effective gid and the
 * remaining elements are the supplementary groups.
 *
 * The supplementary groups are installed through kern_setgroups(), which
 * performs the MAC and PRIV_CRED_SETGROUPS checks.  If that succeeds and at
 * least one group was supplied, the effective gid is then set to groups[0].
 *
 * Returns 0 on success, EINVAL for an out-of-range 'gidsetsize', or the
 * error from copyin() / kern_setgroups().
 */
int
freebsd14_setgroups(struct thread *td, struct freebsd14_setgroups_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        gid_t smallgroups[CRED_SMALLGROUPS_NB];
        gid_t *groups;
        int gidsetsize, error;

        /*
         * Before FreeBSD 15.0, we allow one more group to be supplied to
         * account for the egid appearing before the supplementary groups.  This
         * may technically allow one more supplementary group for systems that
         * did use the default NGROUPS_MAX if we round it back up to 1024.
         */
        gidsetsize = uap->gidsetsize;
        if (gidsetsize > ngroups_max + 1 || gidsetsize < 0)
                return (EINVAL);

        /* Small sets live on the stack; larger ones need a heap buffer. */
        if (gidsetsize > CRED_SMALLGROUPS_NB)
                groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK);
        else
                groups = smallgroups;

        error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t));
        if (error == 0) {
                int ngroups = gidsetsize > 0 ? gidsetsize - 1 /* egid */ : 0;

                /*
                 * kern_setgroups() normalizes the array it is given in
                 * place, but that array starts at groups + 1, so groups[0]
                 * (the egid) is left untouched.
                 */
                error = kern_setgroups(td, &ngroups, groups + 1);
                if (error == 0 && gidsetsize > 0) {
                        /*
                         * Credentials are reference-counted and may be
                         * shared, so they must never be modified in place,
                         * and p_ucred must not be dereferenced without the
                         * process lock.  Install the egid the same way the
                         * other setters in this file do: copy, modify the
                         * private copy, then swap it in under the lock.
                         *
                         * kern_setgroups() has already performed the
                         * privilege check and marked the process with
                         * setsugid().
                         *
                         * NOTE(review): the groups and the egid are still
                         * installed in two separate steps, so another thread
                         * may briefly observe the new groups with the old
                         * egid.  Making this atomic needs a kern_setgroups()
                         * variant that also takes the egid.
                         */
                        newcred = crget();
                        PROC_LOCK(p);
                        oldcred = crcopysafe(p, newcred);
                        change_egid(newcred, groups[0]);
                        proc_set_cred(p, newcred);
                        PROC_UNLOCK(p);
                        crfree(oldcred);
                }
        }

        if (groups != smallgroups)
                free(groups, M_TEMP);
        return (error);
}
#endif  /* COMPAT_FREEBSD14 */

#ifndef _SYS_SYSPROTO_H_
struct setgroups_args {
        int     gidsetsize;
        gid_t   *gidset;
};
#endif
/* ARGSUSED */
int
sys_setgroups(struct thread *td, struct setgroups_args *uap)
{
        gid_t smallgroups[CRED_SMALLGROUPS_NB];
        gid_t *groups;
        int gidsetsize, error;

        /*
         * Sanity check size now to avoid passing too big a value to copyin(),
         * even if kern_setgroups() will do it again.
         *
         * Ideally, the 'gidsetsize' argument should have been a 'u_int' (and it
         * was, in this implementation, for a long time), but POSIX standardized
         * getgroups() to take an 'int' and it would be quite entrapping to have
         * setgroups() differ.
         */
        gidsetsize = uap->gidsetsize;
        if (gidsetsize > ngroups_max || gidsetsize < 0)
                return (EINVAL);

        if (gidsetsize > CRED_SMALLGROUPS_NB)
                groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK);
        else
                groups = smallgroups;

        error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t));
        if (error == 0)
                error = kern_setgroups(td, &gidsetsize, groups);

        if (groups != smallgroups)
                free(groups, M_TEMP);
        return (error);
}

/*
 * Replace the supplementary groups of td's process with the '*ngrpp' entries
 * of 'groups'.
 *
 * CAUTION: 'groups' is normalized in place by groups_normalize(), and
 * '*ngrpp' is updated with the resulting count.
 *
 * Returns 0 on success, EINVAL if the count is negative or above
 * ngroups_max, or the error from the MAC hook / PRIV_CRED_SETGROUPS check.
 */
int
kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
{
        struct proc *p;
        struct ucred *newcred, *oldcred;
        int count, error;

        p = td->td_proc;
        count = *ngrpp;

        /* Sanity check size. */
        if (count < 0 || count > ngroups_max)
                return (EINVAL);

        /* Audit what the caller asked for, before normalization. */
        AUDIT_ARG_GROUPSET(groups, count);

        groups_normalize(&count, groups);
        *ngrpp = count;

        /* Size the new credential for the group list before locking. */
        newcred = crget();
        crextend(newcred, count);
        PROC_LOCK(p);
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        /*
         * An empty set is passed as NULL, both for the sake of
         * normalization and so that a MAC module relying on the historical
         * layout of `groups` is detected rather than silently misled.
         */
        error = mac_cred_check_setgroups(oldcred, count,
            count != 0 ? groups : NULL);
        if (error != 0)
                goto fail;
#endif

        error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS);
        if (error != 0)
                goto fail;

        crsetgroups_internal(newcred, count, groups);
        setsugid(p);
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
        crfree(oldcred);
        return (0);

fail:
        PROC_UNLOCK(p);
        crfree(newcred);
        return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setreuid_args {
        uid_t   ruid;
        uid_t   euid;
};
#endif
/*
 * setreuid(2): set the real and/or effective user ID of the calling
 * process.  An argument of (uid_t)-1 leaves the corresponding ID unchanged.
 *
 * Returns 0 on success, or the error from the MAC hook or from the
 * PRIV_CRED_SETREUID check when the request is refused.
 */
/* ARGSUSED */
int
sys_setreuid(struct thread *td, struct setreuid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        uid_t euid, ruid;
        struct uidinfo *euip, *ruip;
        int error;

        euid = uap->euid;
        ruid = uap->ruid;
        AUDIT_ARG_EUID(euid);
        AUDIT_ARG_RUID(ruid);
        /*
         * The new credential and both uidinfo references are obtained before
         * the process lock is taken, whether or not they end up being used;
         * every exit path below releases them with uifree().
         * NOTE(review): presumably done up front because these may sleep --
         * confirm against crget()/uifind().
         */
        newcred = crget();
        euip = uifind(euid);
        ruip = uifind(ruid);
        PROC_LOCK(p);
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_setreuid(oldcred, ruid, euid);
        if (error)
                goto fail;
#endif

        /*
         * Privilege is required if the requested real uid is neither the
         * current real nor saved uid, or if the requested effective uid is
         * none of the current effective, real or saved uids.  The privilege
         * check is only evaluated in those cases.
         */
        if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid &&
              ruid != oldcred->cr_svuid) ||
             (euid != (uid_t)-1 && euid != oldcred->cr_uid &&
              euid != oldcred->cr_ruid && euid != oldcred->cr_svuid)) &&
            (error = priv_check_cred(oldcred, PRIV_CRED_SETREUID)) != 0)
                goto fail;

        if (euid != (uid_t)-1 && oldcred->cr_uid != euid) {
                change_euid(newcred, euip);
                setsugid(p);
        }
        if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) {
                change_ruid(newcred, ruip);
                setsugid(p);
        }
        /*
         * The saved uid follows the (new) effective uid whenever a real uid
         * was supplied or the resulting effective and real uids differ.
         */
        if ((ruid != (uid_t)-1 || newcred->cr_uid != newcred->cr_ruid) &&
            newcred->cr_svuid != newcred->cr_uid) {
                change_svuid(newcred, newcred->cr_uid);
                setsugid(p);
        }
#ifdef RACCT
        racct_proc_ucred_changed(p, oldcred, newcred);
#endif
#ifdef RCTL
        crhold(newcred);
#endif
        /*
         * Takes over 'newcred''s reference, so 'newcred' must not be used
         * besides this point except on RCTL where we took an additional
         * reference above.
         */
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
#ifdef RCTL
        /* Called after the process lock is dropped; then drop the extra ref. */
        rctl_proc_ucred_changed(p, newcred);
        crfree(newcred);
#endif
        uifree(ruip);
        uifree(euip);
        crfree(oldcred);
        return (0);

fail:
        /* Nothing was installed: release everything acquired above. */
        PROC_UNLOCK(p);
        uifree(ruip);
        uifree(euip);
        crfree(newcred);
        return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setregid_args {
        gid_t   rgid;
        gid_t   egid;
};
#endif
/* ARGSUSED */
int
sys_setregid(struct thread *td, struct setregid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        gid_t egid, rgid;
        int error;

        egid = uap->egid;
        rgid = uap->rgid;
        AUDIT_ARG_EGID(egid);
        AUDIT_ARG_RGID(rgid);
        newcred = crget();
        PROC_LOCK(p);
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_setregid(oldcred, rgid, egid);
        if (error)
                goto fail;
#endif

        if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid &&
            rgid != oldcred->cr_svgid) ||
             (egid != (gid_t)-1 && egid != oldcred->cr_gid &&
             egid != oldcred->cr_rgid && egid != oldcred->cr_svgid)) &&
            (error = priv_check_cred(oldcred, PRIV_CRED_SETREGID)) != 0)
                goto fail;

        if (egid != (gid_t)-1 && oldcred->cr_gid != egid) {
                change_egid(newcred, egid);
                setsugid(p);
        }
        if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) {
                change_rgid(newcred, rgid);
                setsugid(p);
        }
        if ((rgid != (gid_t)-1 || newcred->cr_gid != newcred->cr_rgid) &&
            newcred->cr_svgid != newcred->cr_gid) {
                change_svgid(newcred, newcred->cr_gid);
                setsugid(p);
        }
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
        crfree(oldcred);
        return (0);

fail:
        PROC_UNLOCK(p);
        crfree(newcred);
        return (error);
}

/*
 * setresuid(ruid, euid, suid) is like setreuid except control over the saved
 * uid is explicit.
 */
#ifndef _SYS_SYSPROTO_H_
struct setresuid_args {
        uid_t   ruid;
        uid_t   euid;
        uid_t   suid;
};
#endif
/*
 * setresuid(2): set the real, effective and saved user IDs of the calling
 * process.  An argument of (uid_t)-1 leaves the corresponding ID unchanged.
 *
 * Returns 0 on success, or the error from the MAC hook or from the
 * PRIV_CRED_SETRESUID check when the request is refused.
 */
/* ARGSUSED */
int
sys_setresuid(struct thread *td, struct setresuid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        uid_t euid, ruid, suid;
        struct uidinfo *euip, *ruip;
        int error;

        euid = uap->euid;
        ruid = uap->ruid;
        suid = uap->suid;
        AUDIT_ARG_EUID(euid);
        AUDIT_ARG_RUID(ruid);
        AUDIT_ARG_SUID(suid);
        /*
         * The new credential and both uidinfo references are obtained before
         * the process lock is taken, whether or not they end up being used;
         * every exit path below releases them with uifree().
         */
        newcred = crget();
        euip = uifind(euid);
        ruip = uifind(ruid);
        PROC_LOCK(p);
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_setresuid(oldcred, ruid, euid, suid);
        if (error)
                goto fail;
#endif

        /*
         * Each requested ID may, without privilege, be set to any of the
         * current real, saved or effective uids.  The privilege check is
         * only evaluated if at least one requested ID is none of those.
         */
        if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid &&
             ruid != oldcred->cr_svuid &&
              ruid != oldcred->cr_uid) ||
             (euid != (uid_t)-1 && euid != oldcred->cr_ruid &&
            euid != oldcred->cr_svuid &&
              euid != oldcred->cr_uid) ||
             (suid != (uid_t)-1 && suid != oldcred->cr_ruid &&
            suid != oldcred->cr_svuid &&
              suid != oldcred->cr_uid)) &&
            (error = priv_check_cred(oldcred, PRIV_CRED_SETRESUID)) != 0)
                goto fail;

        if (euid != (uid_t)-1 && oldcred->cr_uid != euid) {
                change_euid(newcred, euip);
                setsugid(p);
        }
        if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) {
                change_ruid(newcred, ruip);
                setsugid(p);
        }
        if (suid != (uid_t)-1 && oldcred->cr_svuid != suid) {
                change_svuid(newcred, suid);
                setsugid(p);
        }
#ifdef RACCT
        racct_proc_ucred_changed(p, oldcred, newcred);
#endif
#ifdef RCTL
        crhold(newcred);
#endif
        /*
         * Takes over 'newcred''s reference, so 'newcred' must not be used
         * besides this point except on RCTL where we took an additional
         * reference above.
         */
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
#ifdef RCTL
        /* Called after the process lock is dropped; then drop the extra ref. */
        rctl_proc_ucred_changed(p, newcred);
        crfree(newcred);
#endif
        uifree(ruip);
        uifree(euip);
        crfree(oldcred);
        return (0);

fail:
        /* Nothing was installed: release everything acquired above. */
        PROC_UNLOCK(p);
        uifree(ruip);
        uifree(euip);
        crfree(newcred);
        return (error);

}

/*
 * setresgid(rgid, egid, sgid) is like setregid except control over the saved
 * gid is explicit.
 */
#ifndef _SYS_SYSPROTO_H_
struct setresgid_args {
        gid_t   rgid;
        gid_t   egid;
        gid_t   sgid;
};
#endif
/* ARGSUSED */
int
sys_setresgid(struct thread *td, struct setresgid_args *uap)
{
        struct proc *p = td->td_proc;
        struct ucred *newcred, *oldcred;
        gid_t egid, rgid, sgid;
        int error;

        egid = uap->egid;
        rgid = uap->rgid;
        sgid = uap->sgid;
        AUDIT_ARG_EGID(egid);
        AUDIT_ARG_RGID(rgid);
        AUDIT_ARG_SGID(sgid);
        newcred = crget();
        PROC_LOCK(p);
        oldcred = crcopysafe(p, newcred);

#ifdef MAC
        error = mac_cred_check_setresgid(oldcred, rgid, egid, sgid);
        if (error)
                goto fail;
#endif

        if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid &&
              rgid != oldcred->cr_svgid &&
              rgid != oldcred->cr_gid) ||
             (egid != (gid_t)-1 && egid != oldcred->cr_rgid &&
              egid != oldcred->cr_svgid &&
              egid != oldcred->cr_gid) ||
             (sgid != (gid_t)-1 && sgid != oldcred->cr_rgid &&
              sgid != oldcred->cr_svgid &&
              sgid != oldcred->cr_gid)) &&
            (error = priv_check_cred(oldcred, PRIV_CRED_SETRESGID)) != 0)
                goto fail;

        if (egid != (gid_t)-1 && oldcred->cr_gid != egid) {
                change_egid(newcred, egid);
                setsugid(p);
        }
        if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) {
                change_rgid(newcred, rgid);
                setsugid(p);
        }
        if (sgid != (gid_t)-1 && oldcred->cr_svgid != sgid) {
                change_svgid(newcred, sgid);
                setsugid(p);
        }
        proc_set_cred(p, newcred);
        PROC_UNLOCK(p);
        crfree(oldcred);
        return (0);

fail:
        PROC_UNLOCK(p);
        crfree(newcred);
        return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct getresuid_args {
        uid_t   *ruid;
        uid_t   *euid;
        uid_t   *suid;
};
#endif
/*
 * getresuid(2): copy the thread's real, effective and saved uids out to
 * whichever of the three user pointers are non-NULL.
 *
 * All requested copies are attempted; the first failure (in ruid, euid,
 * suid order) is what gets returned.
 */
/* ARGSUSED */
int
sys_getresuid(struct thread *td, struct getresuid_args *uap)
{
        struct ucred *cred;
        int rerr, eerr, serr;

        rerr = eerr = serr = 0;
        cred = td->td_ucred;

        if (uap->ruid)
                rerr = copyout(&cred->cr_ruid, uap->ruid,
                    sizeof(cred->cr_ruid));
        if (uap->euid)
                eerr = copyout(&cred->cr_uid, uap->euid,
                    sizeof(cred->cr_uid));
        if (uap->suid)
                serr = copyout(&cred->cr_svuid, uap->suid,
                    sizeof(cred->cr_svuid));

        if (rerr != 0)
                return (rerr);
        if (eerr != 0)
                return (eerr);
        return (serr);
}

#ifndef _SYS_SYSPROTO_H_
struct getresgid_args {
        gid_t   *rgid;
        gid_t   *egid;
        gid_t   *sgid;
};
#endif
/*
 * getresgid(2): copy the thread's real, effective and saved gids out to
 * whichever of the three user pointers are non-NULL.
 *
 * All requested copies are attempted; the first failure (in rgid, egid,
 * sgid order) is what gets returned.
 */
/* ARGSUSED */
int
sys_getresgid(struct thread *td, struct getresgid_args *uap)
{
        struct ucred *cred;
        int rerr, eerr, serr;

        rerr = eerr = serr = 0;
        cred = td->td_ucred;

        if (uap->rgid)
                rerr = copyout(&cred->cr_rgid, uap->rgid,
                    sizeof(cred->cr_rgid));
        if (uap->egid)
                eerr = copyout(&cred->cr_gid, uap->egid,
                    sizeof(cred->cr_gid));
        if (uap->sgid)
                serr = copyout(&cred->cr_svgid, uap->sgid,
                    sizeof(cred->cr_svgid));

        if (rerr != 0)
                return (rerr);
        if (eerr != 0)
                return (eerr);
        return (serr);
}

#ifndef _SYS_SYSPROTO_H_
struct issetugid_args {
        int dummy;
};
#endif
/*
 * issetugid(2): report (as 1 or 0 in td_retval[0]) whether the calling
 * process has P_SUGID set.  Always returns 0.
 */
/* ARGSUSED */
int
sys_issetugid(struct thread *td, struct issetugid_args *uap)
{
        /*
         * Note: OpenBSD sets a P_SUGIDEXEC flag at execve() time; we use
         * P_SUGID instead because we consider changing the owners as
         * "tainting" as well.
         * This is significant for procs that start as root and "become"
         * a user without an exec - programs cannot know *everything*
         * that libc *might* have put in their data segment.
         */
        td->td_retval[0] = (td->td_proc->p_flag & P_SUGID) != 0;
        return (0);
}

/*
 * __setugid(2): on REGRESSION kernels, clear (flag 0) or set (flag 1) the
 * P_SUGID flag of the calling process; any other flag value yields EINVAL.
 * On other kernels the call is not implemented and returns ENOSYS.
 */
int
sys___setugid(struct thread *td, struct __setugid_args *uap)
{
#ifdef REGRESSION
        struct proc *p;

        if (uap->flag != 0 && uap->flag != 1)
                return (EINVAL);

        p = td->td_proc;
        PROC_LOCK(p);
        if (uap->flag == 1)
                p->p_flag |= P_SUGID;
        else
                p->p_flag &= ~P_SUGID;
        PROC_UNLOCK(p);
        return (0);
#else /* !REGRESSION */

        return (ENOSYS);
#endif /* REGRESSION */
}

#ifdef INVARIANTS
/*
 * INVARIANTS-only check: after validating the length, panic unless
 * 'groups' is strictly increasing over its first 'ngrp' entries.
 */
static void
groups_check_normalized(int ngrp, const gid_t *groups)
{
        groups_check_positive_len(ngrp);
        groups_check_max_len(ngrp);

        /* With zero or one element there is no pair to compare. */
        for (int i = 1; i < ngrp; ++i) {
                if (groups[i - 1] >= groups[i])
                        panic("%s: groups[%d] (%u) >= groups[%d] (%u)",
                            __func__, i - 1, groups[i - 1], i, groups[i]);
        }
}
#else
#define groups_check_normalized(...)
#endif

/*
 * Returns whether gid designates a supplementary group in cred.
 */
bool
group_is_supplementary(const gid_t gid, const struct ucred *const cred)
{
        const void *hit;

        groups_check_normalized(cred->cr_ngroups, cred->cr_groups);

        /*
         * The supplementary groups are kept sorted (see crsetgroups()), so
         * membership can be decided with a binary search.
         */
        hit = bsearch(&gid, cred->cr_groups, cred->cr_ngroups,
            sizeof(gid), gidp_cmp);
        return (hit != NULL);
}

/*
 * Check if gid is a member of the (effective) group set (i.e., effective and
 * supplementary groups).
 */
bool
groupmember(gid_t gid, const struct ucred *cred)
{

        groups_check_positive_len(cred->cr_ngroups);

        /* The effective gid counts first, then the supplementary groups. */
        return (gid == cred->cr_gid || group_is_supplementary(gid, cred));
}

/*
 * Check if gid is a member of the real group set (i.e., real and supplementary
 * groups).
 */
bool
realgroupmember(gid_t gid, const struct ucred *cred)
{
        groups_check_positive_len(cred->cr_ngroups);

        /* The real gid counts first, then the supplementary groups. */
        return (gid == cred->cr_rgid || group_is_supplementary(gid, cred));
}

/*
 * Test the active securelevel against a given level.  securelevel_gt()
 * implements (securelevel > level).  securelevel_ge() implements
 * (securelevel >= level).  Note that the logic is inverted -- these
 * functions return EPERM on "success" and 0 on "failure".
 *
 * Due to care taken when setting the securelevel, we know that no jail will
 * be less secure than its parent (or the physical system), so it is sufficient
 * to test the current jail only.
 *
 * XXXRW: Possibly since this has to do with privilege, it should move to
 * kern_priv.c.
 */
int
securelevel_gt(struct ucred *cr, int level)
{

        /* Inverted sense: EPERM means "securelevel > level" holds. */
        if (cr->cr_prison->pr_securelevel > level)
                return (EPERM);
        return (0);
}

/*
 * Returns EPERM if the securelevel of cr's prison is greater than or equal
 * to 'level', 0 otherwise.
 */
int
securelevel_ge(struct ucred *cr, int level)
{

        if (cr->cr_prison->pr_securelevel >= level)
                return (EPERM);
        return (0);
}

/*
 * 'see_other_uids' determines whether or not visibility of processes
 * and sockets with credentials holding different real uids is possible
 * using a variety of system MIBs.
 * XXX: data declarations should be together near the beginning of the file.
 */
static int      see_other_uids = 1;
SYSCTL_INT(_security_bsd, OID_AUTO, see_other_uids, CTLFLAG_RW,
    &see_other_uids, 0,
    "Unprivileged processes may see subjects/objects with different real uid");

/*-
 * Determine if u1 "can see" the subject specified by u2, according to the
 * 'see_other_uids' policy.
 * Returns: 0 for permitted, ESRCH otherwise
 * Locks: none
 * References: *u1 and *u2 must not change during the call
 *             u1 may equal u2, in which case only one reference is required
 */
static int
cr_canseeotheruids(struct ucred *u1, struct ucred *u2)
{

        /* Policy disabled, or both credentials share the same real uid. */
        if (see_other_uids || u1->cr_ruid == u2->cr_ruid)
                return (0);

        /* Otherwise only a privileged subject may look. */
        return (priv_check_cred(u1, PRIV_SEEOTHERUIDS) != 0 ? ESRCH : 0);
}

/*
 * 'see_other_gids' determines whether or not visibility of processes
 * and sockets with credentials holding different real gids is possible
 * using a variety of system MIBs.
 * XXX: data declarations should be together near the beginning of the file.
 */
static int      see_other_gids = 1;
SYSCTL_INT(_security_bsd, OID_AUTO, see_other_gids, CTLFLAG_RW,
    &see_other_gids, 0,
    "Unprivileged processes may see subjects/objects with different real gid");

/*
 * Determine if u1 can "see" the subject specified by u2, according to the
 * 'see_other_gids' policy.
 * Returns: 0 for permitted, ESRCH otherwise
 * Locks: none
 * References: *u1 and *u2 must not change during the call
 *             u1 may equal u2, in which case only one reference is required
 */
static int
cr_canseeothergids(struct ucred *u1, struct ucred *u2)
{
        int idx;

        if (see_other_gids)
                return (0);

        /*
         * Restriction in force: u1 must share at least one group with u2's
         * real group set, either through its real gid or through one of its
         * supplementary groups.
         */
        if (realgroupmember(u1->cr_rgid, u2))
                return (0);

        idx = 0;
        while (idx < u1->cr_ngroups) {
                if (realgroupmember(u1->cr_groups[idx], u2))
                        return (0);
                idx++;
        }

        /* No common group; privilege is the last resort. */
        return (priv_check_cred(u1, PRIV_SEEOTHERGIDS) == 0 ? 0 : ESRCH);
}

/*
 * 'see_jail_proc' determines whether or not visibility of processes and
 * sockets with credentials holding different jail ids is possible using a
 * variety of system MIBs.
 *
 * XXX: data declarations should be together near the beginning of the file.
 */

static int      see_jail_proc = 1;
SYSCTL_INT(_security_bsd, OID_AUTO, see_jail_proc, CTLFLAG_RW,
    &see_jail_proc, 0,
    "Unprivileged processes may see subjects/objects with different jail ids");

/*-
 * Determine if u1 "can see" the subject specified by u2, according to the
 * 'see_jail_proc' policy.
 * Returns: 0 for permitted, ESRCH otherwise
 * Locks: none
 * References: *u1 and *u2 must not change during the call
 *             u1 may equal u2, in which case only one reference is required
 */
static int
cr_canseejailproc(struct ucred *u1, struct ucred *u2)
{
        /* Policy deactivated. */
        if (see_jail_proc)
                return (0);

        /* Same jail. */
        if (u1->cr_prison == u2->cr_prison)
                return (0);

        /* Different jails: only a privileged subject may see the target. */
        if (priv_check_cred(u1, PRIV_SEEJAILPROC) == 0)
                return (0);

        return (ESRCH);
}

/*
 * Determine if u1 can tamper with the subject specified by u2, if they are in
 * different jails and 'unprivileged_parent_tampering' jail policy allows it.
 *
 * May be called if u1 and u2 are in the same jail, but it is expected that the
 * caller has already done a prison_check() prior to calling it.
 *
 * Returns: 0 for permitted, EPERM otherwise
 */
static int
cr_can_tamper_with_subjail(struct ucred *u1, struct ucred *u2, int priv)
{

        MPASS(prison_check(u1, u2) == 0);

        /*
         * Tampering is allowed, in order of evaluation, when:
         *  - both credentials are in the same jail;
         *  - u1 holds the privilege 'priv';
         *  - u2's jail has PR_ALLOW_UNPRIV_PARENT_TAMPER.
         *
         * Jails do not maintain a distinct UID space, so without the
         * allow.unprivileged_parent_tampering knob, process visibility
         * would be all that controls an unprivileged process' ability to
         * tamper with a process in a subjail.
         */
        if (u1->cr_prison == u2->cr_prison ||
            priv_check_cred(u1, priv) == 0 ||
            prison_allow(u2, PR_ALLOW_UNPRIV_PARENT_TAMPER))
                return (0);

        return (EPERM);
}

/*
 * Helper for cr_cansee*() functions to abide by system-wide security.bsd.see_*
 * policies.  Determines if u1 "can see" u2 according to these policies.
 * Returns: 0 for permitted, ESRCH otherwise
 */
int
cr_bsd_visible(struct ucred *u1, struct ucred *u2)
{
        int error;

        /* Apply the uid, gid and jail policies in turn; first denial wins. */
        if ((error = cr_canseeotheruids(u1, u2)) != 0 ||
            (error = cr_canseeothergids(u1, u2)) != 0)
                return (error);

        return (cr_canseejailproc(u1, u2));
}

/*-
 * Determine if u1 "can see" the subject specified by u2.
 * Returns: 0 for permitted, an errno value otherwise
 * Locks: none
 * References: *u1 and *u2 must not change during the call
 *             u1 may equal u2, in which case only one reference is required
 */
int
cr_cansee(struct ucred *u1, struct ucred *u2)
{
        int error;

        /* Jail check first. */
        error = prison_check(u1, u2);
        if (error != 0)
                return (error);

#ifdef MAC
        error = mac_cred_check_visible(u1, u2);
        if (error != 0)
                return (error);
#endif

        /* Finally the security.bsd.see_* policies. */
        return (cr_bsd_visible(u1, u2));
}

/*-
 * Determine if td "can see" the subject specified by p.
 * Returns: 0 for permitted, an errno value otherwise
 * Locks: Sufficient locks to protect p->p_ucred must be held.  td really
 *        should be curthread.
 * References: td and p must be valid for the lifetime of the call
 */
int
p_cansee(struct thread *td, struct proc *p)
{
        KASSERT(td == curthread, ("%s: td not curthread", __func__));
        PROC_LOCK_ASSERT(p, MA_OWNED);

        /* A process can always see itself; otherwise defer to cr_cansee(). */
        return (td->td_proc == p ? 0 : cr_cansee(td->td_ucred, p->p_ucred));
}

/*
 * 'conservative_signals' prevents the delivery of a broad class of
 * signals by unprivileged processes to processes that have changed their
 * credentials since the last invocation of execve().  This can prevent
 * the leakage of cached information or retained privileges as a result
 * of a common class of signal-related vulnerabilities.  However, this
 * may interfere with some applications that expect to be able to
 * deliver these signals to peer processes after having given up
 * privilege.
 */
static int      conservative_signals = 1;
SYSCTL_INT(_security_bsd, OID_AUTO, conservative_signals, CTLFLAG_RW,
    &conservative_signals, 0, "Unprivileged processes prevented from "
    "sending certain signals to processes whose credentials have changed");
/*-
 * Determine whether cred may deliver the specified signal to proc.
 * Returns: 0 for permitted, an errno value otherwise.
 * Locks: A lock must be held for proc.
 * References: cred and proc must be valid for the lifetime of the call.
 */
int
cr_cansignal(struct ucred *cred, struct proc *proc, int signum)
{
        int error;

        PROC_LOCK_ASSERT(proc, MA_OWNED);
        /*
         * Jail semantics limit the scope of signalling to proc in the
         * same jail as cred, if cred is in jail.
         */
        error = prison_check(cred, proc->p_ucred);
        if (error)
                return (error);
#ifdef MAC
        if ((error = mac_proc_check_signal(cred, proc, signum)))
                return (error);
#endif
        if ((error = cr_bsd_visible(cred, proc->p_ucred)))
                return (error);

        /*
         * UNIX signal semantics depend on the status of the P_SUGID
         * bit on the target process.  If the bit is set, then additional
         * restrictions are placed on the set of available signals.
         */
        if (conservative_signals && (proc->p_flag & P_SUGID)) {
                switch (signum) {
                case 0:
                case SIGKILL:
                case SIGINT:
                case SIGTERM:
                case SIGALRM:
                case SIGSTOP:
                case SIGTTIN:
                case SIGTTOU:
                case SIGTSTP:
                case SIGHUP:
                case SIGUSR1:
                case SIGUSR2:
                        /*
                         * Generally, permit job and terminal control
                         * signals.
                         */
                        break;
                default:
                        /* Not permitted without privilege. */
                        error = priv_check_cred(cred, PRIV_SIGNAL_SUGID);
                        if (error)
                                return (error);
                }
        }

        /*
         * Generally, the target credential's ruid or svuid must match the
         * subject credential's ruid or euid.
         */
        if (cred->cr_ruid != proc->p_ucred->cr_ruid &&
            cred->cr_ruid != proc->p_ucred->cr_svuid &&
            cred->cr_uid != proc->p_ucred->cr_ruid &&
            cred->cr_uid != proc->p_ucred->cr_svuid) {
                error = priv_check_cred(cred, PRIV_SIGNAL_DIFFCRED);
                if (error)
                        return (error);
        }

        /*
         * At this point, the target may be in a different jail than the
         * subject -- the subject must be in a parent jail to the target,
         * whether it is prison0 or a subordinate of prison0 that has
         * children.  Additional privileges are required to allow this, as
         * whether the creds are truly equivalent or not must be determined on
         * a case-by-case basis.
         */
        error = cr_can_tamper_with_subjail(cred, proc->p_ucred,
            PRIV_SIGNAL_DIFFJAIL);
        if (error)
                return (error);

        return (0);
}

/*-
 * Determine whether td may deliver the specified signal to p.
 * Returns: 0 for permitted, an errno value otherwise
 * Locks: Sufficient locks to protect various components of td and p
 *        must be held.  td must be curthread, and a lock must be
 *        held for p.
 * References: td and p must be valid for the lifetime of the call
 */
int
p_cansignal(struct thread *td, struct proc *p, int signum)
{
        struct proc *self;

        KASSERT(td == curthread, ("%s: td not curthread", __func__));
        PROC_LOCK_ASSERT(p, MA_OWNED);

        self = td->td_proc;
        if (self == p)
                return (0);

        /*
         * UNIX signalling semantics require that processes in the same
         * session always be able to deliver SIGCONT to one another,
         * overriding the remaining protections.
         */
        /* XXX: This will require an additional lock of some sort. */
        if (signum == SIGCONT && self->p_session == p->p_session)
                return (0);

        /*
         * Some compat layers use SIGTHR and higher signals for
         * communication between different kernel threads of the same
         * process, so that they expect that it's always possible to
         * deliver them, even for suid applications where cr_cansignal() can
         * deny such ability for security consideration.  It should be
         * pretty safe to do since the only way to create two processes
         * with the same p_leader is via rfork(2).
         */
        if (signum >= SIGTHR && signum < SIGTHR + 4 &&
            self->p_leader != NULL && self->p_leader == p->p_leader)
                return (0);

        /* No shortcut applies: run the full credential-based check. */
        return (cr_cansignal(td->td_ucred, p, signum));
}

/*-
 * Determine whether td may reschedule p.
 * Returns: 0 for permitted, an errno value otherwise
 * Locks: Sufficient locks to protect various components of td and p
 *        must be held.  td must be curthread, and a lock must
 *        be held for p.
 * References: td and p must be valid for the lifetime of the call
 */
int
p_cansched(struct thread *td, struct proc *p)
{
        int error;

        KASSERT(td == curthread, ("%s: td not curthread", __func__));
        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (td->td_proc == p)
                return (0);
        if ((error = prison_check(td->td_ucred, p->p_ucred)))
                return (error);
#ifdef MAC
        if ((error = mac_proc_check_sched(td->td_ucred, p)))
                return (error);
#endif
        if ((error = cr_bsd_visible(td->td_ucred, p->p_ucred)))
                return (error);

        if (td->td_ucred->cr_ruid != p->p_ucred->cr_ruid &&
            td->td_ucred->cr_uid != p->p_ucred->cr_ruid) {
                error = priv_check(td, PRIV_SCHED_DIFFCRED);
                if (error)
                        return (error);
        }

        error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred,
            PRIV_SCHED_DIFFJAIL);
        if (error)
                return (error);

        return (0);
}

/*
 * Sysctl handler reading or updating the PR_ALLOW_UNPRIV_DEBUG permission
 * associated with the requesting thread's credential.
 *
 * The current value is always reported.  When a new value is supplied it must
 * be exactly 0 or 1, otherwise EINVAL is returned and nothing is changed.
 */
static int
sysctl_unprivileged_proc_debug(SYSCTL_HANDLER_ARGS)
{
        struct ucred *cred;
        int error, val;

        cred = req->td->td_ucred;
        val = prison_allow(cred, PR_ALLOW_UNPRIV_DEBUG);

        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0)
                return (error);

        /* Pure read: nothing to store. */
        if (req->newptr == NULL)
                return (0);

        /* Only the two boolean values are acceptable. */
        if (val < 0 || val > 1)
                return (EINVAL);

        prison_set_allow(cred, PR_ALLOW_UNPRIV_DEBUG, val);
        return (0);
}

/*
 * The 'unprivileged_proc_debug' flag may be used to disable a variety of
 * unprivileged inter-process debugging services, including some procfs
 * functionality, ptrace(), and ktrace().  In the past, inter-process
 * debugging has been involved in a variety of security problems, and sites
 * not requiring the service might choose to disable it when hardening
 * systems.
 *
 * The knob has no backing variable of its own: reads and writes both go
 * through sysctl_unprivileged_proc_debug(), which maps the value onto the
 * PR_ALLOW_UNPRIV_DEBUG permission looked up via the requesting thread's
 * credential and only accepts 0 or 1 as a new value.
 */
SYSCTL_PROC(_security_bsd, OID_AUTO, unprivileged_proc_debug,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_SECURE |
    CTLFLAG_MPSAFE, 0, 0, sysctl_unprivileged_proc_debug, "I",
    "Unprivileged processes may use process debugging facilities");

/*
 * Tell whether all user and group IDs carried by 'obj_cred' are covered by
 * 'active_cred'.
 *
 * Groups: every entry of obj_cred's cr_groups array, as well as its
 * effective, real and saved GIDs, must pass groupmember() against
 * 'active_cred'.
 * Users: obj_cred's effective, saved and real UIDs must all be equal to the
 * effective UID of 'active_cred'.
 *
 * Returns true only if both conditions hold.
 */
bool
cr_xids_subset(struct ucred *active_cred, struct ucred *obj_cred)
{
        int idx;

        /* Group access list first; one miss is enough to fail. */
        for (idx = 0; idx < obj_cred->cr_ngroups; idx++) {
                if (!groupmember(obj_cred->cr_groups[idx], active_cred))
                        return (false);
        }

        /* Then the effective, real and saved GIDs, in that order. */
        if (!groupmember(obj_cred->cr_gid, active_cred))
                return (false);
        if (!groupmember(obj_cred->cr_rgid, active_cred))
                return (false);
        if (!groupmember(obj_cred->cr_svgid, active_cred))
                return (false);

        /* Finally, all three UIDs must be active_cred's effective UID. */
        return (active_cred->cr_uid == obj_cred->cr_uid &&
            active_cred->cr_uid == obj_cred->cr_svuid &&
            active_cred->cr_uid == obj_cred->cr_ruid);
}

/*-
 * Determine whether td may debug p.
 * Returns: 0 for permitted, an errno value otherwise
 * Locks: Sufficient locks to protect various components of td and p
 *        must be held.  td must be curthread, and a lock must
 *        be held for p.
 * References: td and p must be valid for the lifetime of the call
 */
int
p_candebug(struct thread *td, struct proc *p)
{
        int error;

        KASSERT(td == curthread, ("%s: td not curthread", __func__));
        PROC_LOCK_ASSERT(p, MA_OWNED);
        if (td->td_proc == p)
                return (0);
        if ((error = priv_check(td, PRIV_DEBUG_UNPRIV)))
                return (error);
        if ((error = prison_check(td->td_ucred, p->p_ucred)))
                return (error);
#ifdef MAC
        if ((error = mac_proc_check_debug(td->td_ucred, p)))
                return (error);
#endif
        if ((error = cr_bsd_visible(td->td_ucred, p->p_ucred)))
                return (error);

        /*
         * If p's gids aren't a subset, or the uids aren't a subset,
         * or the credential has changed, require appropriate privilege
         * for td to debug p.
         */
        if (!cr_xids_subset(td->td_ucred, p->p_ucred)) {
                error = priv_check(td, PRIV_DEBUG_DIFFCRED);
                if (error)
                        return (error);
        }

        /*
         * Has the credential of the process changed since the last exec()?
         */
        if ((p->p_flag & P_SUGID) != 0) {
                error = priv_check(td, PRIV_DEBUG_SUGID);
                if (error)
                        return (error);
        }

        error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred,
            PRIV_DEBUG_DIFFJAIL);
        if (error)
                return (error);

        /* Can't trace init when securelevel > 0. */
        if (p == initproc) {
                error = securelevel_gt(td->td_ucred, 0);
                if (error)
                        return (error);
        }

        /*
         * Can't trace a process that's currently exec'ing.
         *
         * XXX: Note, this is not a security policy decision, it's a
         * basic correctness/functionality decision.  Therefore, this check
         * should be moved to the caller's of p_candebug().
         */
        if ((p->p_flag & P_INEXEC) != 0)
                return (EBUSY);

        /* Denied explicitly */
        if ((p->p_flag2 & P2_NOTRACE) != 0) {
                error = priv_check(td, PRIV_DEBUG_DENIED);
                if (error != 0)
                        return (error);
        }

        return (0);
}

/*-
 * Determine whether the subject represented by cred can "see" a socket.
 * Returns: 0 for permitted, ENOENT otherwise.
 */
int
cr_canseesocket(struct ucred *cred, struct socket *so)
{
        int error;

        error = prison_check(cred, so->so_cred);
        if (error)
                return (ENOENT);
#ifdef MAC
        error = mac_socket_check_visible(cred, so);
        if (error)
                return (error);
#endif
        if (cr_bsd_visible(cred, so->so_cred))
                return (ENOENT);

        return (0);
}

/*-
 * Determine whether td can wait for the exit of p.
 * Returns: 0 for permitted, an errno value otherwise
 * Locks: Sufficient locks to protect various components of td and p
 *        must be held.  td must be curthread, and a lock must
 *        be held for p.
 * References: td and p must be valid for the lifetime of the call

 */
int
p_canwait(struct thread *td, struct proc *p)
{
        int error;

        KASSERT(td == curthread, ("%s: td not curthread", __func__));
        PROC_LOCK_ASSERT(p, MA_OWNED);
        if ((error = prison_check(td->td_ucred, p->p_ucred)))
                return (error);
#ifdef MAC
        if ((error = mac_proc_check_wait(td->td_ucred, p)))
                return (error);
#endif
#if 0
        /* XXXMAC: This could have odd effects on some shells. */
        if ((error = cr_bsd_visible(td->td_ucred, p->p_ucred)))
                return (error);
#endif

        return (0);
}

/*
 * Credential management.
 *
 * struct ucred objects are rarely allocated but gain and lose references all
 * the time (e.g., on struct file alloc/dealloc), turning refcount updates into
 * a significant source of cache-line ping-ponging.  The common case is handled
 * by updating a thread-local counter instead, when the cred being operated on
 * matches td_realucred.
 *
 * The counter is split into 2 parts:
 * - cr_users -- total count of all struct proc and struct thread objects
 *   which have given cred in p_ucred and td_ucred respectively
 * - cr_ref -- the actual ref count, only valid if cr_users == 0
 *
 * If users == 0 then cr_ref behaves similarly to refcount(9), in particular if
 * the count reaches 0 the object is freeable.
 * If users > 0 and curthread->td_realucred == cred, then updates are performed
 * against td_ucredref.
 * In other cases updates are performed against cr_ref.
 *
 * Changing td_realucred into something else decrements cr_users and transfers
 * accumulated updates.
 */
struct ucred *
crcowget(struct ucred *cr)
{

        /*
         * Register one more user of 'cr'.  The cred must already have at
         * least one user; both the user count and the reference count go up
         * by one, under the cred's mutex.  'cr' itself is returned.
         */
        mtx_lock(&cr->cr_mtx);
        KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
            __func__, cr->cr_users, cr));
        cr->cr_ref++;
        cr->cr_users++;
        mtx_unlock(&cr->cr_mtx);

        return (cr);
}

/*
 * Stop 'td' from being a user of its current credential.
 *
 * The references accumulated in td_ucredref are folded back into cr_ref and
 * the user count is decremented.  If 'td' was the last user, the cred is
 * returned with cr_ref left untouched, so the caller inherits a reference to
 * dispose of.  Otherwise that reference is dropped here and NULL is returned.
 * In both cases td_realucred is cleared.
 */
static struct ucred *
crunuse(struct thread *td)
{
        struct ucred *cr, *last;

        MPASS(td->td_realucred == td->td_ucred);
        cr = td->td_realucred;
        last = NULL;

        mtx_lock(&cr->cr_mtx);
        /* Transfer the thread-local updates to the shared counter. */
        cr->cr_ref += td->td_ucredref;
        td->td_ucredref = 0;
        KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
            __func__, cr->cr_users, cr));
        cr->cr_users--;
        if (cr->cr_users != 0) {
                cr->cr_ref--;
        } else {
                KASSERT(cr->cr_ref > 0, ("%s: ref %ld not > 0 on cred %p",
                    __func__, cr->cr_ref, cr));
                last = cr;
        }
        mtx_unlock(&cr->cr_mtx);

        td->td_realucred = NULL;
        return (last);
}

/*
 * Batched counterpart of dropping thread users from a credential.
 *
 * Removes 'users' users from 'cr' in one go, adds the accumulated
 * thread-local reference delta 'ref' to cr_ref and drops one reference per
 * removed user.  If no user and no reference is left afterwards, the cred is
 * handed to crfree_final() with its mutex still held; otherwise the mutex is
 * simply released.
 *
 * 'users' must be strictly positive and no larger than cr_users.
 */
static void
crunusebatch(struct ucred *cr, u_int users, long ref)
{

        KASSERT(users > 0, ("%s: passed users %u not > 0 ; cred %p",
            __func__, users, cr));
        mtx_lock(&cr->cr_mtx);
        /* The message states the relation actually checked (>=). */
        KASSERT(cr->cr_users >= users, ("%s: users %d not >= %u on cred %p",
            __func__, cr->cr_users, users, cr));
        cr->cr_users -= users;
        cr->cr_ref += ref;
        cr->cr_ref -= users;
        if (cr->cr_users > 0) {
                /* Still in use by some thread or process. */
                mtx_unlock(&cr->cr_mtx);
                return;
        }
        KASSERT(cr->cr_ref >= 0, ("%s: ref %ld not >= 0 on cred %p",
            __func__, cr->cr_ref, cr));
        if (cr->cr_ref > 0) {
                /* No users left, but plain references remain. */
                mtx_unlock(&cr->cr_mtx);
                return;
        }
        crfree_final(cr);
}

void
crcowfree(struct thread *td)
{
        struct ucred *last;

        /*
         * Detach 'td' from its credential.  crunuse() hands the cred back
         * only when 'td' was its last user, in which case the reference that
         * came with it is released here.
         */
        last = crunuse(td);
        if (last == NULL)
                return;
        crfree(last);
}

struct ucred *
crcowsync(void)
{
        struct thread *self;
        struct proc *proc;
        struct ucred *fresh, *stale;

        self = curthread;
        proc = self->td_proc;
        PROC_LOCK_ASSERT(proc, MA_OWNED);

        MPASS(self->td_realucred == self->td_ucred);

        /* Nothing to do if the thread already uses the process' cred. */
        if (self->td_realucred == proc->p_ucred)
                return (NULL);

        /*
         * Become a user of the process' credential before letting go of the
         * previous one, then point both thread fields at the new cred.
         */
        fresh = crcowget(proc->p_ucred);
        stale = crunuse(self);
        self->td_realucred = fresh;
        self->td_ucred = fresh;

        /*
         * Whatever crunuse() returned: NULL, or the old cred if this thread
         * was its last user.  NOTE(review): presumably the caller then
         * crfree()s a non-NULL result, as crcowfree() does -- confirm.
         */
        return (stale);
}

/*
 * Batching.
 */
void
credbatch_add(struct credbatch *crb, struct thread *td)
{
        struct ucred *cr;

        MPASS(td->td_realucred != NULL);
        MPASS(td->td_realucred == td->td_ucred);
        MPASS(TD_GET_STATE(td) == TDS_INACTIVE);

        cr = td->td_realucred;
        KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
            __func__, cr->cr_users, cr));

        /*
         * A batch accumulates for a single cred at a time.  If it holds
         * pending users of another cred, flush those before switching.
         */
        if (crb->cred != cr && crb->users > 0) {
                MPASS(crb->cred != NULL);
                crunusebatch(crb->cred, crb->users, crb->ref);
                crb->users = 0;
                crb->ref = 0;
        }

        /* Record this thread as one more user to drop from 'cr'. */
        crb->cred = cr;
        crb->users++;

        /* Move the thread-local reference delta into the batch. */
        crb->ref += td->td_ucredref;
        td->td_ucredref = 0;
        td->td_realucred = NULL;
}

void
credbatch_final(struct credbatch *crb)
{

        /*
         * Flush what is still pending in the batch.  The batch must hold a
         * cred and at least one user.
         */
        MPASS(crb->cred != NULL);
        MPASS(crb->users > 0);

        crunusebatch(crb->cred, crb->users, crb->ref);
}

/*
 * Allocate a new credential structure.
 *
 * The structure is zero-filled on allocation (M_WAITOK | M_ZERO), gets its
 * mutex initialized and starts with a reference count of 1.  Its group array
 * initially points at the embedded 'cr_smallgroups' storage.
 */
struct ucred *
crget(void)
{
        struct ucred *newcr;

        newcr = malloc(sizeof(*newcr), M_CRED, M_WAITOK | M_ZERO);
        mtx_init(&newcr->cr_mtx, "cred", NULL, MTX_DEF);

        /* The single initial reference. */
        newcr->cr_ref = 1;
#ifdef AUDIT
        audit_cred_init(newcr);
#endif
#ifdef MAC
        mac_cred_init(newcr);
#endif
        /* Start with the inline group storage. */
        newcr->cr_groups = newcr->cr_smallgroups;
        newcr->cr_agroups = nitems(newcr->cr_smallgroups);

        return (newcr);
}

/*
 * Take an additional reference on a ucred structure and return it.
 *
 * If 'cr' is the calling thread's td_realucred, only the thread-local
 * td_ucredref counter is bumped and no lock is taken; otherwise cr_ref is
 * incremented under the cred's mutex.
 */
struct ucred *
crhold(struct ucred *cr)
{
        struct thread *self = curthread;

        if (__predict_true(self->td_realucred == cr)) {
                KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
                    __func__, cr->cr_users, cr));
                self->td_ucredref++;
        } else {
                mtx_lock(&cr->cr_mtx);
                cr->cr_ref++;
                mtx_unlock(&cr->cr_mtx);
        }

        return (cr);
}

/*
 * Release a reference on a cred structure.
 *
 * If 'cr' is the calling thread's td_realucred, only the thread-local
 * td_ucredref counter is decremented.  Otherwise cr_ref is decremented under
 * the cred's mutex, and the structure is destroyed once it has neither users
 * nor references left.
 */
void
crfree(struct ucred *cr)
{
        struct thread *self = curthread;

        if (__predict_true(self->td_realucred == cr)) {
                KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
                    __func__, cr->cr_users, cr));
                self->td_ucredref--;
                return;
        }

        mtx_lock(&cr->cr_mtx);
        KASSERT(cr->cr_users >= 0, ("%s: users %d not >= 0 on cred %p",
            __func__, cr->cr_users, cr));
        cr->cr_ref--;

        /* While users exist, cr_ref alone cannot trigger destruction. */
        if (cr->cr_users > 0)
                goto out_unlock;
        KASSERT(cr->cr_ref >= 0, ("%s: ref %ld not >= 0 on cred %p",
            __func__, cr->cr_ref, cr));
        if (cr->cr_ref > 0)
                goto out_unlock;

        /* Last reference gone; called with the mutex still held. */
        crfree_final(cr);
        return;

out_unlock:
        mtx_unlock(&cr->cr_mtx);
}

/*
 * Release everything a credential holds and free the credential itself.
 *
 * Both cr_users and cr_ref are expected to be zero (asserted).  The callers
 * in this file, crfree() and crunusebatch(), get here with cr_mtx still
 * locked; the mutex is destroyed below rather than unlocked.
 * NOTE(review): this relies on mtx_destroy() accepting a mutex owned by the
 * caller -- confirm against mutex(9).
 */
static void
crfree_final(struct ucred *cr)
{

        KASSERT(cr->cr_users == 0, ("%s: users %d not == 0 on cred %p",
            __func__, cr->cr_users, cr));
        KASSERT(cr->cr_ref == 0, ("%s: ref %ld not == 0 on cred %p",
            __func__, cr->cr_ref, cr));

        /*
         * Some callers of crget(), such as nfs_statfs(), allocate a temporary
         * credential, but don't allocate a uidinfo structure.
         */
        /* Each held object is released only if it was actually set. */
        if (cr->cr_uidinfo != NULL)
                uifree(cr->cr_uidinfo);
        if (cr->cr_ruidinfo != NULL)
                uifree(cr->cr_ruidinfo);
        if (cr->cr_prison != NULL)
                prison_free(cr->cr_prison);
        if (cr->cr_loginclass != NULL)
                loginclass_free(cr->cr_loginclass);
#ifdef AUDIT
        audit_cred_destroy(cr);
#endif
#ifdef MAC
        mac_cred_destroy(cr);
#endif
        mtx_destroy(&cr->cr_mtx);
        /* The group array is separate memory unless it is the inline one. */
        if (cr->cr_groups != cr->cr_smallgroups)
                free(cr->cr_groups, M_CRED);
        free(cr, M_CRED);
}

/*
 * Fill 'dest' with the contents of the template credential 'src'.
 *
 * The fields lying between 'cr_startcopy' and 'cr_endcopy' are copied
 * verbatim, then the flags and the groups, and a reference is taken on each
 * of the shared objects (uidinfo, ruidinfo, prison, login class) 'dest' now
 * points to.
 *
 * Does not block, provided 'dest' already has room for the groups of 'src'
 * (see crextend()); crsetgroups() may otherwise sleep to allocate memory.
 */
void
crcopy(struct ucred *dest, struct ucred *src)
{

        /* Raw copy of the contiguous "copyable" region. */
        bcopy(&src->cr_startcopy, &dest->cr_startcopy,
            (unsigned)((caddr_t)&src->cr_endcopy -
                (caddr_t)&src->cr_startcopy));
        dest->cr_flags = src->cr_flags;
        crsetgroups(dest, src->cr_ngroups, src->cr_groups);

        /* 'dest' now shares these objects with 'src': hold them. */
        uihold(dest->cr_uidinfo);
        uihold(dest->cr_ruidinfo);
        prison_hold(dest->cr_prison);
        loginclass_hold(dest->cr_loginclass);
#ifdef AUDIT
        audit_cred_copy(src, dest);
#endif
#ifdef MAC
        mac_cred_copy(src, dest);
#endif
}

/*
 * Duplicate a credential: allocate a fresh one with crget() and copy the
 * contents of 'cr' into it with crcopy().  The new credential is returned
 * holding the reference set up by crget().
 */
struct ucred *
crdup(struct ucred *cr)
{
        struct ucred *copy;

        copy = crget();
        crcopy(copy, cr);

        return (copy);
}

/*
 * Export a struct ucred into a struct xucred.
 *
 * 'xcr' is zeroed first, then receives the version, the effective UID and GID
 * and as many groups of 'cr' as fit.
 */
void
cru2x(struct ucred *cr, struct xucred *xcr)
{
        int count;

        bzero(xcr, sizeof(*xcr));
        xcr->cr_version = XUCRED_VERSION;
        xcr->cr_uid = cr->cr_uid;
        xcr->cr_gid = cr->cr_gid;

        /*
         * In the xucred, cr_gid is aliased to cr_groups[0] through a union,
         * which makes this a bit ugly: the exported cr_ngroups keeps counting
         * the egid, so that the xucred version does not need to be bumped.
         * Hence the "+ 1" here and the "- 1" when copying the other groups.
         */
        count = MIN(cr->cr_ngroups + 1, nitems(xcr->cr_groups));
        xcr->cr_ngroups = count;
        bcopy(cr->cr_groups, xcr->cr_sgroups,
            (count - 1) * sizeof(*cr->cr_groups));
}

void
cru2xt(struct thread *td, struct xucred *xcr)
{

        /*
         * Export the thread's credential, then add the PID of its process,
         * which cru2x() leaves zeroed.
         */
        cru2x(td->td_ucred, xcr);
        xcr->cr_pid = td->td_proc->p_pid;
}

/*
 * Install new credentials on a process.
 *
 * The caller supplies the reference on 'newcred' and remains responsible for
 * freeing the old credentials.  If the real UID's uidinfo changes, the
 * process is accounted to the new one and removed from the old one through
 * chgproccnt().
 *
 * Returns true on success.  Failure is possible only when 'enforce_proc_lim'
 * is true, the new real UID cannot take one more process under the current
 * limit (see the comment in the body) and the old credentials lack
 * PRIV_PROC_LIMIT to override that.  On failure nothing is changed and the
 * reference on 'newcred' is not taken over.
 */
static bool
_proc_set_cred(struct proc *p, struct ucred *newcred, bool enforce_proc_lim)
{
        struct ucred *const oldcred = p->p_ucred;
        bool ruid_changes;

        MPASS(oldcred != NULL);
        PROC_LOCK_ASSERT(p, MA_OWNED);

        ruid_changes = newcred->cr_ruidinfo != oldcred->cr_ruidinfo;

        if (ruid_changes) {
                /*
                 * XXXOC: This check is flawed but nonetheless the best we can
                 * currently do as we don't really track limits per UID contrary
                 * to what we pretend in setrlimit(2).  Until this is reworked,
                 * we just check here that the number of processes for our new
                 * real UID doesn't exceed this process' process number limit
                 * (which is meant to be associated with the current real UID).
                 */
                if (!chgproccnt(newcred->cr_ruidinfo, 1,
                    enforce_proc_lim ? lim_cur_proc(p, RLIMIT_NPROC) : 0)) {
                        if (priv_check_cred(oldcred, PRIV_PROC_LIMIT) != 0)
                                return (false);
                        /* Privileged: account the process regardless. */
                        (void)chgproccnt(newcred->cr_ruidinfo, 1, 0);
                }
        }

        /* The process stops being a user of the old cred... */
        mtx_lock(&oldcred->cr_mtx);
        KASSERT(oldcred->cr_users > 0, ("%s: users %d not > 0 on cred %p",
            __func__, oldcred->cr_users, oldcred));
        oldcred->cr_users--;
        mtx_unlock(&oldcred->cr_mtx);

        /* ... and becomes one of the new cred. */
        mtx_lock(&newcred->cr_mtx);
        newcred->cr_users++;
        mtx_unlock(&newcred->cr_mtx);

        p->p_ucred = newcred;
        PROC_UPDATE_COW(p);

        /* Finally remove the process from the old real UID's count. */
        if (ruid_changes)
                (void)chgproccnt(oldcred->cr_ruidinfo, -1, 0);

        return (true);
}

void
proc_set_cred(struct proc *p, struct ucred *newcred)
{
        bool success __diagused = _proc_set_cred(p, newcred, false);

        MPASS(success);
}

bool
proc_set_cred_enforce_proc_lim(struct proc *p, struct ucred *newcred)
{
        bool changed;

        /* Same as proc_set_cred(), with the process limit enforced. */
        changed = _proc_set_cred(p, newcred, true);
        return (changed);
}

void
proc_unset_cred(struct proc *p, bool decrement_proc_count)
{
        struct ucred *cr;

        /* Only valid on a process that is a zombie or still being created. */
        MPASS(p->p_state == PRS_ZOMBIE || p->p_state == PRS_NEW);

        /* Detach the credential from the process. */
        cr = p->p_ucred;
        p->p_ucred = NULL;
        KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
            __func__, cr->cr_users, cr));

        /*
         * The process is no longer a user.  If it was the last one, the
         * reference released below must still be there.
         */
        mtx_lock(&cr->cr_mtx);
        cr->cr_users--;
        KASSERT(cr->cr_users != 0 || cr->cr_ref > 0,
            ("%s: ref %ld not > 0 on cred %p", __func__, cr->cr_ref, cr));
        mtx_unlock(&cr->cr_mtx);

        /* Optionally take the process off its real UID's count. */
        if (decrement_proc_count)
                (void)chgproccnt(cr->cr_ruidinfo, -1, 0);

        crfree(cr);
}

struct ucred *
crcopysafe(struct proc *p, struct ucred *cr)
{
        struct ucred *oldcred;
        int needed;

        PROC_LOCK_ASSERT(p, MA_OWNED);

        /*
         * Make sure 'cr' can hold the groups of the process' credential
         * before copying.  Growing 'cr' is done with the process unlocked,
         * so the process' credential is re-read and re-checked after each
         * round.
         */
        for (;;) {
                oldcred = p->p_ucred;
                needed = oldcred->cr_ngroups;
                if (cr->cr_agroups >= needed)
                        break;
                PROC_UNLOCK(p);
                crextend(cr, needed);
                PROC_LOCK(p);
        }

        crcopy(cr, oldcred);

        /* The credential that was copied from. */
        return (oldcred);
}

/*
 * Grow the group storage of the passed-in credentials so that it can hold at
 * least 'n' groups.  Nothing is done if it is large enough already.
 *
 * The previous content of the group array is not preserved, which is why
 * this must not be called after groups have been set.  'cr' must be
 * unshared (a single reference).
 */
void
crextend(struct ucred *cr, int n)
{
        size_t nbytes;

        MPASS2(cr->cr_ref == 1, "'cr_ref' must be 1 (referenced, unshared)");
        MPASS2((cr->cr_flags & CRED_FLAG_GROUPSET) == 0,
            "groups on 'cr' already set!");
        groups_check_positive_len(n);
        groups_check_max_len(n);

        /* Enough room already. */
        if (cr->cr_agroups >= n)
                return;

        nbytes = n * sizeof(gid_t);
        if (nbytes < n) {
                panic("Too many groups (memory size overflow)! "
                    "Computation of 'kern.ngroups' should have prevented this, "
                    "please fix it. In the meantime, reduce 'kern.ngroups'.");
        }

        /*
         * Sizing policy: at PAGE_SIZE and above, round up to a whole number
         * of pages; below, round up to a power of 2.  PAGE_SIZE is assumed
         * to be a power of 2 (required by roundup2()), sizeof(gid_t) need
         * not be.
         */
        if (nbytes >= PAGE_SIZE) {
                nbytes = roundup2(nbytes, PAGE_SIZE);
        } else if (!powerof2(nbytes)) {
                /* fls*() return a bit index starting at 1. */
                nbytes = 1 << flsl(nbytes);
        }

        /* Drop the old array unless it is the embedded one. */
        if (cr->cr_groups != cr->cr_smallgroups)
                free(cr->cr_groups, M_CRED);

        cr->cr_groups = malloc(nbytes, M_CRED, M_WAITOK | M_ZERO);
        cr->cr_agroups = nbytes / sizeof(gid_t);
}

/*
 * Put a set of groups destined to a 'struct ucred' into normal form.
 *
 * On return, the array is sorted in ascending order and holds no duplicates,
 * and '*ngrp' has been updated to the resulting number of groups.  This is
 * what allows group_is_supplementary() to do a binary search.
 */
static void
groups_normalize(int *ngrp, gid_t *groups)
{
        int count, kept;

        groups_check_positive_len(*ngrp);
        groups_check_max_len(*ngrp);

        /* Zero or one group is trivially normalized. */
        count = *ngrp;
        if (count <= 1)
                return;

        qsort(groups, count, sizeof(*groups), gidp_cmp);

        /*
         * Squeeze out duplicates in place.  'kept' is the number of distinct
         * groups retained so far, so groups[kept - 1] is the last of them.
         */
        kept = 1;
        for (int i = 1; i < count; ++i) {
                if (groups[i] == groups[kept - 1])
                        continue;
                if (i != kept)
                        groups[kept] = groups[i];
                ++kept;
        }
        *ngrp = kept;

        groups_check_normalized(*ngrp, groups);
}

/*
 * Internal helper storing a group list into a credential.
 *
 * 'ngrp' must be strictly positive.  The 'groups' array should have been
 * normalized beforehand (see groups_normalize()); if not, it must be
 * normalized before the credential is used again.
 *
 * Safe to call under any lock: it takes no lock, does not sleep and in
 * particular allocates no memory.  The price is that crextend() must have
 * been called before to guarantee enough room.  crsetgroups() takes care of
 * that and is the function to use otherwise.
 */
static void
crsetgroups_internal(struct ucred *cr, int ngrp, const gid_t *groups)
{

        MPASS2(cr->cr_ref == 1, "'cr_ref' must be 1 (referenced, unshared)");
        MPASS2(cr->cr_agroups >= ngrp, "'cr_agroups' too small");
        groups_check_positive_len(ngrp);

        bcopy(groups, cr->cr_groups, ngrp * sizeof(groups[0]));
        cr->cr_ngroups = ngrp;

        /* Record that the groups of 'cr' are now set. */
        cr->cr_flags |= CRED_FLAG_GROUPSET;
}

/*
 * Store a group list into a credential, growing its storage if needed.
 *
 * May sleep to allocate memory, unless the storage is already large enough
 * (e.g., crextend() was called before with 'ngrp' or more).  A list longer
 * than 'ngroups_max' is truncated to that length.  The 'groups' array need
 * not be sorted, as the stored copy is normalized here.  'ngrp' must not be
 * negative; 0 results in an empty group list.
 */
void
crsetgroups(struct ucred *cr, int ngrp, const gid_t *groups)
{

        if (ngrp > ngroups_max)
                ngrp = ngroups_max;

        /* Start from an empty list; an empty request ends here. */
        cr->cr_ngroups = 0;
        if (ngrp == 0) {
                cr->cr_flags |= CRED_FLAG_GROUPSET;
                return;
        }

        /*
         * crextend() insists on groups not being set, because it may switch
         * to a new backing storage without copying the old content.  As the
         * whole set is about to be replaced, declare the old one discarded.
         */
        cr->cr_flags &= ~CRED_FLAG_GROUPSET;
        crextend(cr, ngrp);

        /* Copy the list in, then sort and deduplicate the stored copy. */
        crsetgroups_internal(cr, ngrp, groups);
        groups_normalize(&cr->cr_ngroups, cr->cr_groups);
}

/*
 * Variant of crsetgroups() that also sets the effective GID, so that
 * credentials always end up with one.
 *
 * With an empty array, the effective GID becomes 'default_egid' and the
 * group list is emptied.  Otherwise, groups[0] becomes the effective GID and
 * the remaining entries, if any, become the group list.
 */
void
crsetgroups_and_egid(struct ucred *cr, int ngrp, const gid_t *groups,
    const gid_t default_egid)
{
        if (ngrp != 0) {
                /* First entry is the egid, the rest go to crsetgroups(). */
                crsetgroups(cr, ngrp - 1, groups + 1);
                cr->cr_gid = groups[0];
                return;
        }

        /* Nothing was passed: fall back to the default egid. */
        cr->cr_gid = default_egid;
        cr->cr_ngroups = 0;
        cr->cr_flags |= CRED_FLAG_GROUPSET;
}

/*
 * getlogin system call: copy the login name recorded in the calling process'
 * session out to the user buffer 'namebuf' of size 'namelen'.
 *
 * Returns ERANGE if the name and its terminating NUL do not fit in 'namelen'
 * bytes (capped at MAXLOGNAME), otherwise the result of copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct getlogin_args {
        char    *namebuf;
        u_int   namelen;
};
#endif
/* ARGSUSED */
int
sys_getlogin(struct thread *td, struct getlogin_args *uap)
{
        struct proc *p = td->td_proc;
        char login[MAXLOGNAME];
        size_t len;

        /* Never use more than the local buffer can hold. */
        if (uap->namelen > MAXLOGNAME)
                uap->namelen = MAXLOGNAME;

        /* Snapshot the name under the process and session locks. */
        PROC_LOCK(p);
        SESS_LOCK(p->p_session);
        len = strlcpy(login, p->p_session->s_login, uap->namelen);
        SESS_UNLOCK(p->p_session);
        PROC_UNLOCK(p);

        /* strlcpy() returned the full source length; count the NUL too. */
        len++;
        if (len > uap->namelen)
                return (ERANGE);

        return (copyout(login, uap->namebuf, len));
}

/*
 * setlogin system call: set the login name of the calling process' session
 * from the user string 'namebuf'.
 *
 * Requires PRIV_PROC_SETLOGIN.  A name that does not fit in MAXLOGNAME bytes
 * (terminating NUL included) is rejected with EINVAL; other copy-in errors
 * are returned as they are.
 */
#ifndef _SYS_SYSPROTO_H_
struct setlogin_args {
        char    *namebuf;
};
#endif
/* ARGSUSED */
int
sys_setlogin(struct thread *td, struct setlogin_args *uap)
{
        char logintmp[MAXLOGNAME];
        struct proc *p = td->td_proc;
        int error;

        /* Guarantees that the strcpy() below cannot overflow 's_login'. */
        CTASSERT(sizeof(p->p_session->s_login) >= sizeof(logintmp));

        error = priv_check(td, PRIV_PROC_SETLOGIN);
        if (error != 0)
                return (error);

        error = copyinstr(uap->namebuf, logintmp, sizeof(logintmp), NULL);
        if (error == ENAMETOOLONG)
                return (EINVAL);
        if (error != 0)
                return (error);

        AUDIT_ARG_LOGIN(logintmp);

        /* Install the new name under the process and session locks. */
        PROC_LOCK(p);
        SESS_LOCK(p->p_session);
        strcpy(p->p_session->s_login, logintmp);
        SESS_UNLOCK(p->p_session);
        PROC_UNLOCK(p);

        return (0);
}

void
setsugid(struct proc *p)
{

        /* Flag the process with P_SUGID; the process lock must be held. */
        PROC_LOCK_ASSERT(p, MA_OWNED);
        p->p_flag |= P_SUGID;
}

/*-
 * Set the effective uid of a credential from a uidinfo.
 * Side effects: newcred->cr_uid and newcred->cr_uidinfo are both replaced;
 *               a reference is taken on euip and the one on the previous
 *               cr_uidinfo is released.
 * References: newcred must be an exclusive credential reference for the
 *             duration of the call.
 */
void
change_euid(struct ucred *newcred, struct uidinfo *euip)
{

        /* Hold the new uidinfo before releasing the old one. */
        uihold(euip);
        uifree(newcred->cr_uidinfo);
        newcred->cr_uidinfo = euip;
        newcred->cr_uid = euip->ui_uid;
}

/*-
 * Set the effective gid of a credential.
 * Side effects: newcred->cr_gid is replaced by egid.
 * References: newcred must be an exclusive credential reference for the
 *             duration of the call.
 */
void
change_egid(struct ucred *newcred, gid_t egid)
{

        newcred->cr_gid = egid;
}

/*-
 * Set the real uid of a credential from a uidinfo.
 * Side effects: newcred->cr_ruid and newcred->cr_ruidinfo are both replaced;
 *               a reference is taken on ruip and the one on the previous
 *               cr_ruidinfo is released.
 * References: newcred must be an exclusive credential reference for the
 *             duration of the call.
 */
void
change_ruid(struct ucred *newcred, struct uidinfo *ruip)
{

        /* Hold the new uidinfo before releasing the old one. */
        uihold(ruip);
        uifree(newcred->cr_ruidinfo);
        newcred->cr_ruidinfo = ruip;
        newcred->cr_ruid = ruip->ui_uid;
}

/*-
 * Set the real gid of a credential.
 * Side effects: newcred->cr_rgid is replaced by rgid.
 * References: newcred must be an exclusive credential reference for the
 *             duration of the call.
 */
void
change_rgid(struct ucred *newcred, gid_t rgid)
{

        newcred->cr_rgid = rgid;
}

/*-
 * Set the saved uid of a credential.
 * Side effects: newcred->cr_svuid is replaced by svuid.
 * References: newcred must be an exclusive credential reference for the
 *             duration of the call.
 */
void
change_svuid(struct ucred *newcred, uid_t svuid)
{

        newcred->cr_svuid = svuid;
}

/*-
 * Set the saved gid of a credential.
 * Side effects: newcred->cr_svgid is replaced by svgid.
 * References: newcred must be an exclusive credential reference for the
 *             duration of the call.
 */
void
change_svgid(struct ucred *newcred, gid_t svgid)
{

        newcred->cr_svgid = svgid;
}

/*
 * Global boolean, true by default, exported as the 'allow_ptrace' knob under
 * the _security_bsd sysctl node.
 * NOTE(review): the description string is worded from the "deny" point of
 * view while the variable is named "allow"; presumably it is clearing the
 * knob that makes ptrace(2) fail with ENOSYS -- confirm against the code
 * consuming 'allow_ptrace'.
 */
bool allow_ptrace = true;
SYSCTL_BOOL(_security_bsd, OID_AUTO, allow_ptrace, CTLFLAG_RWTUN,
    &allow_ptrace, 0,
    "Deny ptrace(2) use by returning ENOSYS");