root/usr/src/uts/common/os/brand.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/brand.h>
#include <sys/machbrand.h>
#include <sys/modctl.h>
#include <sys/rwlock.h>
#include <sys/zone.h>
#include <sys/pathname.h>

#define SUPPORTED_BRAND_VERSION BRAND_VER_1

#if defined(__sparcv9)
/* sparcv9 uses system wide brand interposition hooks */
static void brand_plat_interposition_enable(void);
static void brand_plat_interposition_disable(void);

struct brand_mach_ops native_mach_ops  = {
                NULL, NULL
};
#else /* !__sparcv9 */
struct brand_mach_ops native_mach_ops  = {
                NULL, NULL, NULL, NULL
};
#endif /* !__sparcv9 */

brand_t native_brand = {
                BRAND_VER_1,
                "native",
                NULL,
                &native_mach_ops
};

/*
 * Used to maintain a list of all the brands currently loaded into the
 * kernel.
 */
struct brand_list {
        int                     bl_refcnt;
        struct brand_list       *bl_next;
        brand_t                 *bl_brand;
};

static struct brand_list *brand_list = NULL;

/*
 * This lock protects the integrity of the brand list.
 */
static kmutex_t brand_list_lock;

void
brand_init()
{
        mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
        p0.p_brand = &native_brand;
}

int
brand_register(brand_t *brand)
{
        struct brand_list *list, *scan;

        if (brand == NULL)
                return (EINVAL);

        if (brand->b_version != SUPPORTED_BRAND_VERSION) {
                if (brand->b_version < SUPPORTED_BRAND_VERSION) {
                        cmn_err(CE_WARN,
                            "brand '%s' was built to run on older versions "
                            "of Solaris.",
                            brand->b_name);
                } else {
                        cmn_err(CE_WARN,
                            "brand '%s' was built to run on a newer version "
                            "of Solaris.",
                            brand->b_name);
                }
                return (EINVAL);
        }

        /* Sanity checks */
        if (brand->b_name == NULL || brand->b_ops == NULL ||
            brand->b_ops->b_brandsys == NULL) {
                cmn_err(CE_WARN, "Malformed brand");
                return (EINVAL);
        }

        list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);

        /* Add the brand to the list of loaded brands. */
        mutex_enter(&brand_list_lock);

        /*
         * Check to be sure we haven't already registered this brand.
         */
        for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
                if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
                        cmn_err(CE_WARN,
                            "Invalid attempt to load a second instance of "
                            "brand %s", brand->b_name);
                        mutex_exit(&brand_list_lock);
                        kmem_free(list, sizeof (struct brand_list));
                        return (EINVAL);
                }
        }

#if defined(__sparcv9)
        /* sparcv9 uses system wide brand interposition hooks */
        if (brand_list == NULL)
                brand_plat_interposition_enable();
#endif /* __sparcv9 */

        list->bl_brand = brand;
        list->bl_refcnt = 0;
        list->bl_next = brand_list;
        brand_list = list;

        mutex_exit(&brand_list_lock);

        return (0);
}

/*
 * The kernel module implementing this brand is being unloaded, so remove
 * it from the list of active brands.
 */
int
brand_unregister(brand_t *brand)
{
        struct brand_list *list, *prev;

        /* Sanity checks */
        if (brand == NULL || brand->b_name == NULL) {
                cmn_err(CE_WARN, "Malformed brand");
                return (EINVAL);
        }

        prev = NULL;
        mutex_enter(&brand_list_lock);

        for (list = brand_list; list != NULL; list = list->bl_next) {
                if (list->bl_brand == brand)
                        break;
                prev = list;
        }

        if (list == NULL) {
                cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
                mutex_exit(&brand_list_lock);
                return (EINVAL);
        }

        if (list->bl_refcnt > 0) {
                cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
                    brand->b_name);
                mutex_exit(&brand_list_lock);
                return (EBUSY);
        }

        /* Remove brand from the list */
        if (prev != NULL)
                prev->bl_next = list->bl_next;
        else
                brand_list = list->bl_next;

#if defined(__sparcv9)
        /* sparcv9 uses system wide brand interposition hooks */
        if (brand_list == NULL)
                brand_plat_interposition_disable();
#endif /* __sparcv9 */

        mutex_exit(&brand_list_lock);

        kmem_free(list, sizeof (struct brand_list));

        return (0);
}

/*
 * Record that a zone of this brand has been instantiated.  If the kernel
 * module implementing this brand's functionality is not present, this
 * routine attempts to load the module as a side effect.
 */
brand_t *
brand_register_zone(struct brand_attr *attr)
{
        struct brand_list *l = NULL;
        ddi_modhandle_t hdl = NULL;
        char *modname;
        int err = 0;

        if (is_system_labeled()) {
                cmn_err(CE_WARN,
                    "Branded zones are not allowed on labeled systems.");
                return (NULL);
        }

        /*
         * We make at most two passes through this loop.  The first time
         * through, we're looking to see if this is a new user of an
         * already loaded brand.  If the brand hasn't been loaded, we
         * call ddi_modopen() to force it to be loaded and then make a
         * second pass through the list of brands.  If we don't find the
         * brand the second time through it means that the modname
         * specified in the brand_attr structure doesn't provide the brand
         * specified in the brandname field.  This would suggest a bug in
         * the brand's config.xml file.  We close the module and return
         * 'NULL' to the caller.
         */
        for (;;) {
                /*
                 * Search list of loaded brands
                 */
                mutex_enter(&brand_list_lock);
                for (l = brand_list; l != NULL; l = l->bl_next)
                        if (strcmp(attr->ba_brandname,
                            l->bl_brand->b_name) == 0)
                                break;
                if ((l != NULL) || (hdl != NULL))
                        break;
                mutex_exit(&brand_list_lock);

                /*
                 * We didn't find that the requested brand has been loaded
                 * yet, so we trigger the load of the appropriate kernel
                 * module and search the list again.
                 */
                modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
                (void) strcpy(modname, "brand/");
                (void) strcat(modname, attr->ba_modname);
                hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
                kmem_free(modname, MAXPATHLEN);

                if (err != 0)
                        return (NULL);
        }

        /*
         * If we found the matching brand, bump its reference count.
         */
        if (l != NULL)
                l->bl_refcnt++;

        mutex_exit(&brand_list_lock);

        if (hdl != NULL)
                (void) ddi_modclose(hdl);

        return ((l != NULL) ? l->bl_brand : NULL);
}

/*
 * Return the number of zones currently using this brand.
 */
int
brand_zone_count(struct brand *bp)
{
        struct brand_list *l;
        int cnt = 0;

        mutex_enter(&brand_list_lock);
        for (l = brand_list; l != NULL; l = l->bl_next)
                if (l->bl_brand == bp) {
                        cnt = l->bl_refcnt;
                        break;
                }
        mutex_exit(&brand_list_lock);

        return (cnt);
}

void
brand_unregister_zone(struct brand *bp)
{
        struct brand_list *list;

        mutex_enter(&brand_list_lock);
        for (list = brand_list; list != NULL; list = list->bl_next) {
                if (list->bl_brand == bp) {
                        ASSERT(list->bl_refcnt > 0);
                        list->bl_refcnt--;
                        break;
                }
        }
        mutex_exit(&brand_list_lock);
}

void
brand_setbrand(proc_t *p)
{
        brand_t *bp = p->p_zone->zone_brand;

        ASSERT(bp != NULL);
        ASSERT(p->p_brand == &native_brand);

        /*
         * We should only be called from exec(), when we know the process
         * is single-threaded.
         */
        ASSERT(p->p_tlist == p->p_tlist->t_forw);

        p->p_brand = bp;
        ASSERT(PROC_IS_BRANDED(p));
        BROP(p)->b_setbrand(p);
}

void
brand_clearbrand(proc_t *p, boolean_t no_lwps)
{
        brand_t *bp = p->p_zone->zone_brand;
        klwp_t *lwp = NULL;
        ASSERT(bp != NULL);
        ASSERT(!no_lwps || (p->p_tlist == NULL));

        /*
         * If called from exec_common() or proc_exit(),
         * we know the process is single-threaded.
         * If called from fork_fail, p_tlist is NULL.
         */
        if (!no_lwps) {
                ASSERT(p->p_tlist == p->p_tlist->t_forw);
                lwp = p->p_tlist->t_lwp;
        }

        ASSERT(PROC_IS_BRANDED(p));
        BROP(p)->b_proc_exit(p, lwp);
        p->p_brand = &native_brand;
}

#if defined(__sparcv9)
/*
 * Currently, only sparc has system level brand syscall interposition.
 * On x86 we're able to enable syscall interposition on a per-cpu basis
 * when a branded thread is scheduled to run on a cpu.
 */

/* Local variables needed for dynamic syscall interposition support */
static uint32_t syscall_trap_patch_instr_orig;
static uint32_t syscall_trap32_patch_instr_orig;

/* Trap Table syscall entry hot patch points */
extern void     syscall_trap_patch_point(void);
extern void     syscall_trap32_patch_point(void);

/* Alternate syscall entry handlers used when branded zones are running */
extern void     syscall_wrapper(void);
extern void     syscall_wrapper32(void);

/* Macros used to facilitate sparcv9 instruction generation */
#define BA_A_INSTR      0x30800000      /* ba,a addr */
#define DISP22(from, to) \
        ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)

/*ARGSUSED*/
static void
brand_plat_interposition_enable(void)
{
        ASSERT(MUTEX_HELD(&brand_list_lock));

        /*
         * Before we hot patch the kernel save the current instructions
         * so that we can restore them later.
         */
        syscall_trap_patch_instr_orig =
            *(uint32_t *)syscall_trap_patch_point;
        syscall_trap32_patch_instr_orig =
            *(uint32_t *)syscall_trap32_patch_point;

        /*
         * Modify the trap table at the patch points.
         *
         * We basically replace the first instruction at the patch
         * point with a ba,a instruction that will transfer control
         * to syscall_wrapper or syscall_wrapper32 for 64-bit and
         * 32-bit syscalls respectively.  It's important to note that
         * the annul bit is set in the branch so we don't execute
         * the instruction directly following the one we're patching
         * during the branch's delay slot.
         *
         * It also doesn't matter that we're not atomically updating both
         * the 64 and 32 bit syscall paths at the same time since there's
         * no actual branded processes running on the system yet.
         */
        hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
            BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
            4);
        hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
            BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
            4);
}

/*ARGSUSED*/
static void
brand_plat_interposition_disable(void)
{
        ASSERT(MUTEX_HELD(&brand_list_lock));

        /*
         * Restore the original instructions at the trap table syscall
         * patch points to disable the brand syscall interposition
         * mechanism.
         */
        hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
            syscall_trap_patch_instr_orig, 4);
        hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
            syscall_trap32_patch_instr_orig, 4);
}
#endif /* __sparcv9 */

/*
 * The following functions can be shared among kernel brand modules which
 * implement Solaris-derived brands, all of which need to do similar tasks
 * to manage the brand.
 */

#if defined(_LP64)
static void
Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
{
        bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
        dst->e_type =           src->e_type;
        dst->e_machine =        src->e_machine;
        dst->e_version =        src->e_version;
        dst->e_entry =          src->e_entry;
        dst->e_phoff =          src->e_phoff;
        dst->e_shoff =          src->e_shoff;
        dst->e_flags =          src->e_flags;
        dst->e_ehsize =         src->e_ehsize;
        dst->e_phentsize =      src->e_phentsize;
        dst->e_phnum =          src->e_phnum;
        dst->e_shentsize =      src->e_shentsize;
        dst->e_shnum =          src->e_shnum;
        dst->e_shstrndx =       src->e_shstrndx;
}
#endif /* _LP64 */

/*
 * Return -1 if the cmd was not handled by this function.
 */
/*ARGSUSED*/
int
brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
    struct brand *pbrand, int brandvers)
{
        brand_proc_data_t       *spd;
        brand_proc_reg_t        reg;
        proc_t                  *p = curproc;
        int                     err;

        /*
         * There is one operation that is supported for a native
         * process; B_EXEC_BRAND.  This brand operaion is redundant
         * since the kernel assumes a native process doing an exec
         * in a branded zone is going to run a branded processes.
         * hence we don't support this operation.
         */
        if (cmd == B_EXEC_BRAND)
                return (ENOSYS);

        /* For all other operations this must be a branded process. */
        if (p->p_brand == &native_brand)
                return (ENOSYS);

        ASSERT(p->p_brand == pbrand);
        ASSERT(p->p_brand_data != NULL);

        spd = (brand_proc_data_t *)p->p_brand_data;

        switch ((cmd)) {
        case B_EXEC_NATIVE:
                err = exec_common((char *)arg1, (const char **)arg2,
                    (const char **)arg3, NULL, EBA_NATIVE);
                return (err);

        /*
         * Get the address of the user-space system call handler from
         * the user process and attach it to the proc structure.
         */
        case B_REGISTER:
                if (p->p_model == DATAMODEL_NATIVE) {
                        if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
                                return (EFAULT);
                }
#if defined(_LP64)
                else {
                        brand_common_reg32_t reg32;

                        if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
                                return (EFAULT);
                        reg.sbr_version = reg32.sbr_version;
                        reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
                }
#endif /* _LP64 */

                if (reg.sbr_version != brandvers)
                        return (ENOTSUP);
                spd->spd_handler = reg.sbr_handler;
                return (0);

        case B_ELFDATA:
                if (p->p_model == DATAMODEL_NATIVE) {
                        if (copyout(&spd->spd_elf_data, (void *)arg1,
                            sizeof (brand_elf_data_t)) != 0)
                                return (EFAULT);
                }
#if defined(_LP64)
                else {
                        brand_elf_data32_t sed32;

                        sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
                        sed32.sed_phent = spd->spd_elf_data.sed_phent;
                        sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
                        sed32.sed_entry = spd->spd_elf_data.sed_entry;
                        sed32.sed_base = spd->spd_elf_data.sed_base;
                        sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
                        sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
                        if (copyout(&sed32, (void *)arg1, sizeof (sed32))
                            != 0)
                                return (EFAULT);
                }
#endif /* _LP64 */
                return (0);

        /*
         * The B_TRUSS_POINT subcommand exists so that we can see
         * truss output from interposed system calls that return
         * without first calling any other system call, meaning they
         * would be invisible to truss(1).
         * If the second argument is set non-zero, set errno to that
         * value as well.
         *
         * Common arguments seen with truss are:
         *
         *      arg1: syscall number
         *      arg2: errno
         */
        case B_TRUSS_POINT:
                return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
        }

        return (-1);
}

/*ARGSUSED*/
void
brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
{
        brand_proc_data_t       *spd;

        ASSERT(parent->p_brand == pbrand);
        ASSERT(child->p_brand == pbrand);
        ASSERT(parent->p_brand_data != NULL);
        ASSERT(child->p_brand_data == NULL);

        /*
         * Just duplicate all the proc data of the parent for the
         * child
         */
        spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
        bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
        child->p_brand_data = spd;
}

static void
restoreexecenv(struct execenv *ep, stack_t *sp)
{
        klwp_t *lwp = ttolwp(curthread);

        setexecenv(ep);
        lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
        lwp->lwp_sigaltstack.ss_size = sp->ss_size;
        lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
}

/*ARGSUSED*/
int
brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
    intpdata_t *idatap, int level, size_t *execsz, int setid, caddr_t exec_file,
    cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
    char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
{

        vnode_t         *nvp;
        Ehdr            ehdr;
        Addr            uphdr_vaddr;
        intptr_t        voffset;
        int             interp;
        int             i, err;
        struct execenv  env;
        struct execenv  origenv;
        stack_t         orig_sigaltstack;
        struct user     *up = PTOU(curproc);
        proc_t          *p = ttoproc(curthread);
        klwp_t          *lwp = ttolwp(curthread);
        brand_proc_data_t       *spd;
        brand_elf_data_t sed, *sedp;
        char            *linker;
        uintptr_t       lddata; /* lddata of executable's linker */

        ASSERT(curproc->p_brand == pbrand);
        ASSERT(curproc->p_brand_data != NULL);

        spd = (brand_proc_data_t *)curproc->p_brand_data;
        sedp = &spd->spd_elf_data;

        args->brandname = bname;

        /*
         * We will exec the brand library and then map in the target
         * application and (optionally) the brand's default linker.
         */
        if (args->to_model == DATAMODEL_NATIVE) {
                args->emulator = brandlib;
                linker = brandlinker;
        }
#if defined(_LP64)
        else {
                args->emulator = brandlib32;
                linker = brandlinker32;
        }
#endif  /* _LP64 */

        if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
            NULLVPP, &nvp)) != 0) {
                uprintf("%s: not found.", args->emulator);
                return (err);
        }

        /*
         * The following elf{32}exec call changes the execenv in the proc
         * struct which includes changing the p_exec member to be the vnode
         * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
         * We will eventually set the p_exec member to be the vnode for the new
         * executable when we call setexecenv().  However, if we get an error
         * before that call we need to restore the execenv to its original
         * values so that when we return to the caller fop_close() works
         * properly while cleaning up from the failed exec().  Restoring the
         * original value will also properly decrement the 2nd VN_RELE that we
         * took on the brand library.
         */
        origenv.ex_bssbase = p->p_bssbase;
        origenv.ex_brkbase = p->p_brkbase;
        origenv.ex_brksize = p->p_brksize;
        origenv.ex_vp = p->p_exec;
        orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
        orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
        orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;

        if (args->to_model == DATAMODEL_NATIVE) {
                err = elfexec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1, execsz,
                    setid, exec_file, cred, brand_action);
        }
#if defined(_LP64)
        else {
                err = elf32exec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1,
                    execsz, setid, exec_file, cred, brand_action);
        }
#endif  /* _LP64 */
        VN_RELE(nvp);
        if (err != 0) {
                restoreexecenv(&origenv, &orig_sigaltstack);
                return (err);
        }

        /*
         * The u_auxv veCTors are set up by elfexec to point to the
         * brand emulation library and linker.  Save these so they can
         * be copied to the specific brand aux vectors.
         */
        bzero(&sed, sizeof (sed));
        for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
                switch (up->u_auxv[i].a_type) {
                case AT_SUN_LDDATA:
                        sed.sed_lddata = up->u_auxv[i].a_un.a_val;
                        break;
                case AT_BASE:
                        sed.sed_base = up->u_auxv[i].a_un.a_val;
                        break;
                case AT_ENTRY:
                        sed.sed_entry = up->u_auxv[i].a_un.a_val;
                        break;
                case AT_PHDR:
                        sed.sed_phdr = up->u_auxv[i].a_un.a_val;
                        break;
                case AT_PHENT:
                        sed.sed_phent = up->u_auxv[i].a_un.a_val;
                        break;
                case AT_PHNUM:
                        sed.sed_phnum = up->u_auxv[i].a_un.a_val;
                        break;
                default:
                        break;
                }
        }
        /* Make sure the emulator has an entry point */
        ASSERT(sed.sed_entry != 0);
        ASSERT(sed.sed_phdr != 0);

        bzero(&env, sizeof (env));
        if (args->to_model == DATAMODEL_NATIVE) {
                err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
                    &voffset, exec_file, &interp, &env.ex_bssbase,
                    &env.ex_brkbase, &env.ex_brksize, NULL);
        }
#if defined(_LP64)
        else {
                Elf32_Ehdr ehdr32;
                Elf32_Addr uphdr_vaddr32;
                err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
                    &voffset, exec_file, &interp, &env.ex_bssbase,
                    &env.ex_brkbase, &env.ex_brksize, NULL);
                Ehdr32to64(&ehdr32, &ehdr);

                if (uphdr_vaddr32 == (Elf32_Addr)-1)
                        uphdr_vaddr = (Addr)-1;
                else
                        uphdr_vaddr = uphdr_vaddr32;
        }
#endif  /* _LP64 */
        if (err != 0) {
                restoreexecenv(&origenv, &orig_sigaltstack);
                return (err);
        }

        /*
         * Save off the important properties of the executable. The
         * brand library will ask us for this data later, when it is
         * initializing and getting ready to transfer control to the
         * brand application.
         */
        if (uphdr_vaddr == (Addr)-1)
                sedp->sed_phdr = voffset + ehdr.e_phoff;
        else
                sedp->sed_phdr = voffset + uphdr_vaddr;
        sedp->sed_entry = voffset + ehdr.e_entry;
        sedp->sed_phent = ehdr.e_phentsize;
        sedp->sed_phnum = ehdr.e_phnum;

        if (interp) {
                if (ehdr.e_type == ET_DYN) {
                        /*
                         * This is a shared object executable, so we
                         * need to pick a reasonable place to put the
                         * heap. Just don't use the first page.
                         */
                        env.ex_brkbase = (caddr_t)PAGESIZE;
                        env.ex_bssbase = (caddr_t)PAGESIZE;
                }

                /*
                 * If the program needs an interpreter (most do), map
                 * it in and store relevant information about it in the
                 * aux vector, where the brand library can find it.
                 */
                if ((err = lookupname(linker, UIO_SYSSPACE,
                    FOLLOW, NULLVPP, &nvp)) != 0) {
                        uprintf("%s: not found.", brandlinker);
                        restoreexecenv(&origenv, &orig_sigaltstack);
                        return (err);
                }
                if (args->to_model == DATAMODEL_NATIVE) {
                        err = mapexec_brand(nvp, args, &ehdr,
                            &uphdr_vaddr, &voffset, exec_file, &interp,
                            NULL, NULL, NULL, &lddata);
                }
#if defined(_LP64)
                else {
                        Elf32_Ehdr ehdr32;
                        Elf32_Addr uphdr_vaddr32;
                        err = mapexec32_brand(nvp, args, &ehdr32,
                            &uphdr_vaddr32, &voffset, exec_file, &interp,
                            NULL, NULL, NULL, &lddata);
                        Ehdr32to64(&ehdr32, &ehdr);

                        if (uphdr_vaddr32 == (Elf32_Addr)-1)
                                uphdr_vaddr = (Addr)-1;
                        else
                                uphdr_vaddr = uphdr_vaddr32;
                }
#endif  /* _LP64 */
                VN_RELE(nvp);
                if (err != 0) {
                        restoreexecenv(&origenv, &orig_sigaltstack);
                        return (err);
                }

                /*
                 * Now that we know the base address of the brand's
                 * linker, place it in the aux vector.
                 */
                sedp->sed_base = voffset;
                sedp->sed_ldentry = voffset + ehdr.e_entry;
                sedp->sed_lddata = voffset + lddata;
        } else {
                /*
                 * This program has no interpreter. The brand library
                 * will jump to the address in the AT_SUN_BRAND_LDENTRY
                 * aux vector, so in this case, put the entry point of
                 * the main executable there.
                 */
                if (ehdr.e_type == ET_EXEC) {
                        /*
                         * An executable with no interpreter, this must
                         * be a statically linked executable, which
                         * means we loaded it at the address specified
                         * in the elf header, in which case the e_entry
                         * field of the elf header is an absolute
                         * address.
                         */
                        sedp->sed_ldentry = ehdr.e_entry;
                        sedp->sed_entry = ehdr.e_entry;
                        sedp->sed_lddata = 0;
                        sedp->sed_base = 0;
                } else {
                        /*
                         * A shared object with no interpreter, we use
                         * the calculated address from above.
                         */
                        sedp->sed_ldentry = sedp->sed_entry;
                        sedp->sed_entry = 0;
                        sedp->sed_phdr = 0;
                        sedp->sed_phent = 0;
                        sedp->sed_phnum = 0;
                        sedp->sed_lddata = 0;
                        sedp->sed_base = voffset;

                        if (ehdr.e_type == ET_DYN) {
                                /*
                                 * Delay setting the brkbase until the
                                 * first call to brk(); see elfexec()
                                 * for details.
                                 */
                                env.ex_bssbase = (caddr_t)0;
                                env.ex_brkbase = (caddr_t)0;
                                env.ex_brksize = 0;
                        }
                }
        }

        env.ex_magic = elfmagic;
        env.ex_vp = vp;
        setexecenv(&env);

        /*
         * It's time to manipulate the process aux vectors.  First
         * we need to update the AT_SUN_AUXFLAGS aux vector to set
         * the AF_SUN_NOPLM flag.
         */
        if (args->to_model == DATAMODEL_NATIVE) {
                auxv_t          auxflags_auxv;

                if (copyin(args->auxp_auxflags, &auxflags_auxv,
                    sizeof (auxflags_auxv)) != 0)
                        return (EFAULT);

                ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
                auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
                if (copyout(&auxflags_auxv, args->auxp_auxflags,
                    sizeof (auxflags_auxv)) != 0)
                        return (EFAULT);
        }
#if defined(_LP64)
        else {
                auxv32_t        auxflags_auxv32;

                if (copyin(args->auxp_auxflags, &auxflags_auxv32,
                    sizeof (auxflags_auxv32)) != 0)
                        return (EFAULT);

                ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
                auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
                if (copyout(&auxflags_auxv32, args->auxp_auxflags,
                    sizeof (auxflags_auxv32)) != 0)
                        return (EFAULT);
        }
#endif  /* _LP64 */

        /* Second, copy out the brand specific aux vectors. */
        if (args->to_model == DATAMODEL_NATIVE) {
                auxv_t brand_auxv[] = {
                    { AT_SUN_BRAND_AUX1, 0 },
                    { AT_SUN_BRAND_AUX2, 0 },
                    { AT_SUN_BRAND_AUX3, 0 }
                };

                ASSERT(brand_auxv[0].a_type ==
                    AT_SUN_BRAND_COMMON_LDDATA);
                brand_auxv[0].a_un.a_val = sed.sed_lddata;

                if (copyout(&brand_auxv, args->auxp_brand,
                    sizeof (brand_auxv)) != 0)
                        return (EFAULT);
        }
#if defined(_LP64)
        else {
                auxv32_t brand_auxv32[] = {
                    { AT_SUN_BRAND_AUX1, 0 },
                    { AT_SUN_BRAND_AUX2, 0 },
                    { AT_SUN_BRAND_AUX3, 0 }
                };

                ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
                brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
                if (copyout(&brand_auxv32, args->auxp_brand,
                    sizeof (brand_auxv32)) != 0)
                        return (EFAULT);
        }
#endif  /* _LP64 */

        /*
         * Third, the /proc aux vectors set up by elfexec() point to
         * brand emulation library and it's linker.  Copy these to the
         * /proc brand specific aux vector, and update the regular
         * /proc aux vectors to point to the executable (and it's
         * linker).  This will enable debuggers to access the
         * executable via the usual /proc or elf notes aux vectors.
         *
         * The brand emulation library's linker will get it's aux
         * vectors off the stack, and then update the stack with the
         * executable's aux vectors before jumping to the executable's
         * linker.
         *
         * Debugging the brand emulation library must be done from
         * the global zone, where the librtld_db module knows how to
         * fetch the brand specific aux vectors to access the brand
         * emulation libraries linker.
         */
        for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
                ulong_t val;

                switch (up->u_auxv[i].a_type) {
                case AT_SUN_BRAND_COMMON_LDDATA:
                        up->u_auxv[i].a_un.a_val = sed.sed_lddata;
                        continue;
                case AT_BASE:
                        val = sedp->sed_base;
                        break;
                case AT_ENTRY:
                        val = sedp->sed_entry;
                        break;
                case AT_PHDR:
                        val = sedp->sed_phdr;
                        break;
                case AT_PHENT:
                        val = sedp->sed_phent;
                        break;
                case AT_PHNUM:
                        val = sedp->sed_phnum;
                        break;
                case AT_SUN_LDDATA:
                        val = sedp->sed_lddata;
                        break;
                default:
                        continue;
                }

                up->u_auxv[i].a_un.a_val = val;
                if (val == 0) {
                        /* Hide the entry for static binaries */
                        up->u_auxv[i].a_type = AT_IGNORE;
                }
        }

        /*
         * The last thing we do here is clear spd->spd_handler.  This
         * is important because if we're already a branded process and
         * if this exec succeeds, there is a window between when the
         * exec() first returns to the userland of the new process and
         * when our brand library get's initialized, during which we
         * don't want system calls to be re-directed to our brand
         * library since it hasn't been initialized yet.
         */
        spd->spd_handler = NULL;

        return (0);
}

void
brand_solaris_exec(struct brand *pbrand)
{
        brand_proc_data_t       *spd = curproc->p_brand_data;

        ASSERT(curproc->p_brand == pbrand);
        ASSERT(curproc->p_brand_data != NULL);
        ASSERT(ttolwp(curthread)->lwp_brand != NULL);

        /*
         * We should only be called from exec(), when we know the process
         * is single-threaded.
         */
        ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);

        /* Upon exec, reset our lwp brand data. */
        (void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
        (void) brand_solaris_initlwp(ttolwp(curthread), pbrand);

        /*
         * Upon exec, reset all the proc brand data, except for the elf
         * data associated with the executable we are exec'ing.
         */
        spd->spd_handler = NULL;
}

int
brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
    struct brand *pbrand)
{
        int err;

        /*
         * If there are any zones using this brand, we can't allow it
         * to be unloaded.
         */
        if (brand_zone_count(pbrand))
                return (EBUSY);

        kmem_free(*emul_table, NSYSCALL);
        *emul_table = NULL;

        err = mod_remove(modlinkage);
        if (err)
                cmn_err(CE_WARN, "Couldn't unload brand module");

        return (err);
}

/*ARGSUSED*/
void
brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
{
        ASSERT(p->lwp_procp->p_brand == pbrand);
        ASSERT(c->lwp_procp->p_brand == pbrand);

        ASSERT(p->lwp_procp->p_brand_data != NULL);
        ASSERT(c->lwp_procp->p_brand_data != NULL);

        /*
         * Both LWPs have already had been initialized via
         * brand_solaris_initlwp().
         */
        ASSERT(p->lwp_brand != NULL);
        ASSERT(c->lwp_brand != NULL);
}

/*ARGSUSED*/
void
brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
{
        ASSERT(l->lwp_procp->p_brand == pbrand);
        ASSERT(l->lwp_procp->p_brand_data != NULL);
        ASSERT(l->lwp_brand != NULL);
        l->lwp_brand = NULL;
}

/*ARGSUSED*/
int
brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
{
        ASSERT(l->lwp_procp->p_brand == pbrand);
        ASSERT(l->lwp_procp->p_brand_data != NULL);
        ASSERT(l->lwp_brand == NULL);
        l->lwp_brand = (void *)-1;
        return (0);
}

/*ARGSUSED*/
void
brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
{
        proc_t  *p = l->lwp_procp;

        ASSERT(l->lwp_procp->p_brand == pbrand);
        ASSERT(l->lwp_procp->p_brand_data != NULL);
        ASSERT(l->lwp_brand != NULL);

        /*
         * We should never be called for the last thread in a process.
         * (That case is handled by brand_solaris_proc_exit().)
         * Therefore this lwp must be exiting from a multi-threaded
         * process.
         */
        ASSERT(p->p_tlist != p->p_tlist->t_forw);

        l->lwp_brand = NULL;
}

/*ARGSUSED*/
void
brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
{
        ASSERT(p->p_brand == pbrand);
        ASSERT(p->p_brand_data != NULL);

        /*
         * When called from proc_exit(), we know that process is
         * single-threaded and free our lwp brand data.
         * otherwise just free p_brand_data and return.
         */
        if (l != NULL) {
                ASSERT(p->p_tlist == p->p_tlist->t_forw);
                ASSERT(p->p_tlist->t_lwp == l);
                (void) brand_solaris_freelwp(l, pbrand);
        }

        /* upon exit, free our proc brand data */
        kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
        p->p_brand_data = NULL;
}

void
brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
{
        ASSERT(p->p_brand == pbrand);
        ASSERT(p->p_brand_data == NULL);

        /*
         * We should only be called from exec(), when we know the process
         * is single-threaded.
         */
        ASSERT(p->p_tlist == p->p_tlist->t_forw);

        p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
        (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
}