root/usr/src/cmd/svc/startd/fork.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Milan Jurik. All rights reserved.
 */

/*
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 */

/*
 * fork.c - safe forking for svc.startd
 *
 * fork_configd() and fork_sulogin() are related, special cases that handle the
 * spawning of specific client processes for svc.startd.
 */

#include <sys/contract/process.h>
#include <sys/corectl.h>
#include <sys/ctfs.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include <libscf_priv.h>
#include <limits.h>
#include <poll.h>
#include <port.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <utmpx.h>
#include <spawn.h>

#include "manifest_hash.h"
#include "configd_exit.h"
#include "protocol.h"
#include "startd.h"

static  struct  utmpx   *utmpp; /* utmpx entry most recently returned by getutxent() in fork_sulogin() */

/*
 * pid_t startd_fork1(int *)
 *   fork1(2) wrapper that brackets the fork with the wait and utmpx
 *   prefork/postfork hooks.  The return value is exactly what fork1()
 *   returned.  If the fork failed and forkerr is non-NULL, the errno left by
 *   fork1() is stored in *forkerr.
 */
pid_t
startd_fork1(int *forkerr)
{
        pid_t child;

        /* prefork stack */
        wait_prefork();
        utmpx_prefork();

        child = fork1();
        if (child == -1) {
                /* Capture errno before the postfork hooks can disturb it. */
                if (forkerr != NULL)
                        *forkerr = errno;
        }

        /* postfork stack, unwound in the reverse order of the prefork one */
        utmpx_postfork();
        wait_postfork(child);

        return (child);
}

/*
 * int fork_mount(char *, char *)
 *   Run mount(8) with the given options and mount point.  (mount(8) has much
 *   hidden knowledge; it's much less correct to reimplement that logic here to
 *   save a fork(2)/exec(2) invocation.)
 *
 *   path is the mount point and opts is the argument passed to "-o".
 *
 *   Returns 0 if mount(8) ran and exited with status 0.  Returns -1 if the
 *   fork still fails after MAX_MOUNT_RETRIES retries, if waiting for the
 *   child fails, or if mount(8) terminated abnormally or with a non-zero
 *   exit status (which includes failure to exec it at all).
 */
int
fork_mount(char *path, char *opts)
{
        pid_t pid;
        uint_t tries = 0;
        int status;

        /* Retry a failed fork, backing off one more second each time. */
        for (pid = fork1(); pid == -1; pid = fork1()) {
                if (++tries > MAX_MOUNT_RETRIES)
                        return (-1);

                (void) sleep(tries);
        }

        if (pid == 0) {
                /* child */
                (void) execl("/sbin/mount", "mount", "-o", opts, path,
                    (char *)NULL);

                /*
                 * exec failed.  We are the forked child, so we must not
                 * return into the caller's code; terminate immediately and
                 * let the parent see a non-zero exit status.
                 */
                _exit(1);
        }

        /* parent: wait for mount(8), restarting if a signal interrupts us */
        while (waitpid(pid, &status, 0) == -1) {
                if (errno != EINTR)
                        return (-1);
        }

        /*
         * If our mount(8) invocation exited by peculiar means, or with
         * a non-zero status, our mount likelihood is low.
         */
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
                return (-1);

        return (0);
}

/*
 * pid_t fork_common(...)
 *   Common routine used by fork_sulogin, fork_emi, and fork_configd to
 *   fork a process in a contract with the provided terms.  Invokes
 *   fork_sulogin (with its no-fork argument set) on errors.
 *
 *   name is used in messages and as the contract's service aux string;
 *   svc_fmri is set as the contract's service FMRI.  inf, crit, fatal, param
 *   and cookie are written to the process contract template.  The fork is
 *   retried up to "retries" times.
 *
 *   Returns the fork1() result: 0 in the child, the child's pid in the
 *   parent.  In the parent, *ctidp receives the id obtained from
 *   contract_latest().
 */
static pid_t
fork_common(const char *name, const char *svc_fmri, int retries, ctid_t *ctidp,
    uint_t inf, uint_t crit, uint_t fatal, uint_t param, uint64_t cookie)
{
        uint_t attempts = 0;
        int tmpl_fd, rc;
        pid_t child;

        /*
         * Establish process contract terms.
         */
        tmpl_fd = open64(CTFS_ROOT "/process/template", O_RDWR);
        if (tmpl_fd == -1) {
                fork_sulogin(B_TRUE, "Could not open process contract template "
                    "for %s: %s\n", name, strerror(errno));
                /* NOTREACHED */
        }

        /* Apply every term; any non-zero result marks the whole set failed. */
        rc = ct_tmpl_set_critical(tmpl_fd, crit);
        rc |= ct_pr_tmpl_set_fatal(tmpl_fd, fatal);
        rc |= ct_tmpl_set_informative(tmpl_fd, inf);
        rc |= ct_pr_tmpl_set_param(tmpl_fd, param);
        rc |= ct_tmpl_set_cookie(tmpl_fd, cookie);
        rc |= ct_pr_tmpl_set_svc_fmri(tmpl_fd, svc_fmri);
        rc |= ct_pr_tmpl_set_svc_aux(tmpl_fd, name);
        if (rc != 0) {
                (void) close(tmpl_fd);
                fork_sulogin(B_TRUE, "Could not set %s process contract "
                    "terms\n", name);
                /* NOTREACHED */
        }

        rc = ct_tmpl_activate(tmpl_fd);
        if (rc != 0) {
                (void) close(tmpl_fd);
                fork_sulogin(B_TRUE, "Could not activate %s process contract "
                    "template: %s\n", name, strerror(rc));
                /* NOTREACHED */
        }

        utmpx_prefork();

        /*
         * Attempt to fork "retries" times, sleeping a little longer after
         * each failure.
         */
        while ((child = fork1()) == -1) {
                if (++attempts > retries) {
                        /*
                         * When we exit the sulogin session, init(8)
                         * will restart svc.startd(8).
                         */
                        rc = errno;
                        (void) ct_tmpl_clear(tmpl_fd);
                        (void) close(tmpl_fd);
                        utmpx_postfork();
                        fork_sulogin(B_TRUE, "Could not fork to start %s: %s\n",
                            name, strerror(rc));
                        /* NOTREACHED */
                }
                (void) sleep(attempts);
        }

        utmpx_postfork();

        /*
         * Only the parent looks up the id of the contract just created.
         */
        if (child != 0) {
                errno = contract_latest(ctidp);
                if (errno != 0)
                        uu_die("Could not get new contract id for %s\n", name);
        }

        /*
         * Clean up the template (parent and child alike), return pid.
         */
        (void) ct_tmpl_clear(tmpl_fd);
        (void) close(tmpl_fd);

        return (child);
}

/*
 * void fork_sulogin(boolean_t, const char *, ...)
 *   When we are invoked with the -s flag from boot (or run into an unfixable
 *   situation), we run a private copy of sulogin.  When the sulogin session
 *   is ended, we continue.  This is the last fallback action for system
 *   maintenance.
 *
 *   The printf-style format and arguments are printed to stdout before
 *   sulogin is started.
 *
 *   If immediate is true, fork_sulogin() executes sulogin(8) directly, without
 *   forking, and therefore never returns.  Otherwise a child is forked via
 *   fork_common() and the parent returns once that child has been waited for.
 *
 *   Because fork_sulogin() is needed potentially before we daemonize, we leave
 *   it outside the wait_register() framework.
 */
/*PRINTFLIKE2*/
void
fork_sulogin(boolean_t immediate, const char *format, ...)
{
        va_list ap;
        int fd_console;

        (void) printf("Requesting System Maintenance Mode\n");

        if (!booting_to_single_user)
                (void) printf("(See /lib/svc/share/README for more "
                    "information.)\n");

        va_start(ap, format);
        (void) vprintf(format, ap);
        va_end(ap);

        if (immediate) {
                (void) printf("Directly executing sulogin.\n");
                /*
                 * Can't call closefrom() in this MT section
                 * so safely close a minimum set of fds.
                 */
                (void) close(STDIN_FILENO);
                (void) close(STDOUT_FILENO);
                (void) close(STDERR_FILENO);
        } else {
                ctid_t  sulogin_ctid;
                pid_t   child;

                child = fork_common("sulogin", SVC_SULOGIN_FMRI,
                    MAX_SULOGIN_RETRIES, &sulogin_ctid, CT_PR_EV_HWERR, 0,
                    CT_PR_EV_HWERR, CT_PR_PGRPONLY, SULOGIN_COOKIE);

                if (child != 0) {
                        /* parent: block until the sulogin session is over */
                        (void) waitpid(child, NULL, 0);
                        contract_abandon(sulogin_ctid);
                        return;
                }

                /* child: close all inherited fds */
                closefrom(0);
        }

        (void) setpgrp();

        /* open the console for sulogin and make it stdin/stdout/stderr */
        fd_console = open("/dev/console", O_RDWR);
        if (fd_console >= 0) {
                int fd;

                for (fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
                        if (fd == fd_console)
                                continue;

                        /* retry the dup only when interrupted by a signal */
                        while (dup2(fd_console, fd) < 0 && errno == EINTR)
                                ;
                }

                if (fd_console > STDERR_FILENO)
                        (void) close(fd_console);
        }

        /*
         * Kill the first utmpx entry found on the console line whose user
         * is not "LOGIN".
         */
        setutxent();
        for (utmpp = getutxent(); utmpp != NULL; utmpp = getutxent()) {
                if (strcmp(utmpp->ut_user, "LOGIN") == 0)
                        continue;
                if (strcmp(utmpp->ut_line, "console") != 0)
                        continue;

                (void) kill(utmpp->ut_pid, SIGKILL);
                break;
        }

        (void) execl("/sbin/sulogin", "sulogin", NULL);

        uu_warn("Could not exec() sulogin");

        exit(1);
}

/* Executable that fork_configd() execs in the child. */
#define CONFIGD_PATH    "/lib/svc/bin/svc.configd"

/*
 * void fork_configd(int exitstatus)
 *   Start svc.configd in its own process contract and wait for the forked
 *   process to exit, retrying (usually after a sulogin session) until an
 *   acceptable exit status is seen.  Once it is, st_configd_lives is set and
 *   st_configd_live_cv is broadcast.
 *
 *   exitstatus is the wait status of a previous svc.configd, or 0 when there
 *   was none (as passed by fork_configd_thread() on first start).  If it
 *   carries CONFIGD_EXIT_DATABASE_BAD, a sulogin session is offered before
 *   the restart.
 *
 *   We are interested in exit events (since the parent's exiting means configd
 *   is ready to run and since the child's exiting indicates an error case) and
 *   in empty events.  This means we have a unique template for initiating
 *   configd.
 *
 *   In the forked child this function does not return: it execs CONFIGD_PATH
 *   or exits with status 1.
 */
void
fork_configd(int exitstatus)
{
        pid_t pid;
        ctid_t ctid = -1;
        int err;
        char path[PATH_MAX];

        /*
         * Checking the exitstatus for the potential failure of the
         * daemonized svc.configd.  If this is not the first time
         * through, but a call from the svc.configd monitoring thread
         * after a failure this is the status that is expected.  Other
         * failures are exposed during initialization or are fixed
         * by a restart (e.g door closings).
         *
         * If this is on-disk database corruption it will also be
         * caught by a restart but could be cleared before the restart.
         *
         * Or this could be internal database corruption due to a
         * rogue service that needs to be cleared before restart.
         */
        if (WEXITSTATUS(exitstatus) == CONFIGD_EXIT_DATABASE_BAD) {
                fork_sulogin(B_FALSE, "svc.configd exited with database "
                    "corrupt error after initialization of the repository\n");
        }

retry:
        log_framework(LOG_DEBUG, "fork_configd trying to start svc.configd\n");

        /*
         * If we're retrying, we will have an old contract lying around
         * from the failure.  Since we're going to be creating a new
         * contract shortly, we abandon the old one now.
         */
        if (ctid != -1)
                contract_abandon(ctid);
        ctid = -1;

        pid = fork_common("svc.configd", SCF_SERVICE_CONFIGD,
            MAX_CONFIGD_RETRIES, &ctid, 0, CT_PR_EV_EXIT, 0,
            CT_PR_INHERIT | CT_PR_REGENT, CONFIGD_COOKIE);

        if (pid != 0) {
                /*
                 * Parent.  Note that this local deliberately or accidentally
                 * shadows the exitstatus parameter; from here on only the
                 * status collected by waitpid() below is examined.
                 */
                int exitstatus;

                /*
                 * Remember the pid we forked; fork_configd_thread() compares
                 * exit events against it.
                 */
                st->st_configd_pid = pid;

                if (waitpid(pid, &exitstatus, 0) == -1) {
                        fork_sulogin(B_FALSE, "waitpid on svc.configd "
                            "failed: %s\n", strerror(errno));
                } else if (WIFEXITED(exitstatus)) {
                        char *errstr;

                        /*
                         * Examine exitstatus.  This will eventually get more
                         * complicated, as we will want to teach startd how to
                         * invoke configd with alternate repositories, etc.
                         *
                         * Note that exec(2) failure results in an exit status
                         * of 1, resulting in the default clause below.
                         */

                        /*
                         * Assign readable strings to cases we don't handle, or
                         * have error outcomes that cannot be eliminated.
                         * errstr is only consumed by the default clause of
                         * the remedial switch that follows.
                         */
                        switch (WEXITSTATUS(exitstatus)) {
                        case CONFIGD_EXIT_BAD_ARGS:
                                errstr = "bad arguments";
                                break;

                        case CONFIGD_EXIT_DATABASE_BAD:
                                errstr = "database corrupt";
                                break;

                        case CONFIGD_EXIT_DATABASE_LOCKED:
                                errstr = "database locked";
                                break;
                        case CONFIGD_EXIT_INIT_FAILED:
                                errstr = "initialization failure";
                                break;
                        case CONFIGD_EXIT_DOOR_INIT_FAILED:
                                errstr = "door initialization failure";
                                break;
                        case CONFIGD_EXIT_DATABASE_INIT_FAILED:
                                errstr = "database initialization failure";
                                break;
                        case CONFIGD_EXIT_NO_THREADS:
                                errstr = "no threads available";
                                break;
                        case CONFIGD_EXIT_LOST_MAIN_DOOR:
                                errstr = "lost door server attachment";
                                break;
                        case 1:
                                /* the child's exit(1) after a failed execl() */
                                errstr = "execution failure";
                                break;
                        default:
                                errstr = "unknown error";
                                break;
                        }

                        /*
                         * Remedial actions for various configd failures.
                         */
                        switch (WEXITSTATUS(exitstatus)) {
                        case CONFIGD_EXIT_OKAY:
                                /* success: fall out and announce configd */
                                break;

                        case CONFIGD_EXIT_DATABASE_LOCKED:
                                /* attempt remount of / read-write */
                                if (fs_is_read_only("/", NULL) == 1) {
                                        if (fs_remount("/") == -1)
                                                fork_sulogin(B_FALSE,
                                                    "remount of root "
                                                    "filesystem failed\n");

                                        goto retry;
                                }
                                /*
                                 * NOTE(review): when / is not reported
                                 * read-only we leave the switch and announce
                                 * configd as live without retrying -- confirm
                                 * this is intended.
                                 */
                                break;

                        default:
                                /*
                                 * Any other status: offer maintenance mode,
                                 * then try to start configd again.
                                 */
                                fork_sulogin(B_FALSE, "svc.configd exited "
                                    "with status %d (%s)\n",
                                    WEXITSTATUS(exitstatus), errstr);
                                goto retry;
                        }
                } else if (WIFSIGNALED(exitstatus)) {
                        char signame[SIG2STR_MAX];

                        /*
                         * Fall back to the raw signal number when sig2str()
                         * cannot name the signal.
                         */
                        if (sig2str(WTERMSIG(exitstatus), signame))
                                (void) snprintf(signame, SIG2STR_MAX,
                                    "signum %d", WTERMSIG(exitstatus));

                        fork_sulogin(B_FALSE, "svc.configd signalled:"
                            " %s\n", signame);

                        goto retry;
                } else {
                        /* neither a normal exit nor death by signal */
                        fork_sulogin(B_FALSE, "svc.configd non-exit "
                            "condition: 0x%x\n", exitstatus);

                        goto retry;
                }

                /*
                 * Announce that we have a valid svc.configd status.
                 */
                MUTEX_LOCK(&st->st_configd_live_lock);
                st->st_configd_lives = 1;
                err = pthread_cond_broadcast(&st->st_configd_live_cv);
                assert(err == 0);
                MUTEX_UNLOCK(&st->st_configd_live_lock);

                log_framework(LOG_DEBUG, "fork_configd broadcasts configd is "
                    "live\n");
                return;
        }

        /*
         * Child.
         *
         * Set our per-process core file path to leave core files in
         * /etc/svc/volatile directory, named after the PID to aid in debugging.
         * ("%%p" reaches core_set_process_path() as the literal token "%p".)
         */
        (void) snprintf(path, sizeof (path),
            "/etc/svc/volatile/core.configd.%%p");

        (void) core_set_process_path(path, strlen(path) + 1, getpid());

        log_framework(LOG_DEBUG, "executing svc.configd\n");

        (void) execl(CONFIGD_PATH, CONFIGD_PATH, NULL);

        /*
         * Status code is used above to identify configd exec failure.
         */
        exit(1);
}

/*
 * void *fork_configd_thread(void *)
 *   Thread body that starts (or adopts) svc.configd and then loops forever
 *   reading critical events from the process contract bundle.  vctid carries
 *   a ctid_t cast to a pointer: -1 means svc.configd must be started with
 *   fork_configd(); any other value is accepted as the contract of an
 *   already running svc.configd.
 *
 *   Events from contracts whose cookie is not CONFIGD_COOKIE are ignored.
 *   An exit event for a pid other than st_configd_pid causes the contract
 *   to be abandoned and svc.configd to be restarted.  Never returns.
 */
void *
fork_configd_thread(void *vctid)
{
        ctid_t configd_ctid = (ctid_t)vctid;
        int bundle_fd, err;

        if (configd_ctid != -1) {
                /*
                 * configd_ctid is known:  we broadcast and continue.
                 * test contract for appropriate state by verifying that
                 * there is one or more processes within it?
                 */
                log_framework(LOG_DEBUG,
                    "fork_configd_thread accepting svc.configd with CTID %ld\n",
                    configd_ctid);
                MUTEX_LOCK(&st->st_configd_live_lock);
                st->st_configd_lives = 1;
                (void) pthread_cond_broadcast(&st->st_configd_live_cv);
                MUTEX_UNLOCK(&st->st_configd_live_lock);
        } else {
                log_framework(LOG_DEBUG,
                    "fork_configd_thread starting svc.configd\n");
                fork_configd(0);
        }

        bundle_fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
        if (bundle_fd == -1)
                uu_die("process bundle open failed");

        /*
         * Make sure we get all events (including those generated by configd
         * before this thread was started).
         */
        err = ct_event_reset(bundle_fd);
        assert(err == 0);

        for (;;) {
                ct_evthdl_t ev;
                ct_stathdl_t status;
                ctevid_t evid;
                ctid_t ctid;
                uint32_t type;
                uint64_t cookie;
                int status_fd, ctl_fd;

                err = ct_event_read_critical(bundle_fd, &ev);
                if (err != 0) {
                        assert(err != EINVAL && err != EAGAIN);
                        log_error(LOG_WARNING,
                            "Error reading next contract event: %s",
                            strerror(err));
                        continue;
                }

                evid = ct_event_get_evid(ev);
                ctid = ct_event_get_ctid(ev);
                type = ct_event_get_type(ev);

                /* Fetch cookie from the contract's status file. */
                status_fd = contract_open(ctid, "process", "status", O_RDONLY);
                if (status_fd < 0) {
                        ct_event_free(ev);
                        continue;
                }

                err = ct_status_read(status_fd, CTD_COMMON, &status);
                if (err != 0) {
                        log_framework(LOG_WARNING, "Could not get status for "
                            "contract %ld: %s\n", ctid, strerror(err));

                        ct_event_free(ev);
                        startd_close(status_fd);
                        continue;
                }

                cookie = ct_status_get_cookie(status);
                ct_status_free(status);
                startd_close(status_fd);

                /*
                 * Don't process events from contracts we aren't interested in.
                 */
                if (cookie != CONFIGD_COOKIE) {
                        ct_event_free(ev);
                        continue;
                }

                if (type == CT_PR_EV_EXIT) {
                        pid_t exited_pid;
                        int exitstatus;

                        (void) ct_pr_event_get_pid(ev, &exited_pid);
                        (void) ct_pr_event_get_exitstatus(ev,
                            &exitstatus);

                        if (st->st_configd_pid != exited_pid) {
                                /*
                                 * This is the child exiting, so we
                                 * abandon the contract and restart
                                 * configd.
                                 */
                                contract_abandon(ctid);
                                fork_configd(exitstatus);
                        }
                }

                /* Acknowledge the event if the ctl file can be opened. */
                ctl_fd = contract_open(ctid, "process", "ctl", O_WRONLY);
                if (ctl_fd != -1) {
                        (void) ct_ctl_ack(ctl_fd, evid);
                        startd_close(ctl_fd);
                }

                ct_event_free(ev);
        }

        /*NOTREACHED*/
        return (NULL);
}

/*
 * void fork_rc_script(char, const char *, boolean_t)
 *   Fork a child that executes /sbin/rc<rl> with the single argument arg.
 *   Before forking, a process contract template with empty critical,
 *   informative and fatal event sets is activated (a warning is issued if
 *   the template cannot be opened).
 *
 *   If wait is true the parent blocks until the script terminates and logs
 *   an abnormal termination or non-zero exit status; otherwise it returns
 *   right after the fork.  A failed fork is silently ignored.
 *
 *   The child selects "rc<rl>.log" via setlog(), announces the execution on
 *   stderr, builds its environment with set_smf_env() and execs the script.
 *   It never returns.
 */
void
fork_rc_script(char rl, const char *arg, boolean_t wait)
{
        pid_t pid, wpid;
        int tmpl, err, stat;
        char path[20] = "/sbin/rc.", log[20] = "rc..log", timebuf[20];
        time_t now;
        struct tm ltime;
        size_t sz;
        char *pathenv;
        char **nenv;

        /* Replace the '.' placeholder to form "/sbin/rc<rl>". */
        path[8] = rl;

        tmpl = open64(CTFS_ROOT "/process/template", O_RDWR);
        if (tmpl >= 0) {
                err = ct_tmpl_set_critical(tmpl, 0);
                assert(err == 0);

                err = ct_tmpl_set_informative(tmpl, 0);
                assert(err == 0);

                err = ct_pr_tmpl_set_fatal(tmpl, 0);
                assert(err == 0);

                err = ct_tmpl_activate(tmpl);
                assert(err == 0);

                err = close(tmpl);
                assert(err == 0);
        } else {
                uu_warn("Could not create contract template for %s.\n", path);
        }

        pid = startd_fork1(NULL);
        if (pid < 0) {
                return;
        } else if (pid != 0) {
                /* parent */
                if (!wait)
                        return;

                /*
                 * waitpid() returns the (non-zero) pid on success and -1 on
                 * failure, so only a -1 return with EINTR warrants a retry.
                 */
                do {
                        wpid = waitpid(pid, &stat, 0);
                } while (wpid == -1 && errno == EINTR);

                if (wpid == -1) {
                        /* stat was never filled in; don't interpret it. */
                        log_framework(LOG_INFO,
                            "waitpid() for %s failed: %s.\n", path,
                            strerror(errno));
                        return;
                }

                if (!WIFEXITED(stat)) {
                        log_framework(LOG_INFO,
                            "%s terminated with waitpid() status %d.\n",
                            path, stat);
                } else if (WEXITSTATUS(stat) != 0) {
                        log_framework(LOG_INFO,
                            "%s failed with status %d.\n", path,
                            WEXITSTATUS(stat));
                }

                return;
        }

        /* child */

        /* Replace the first '.' placeholder to form "rc<rl>.log". */
        log[2] = rl;

        setlog(log);

        now = time(NULL);
        sz = strftime(timebuf, sizeof (timebuf), "%b %e %T",
            localtime_r(&now, &ltime));
        assert(sz != 0);

        (void) fprintf(stderr, "%s Executing %s %s\n", timebuf, path, arg);

        /* Only run level 'S' gets /sbin on its PATH. */
        if (rl == 'S')
                pathenv = "PATH=/sbin:/usr/sbin:/usr/bin";
        else
                pathenv = "PATH=/usr/sbin:/usr/bin";

        nenv = set_smf_env(NULL, 0, pathenv, NULL, NULL);

        /*
         * The argument list of execle() must be terminated by a null
         * pointer of type char *, not by the integer constant 0.
         */
        (void) execle(path, path, arg, (char *)NULL, nenv);

        /*
         * exec failed.  NOTE(review): the child still exits with status 0,
         * so the parent cannot distinguish this from a successful script --
         * confirm that this is intended.
         */
        perror("exec");
        exit(0);
}

/*
 * Filesystem paths used by the Early Manifest Import (EMI) support code.
 * EMI_MFST is the manifest file whose existence emi_is_disabled() checks
 * with access(2).
 * NOTE(review): SVCCFG_PATH and EMI_PATH look like the executables run to
 * perform the import -- confirm against their users.
 */
#define SVCCFG_PATH     "/usr/sbin/svccfg"
#define EMI_MFST        "/lib/svc/manifest/system/early-manifest-import.xml"
#define EMI_PATH        "/lib/svc/method/manifest-import"

/*
 * Set Early Manifest Import service's state and log file.
 *
 * Commits "state" as the state of SCF_INSTANCE_EMI and, when setlog is true,
 * first records EMI_LOG as the instance's method log.  Repository connection
 * problems are retried; other recoverable failures drop into sulogin and are
 * then retried as well.
 *
 * Returns 0 once the state has been committed, or 1 if the instance does not
 * exist in the repository.
 */
static int
emi_set_state(restarter_instance_state_t state, boolean_t setlog)
{
        scf_handle_t *h = NULL;
        scf_instance_t *emi = NULL;
        instance_data_t idata;
        int rc;
        int ret = 1;

retry:
        if (h == NULL) {
                h = libscf_handle_create_bound(SCF_VERSION);
                if (h == NULL) {
                        /*
                         * In the case that we can't bind to the repository
                         * (which should have been started), we need to allow
                         * the user into maintenance mode to determine what's
                         * failed.
                         */
                        fork_sulogin(B_FALSE, "Unable to bind a new repository"
                            " handle: %s\n", scf_strerror(scf_error()));
                        goto retry;
                }
        }

        /* The instance object is created once and reused across retries. */
        if (emi == NULL)
                emi = safe_scf_instance_create(h);

        if (scf_handle_decode_fmri(h, SCF_INSTANCE_EMI, NULL, NULL,
            emi, NULL, NULL, SCF_DECODE_FMRI_EXACT) == -1) {
                switch (scf_error()) {
                case SCF_ERROR_CONNECTION_BROKEN:
                case SCF_ERROR_NOT_BOUND:
                        libscf_handle_rebind(h);
                        goto retry;

                case SCF_ERROR_NOT_FOUND:
                        /* No such instance: report it through ret == 1. */
                        goto out;

                default:
                        fork_sulogin(B_FALSE, "Couldn't fetch %s service: "
                            "%s\n", SCF_INSTANCE_EMI,
                            scf_strerror(scf_error()));
                        goto retry;
                }
        }

        if (setlog) {
                (void) libscf_note_method_log(emi, st->st_log_prefix, EMI_LOG);
                log_framework(LOG_DEBUG,
                    "Set logfile property for %s\n", SCF_INSTANCE_EMI);
        }

        idata.i_fmri = SCF_INSTANCE_EMI;
        idata.i_state =  RESTARTER_STATE_NONE;
        idata.i_next_state = RESTARTER_STATE_NONE;

        rc = _restarter_commit_states(h, &idata, state,
            RESTARTER_STATE_NONE, NULL);
        switch (rc) {
        case 0:
                break;

        case ECONNABORTED:
                libscf_handle_rebind(h);
                goto retry;

        case ENOMEM:
        case ENOENT:
        case EPERM:
        case EACCES:
        case EROFS:
                fork_sulogin(B_FALSE, "Could not set state of "
                    "%s: %s\n", SCF_INSTANCE_EMI, strerror(rc));
                goto retry;

        case EINVAL:
        default:
                bad_error("_restarter_commit_states", rc);
        }
        ret = 0;

out:
        scf_instance_destroy(emi);
        scf_handle_destroy(h);
        return (ret);
}

/*
 * It is possible that the early-manifest-import service is disabled.  This
 * would not be the normal case for Solaris, but it may happen on dedicated
 * systems.  So this function checks the state of the general/enabled
 * property for Early Manifest Import.
 *
 * It is also possible that the early-manifest-import service does not yet
 * have a repository representation when this function runs.  This happens
 * if a non-Early Manifest Import system is upgraded to an Early Manifest
 * Import based system.  Thus, the non-existence of general/enabled is not
 * an error.
 *
 * Returns 1 if Early Manifest Import is disabled and 0 otherwise.
 */
static int
emi_is_disabled()
{
        int disabled = 0;
        int disconnected = 1;
        int enabled;
        scf_handle_t *hndl = NULL;
        scf_instance_t *inst = NULL;
        uchar_t stored_hash[MHASH_SIZE];
        char *pname;
        int hashash, r;

        while (hndl == NULL) {
                hndl = libscf_handle_create_bound(SCF_VERSION);

                if (hndl == NULL) {
                        /*
                         * In the case that we can't bind to the repository
                         * (which should have been started), we need to
                         * allow the user into maintenance mode to
                         * determine what's failed.
                         */
                        fork_sulogin(B_FALSE, "Unable to bind a new repository "
                            "handle: %s\n", scf_strerror(scf_error()));
                }
        }

        while (disconnected) {
                r = libscf_fmri_get_instance(hndl, SCF_INSTANCE_EMI, &inst);
                if (r != 0) {
                        switch (r) {
                        case ECONNABORTED:
                                libscf_handle_rebind(hndl);
                                continue;

                        case ENOENT:
                                /*
                                 * Early Manifest Import service is not in
                                 * the repository. Check the manifest file
                                 * and service's hash in smf/manifest to
                                 * figure out whether Early Manifest Import
                                 * service was deleted. If Early Manifest Import
                                 * service was deleted, treat that as a disable
                                 * and don't run early import.
                                 */

                                if (access(EMI_MFST, F_OK)) {
                                        /*
                                         * Manifest isn't found, so service is
                                         * properly removed.
                                         */
                                        disabled = 1;
                                } else {
                                        /*
                                         * If manifest exists and we have the
                                         * hash, the service was improperly
                                         * deleted, generate a warning and treat
                                         * this as a disable.
                                         */

                                        if ((pname = mhash_filename_to_propname(
                                            EMI_MFST, B_TRUE)) == NULL) {
                                                /*
                                                 * Treat failure to get propname
                                                 * as a disable.
                                                 */
                                                disabled = 1;
                                                uu_warn("Failed to get propname"
                                                    " for %s.\n",
                                                    SCF_INSTANCE_EMI);
                                        } else {
                                                hashash = mhash_retrieve_entry(
                                                    hndl, pname,
                                                    stored_hash,
                                                    NULL) == 0;
                                                uu_free(pname);

                                                if (hashash) {
                                                        disabled = 1;
                                                        uu_warn("%s service is "
                                                            "deleted \n",
                                                            SCF_INSTANCE_EMI);
                                                }
                                        }

                                }

                                disconnected = 0;
                                continue;

                        default:
                                bad_error("libscf_fmri_get_instance",
                                    scf_error());
                        }
                }
                r = libscf_get_basic_instance_data(hndl, inst, SCF_INSTANCE_EMI,
                    &enabled, NULL, NULL);
                if (r == 0) {
                        /*
                         * enabled can be returned as -1, which indicates
                         * that the enabled property was not found.  To us
                         * that means that the service was not disabled.
                         */
                        if (enabled == 0)
                                disabled = 1;
                } else {
                        switch (r) {
                        case ECONNABORTED:
                                libscf_handle_rebind(hndl);
                                continue;

                        case ECANCELED:
                        case ENOENT:
                                break;
                        default:
                                bad_error("libscf_get_basic_instance_data", r);
                        }
                }
                disconnected = 0;
        }

        if (inst != NULL)
                scf_instance_destroy(inst);
        scf_handle_destroy(hndl);
        return (disabled);
}

/*
 * fork_emi() - run Early Manifest Import (EMI) once and wait for it.
 *
 * Nothing is done if the EMI instance is disabled, if its state is already
 * online, or if SVCCFG_PATH is not accessible.  Otherwise a child is created
 * with fork_common() and the parent blocks in waitpid() until it finishes.
 * A non-zero exit status, death by signal, or any other wait status is
 * reported through fork_sulogin() and the fork is then retried.  When the
 * wait finishes without a retry, the EMI instance is marked online.
 *
 * The child never returns: it execs EMI_PATH with a two-variable environment
 * (SMF_FMRI and PATH), or exits with status 1 if the exec fails.
 */
void
fork_emi(void)
{
        pid_t pid;
        ctid_t ctid = -1;
        char **envp, **np;
        const char *emipath;
        char corepath[PATH_MAX];
        char *svc_state;
        int setemilog;
        size_t sz;

        if (emi_is_disabled()) {
                log_framework(LOG_NOTICE, "%s is  disabled and will "
                    "not be run.\n", SCF_INSTANCE_EMI);
                return;
        }

        /*
         * Early Manifest Import should run only once, at boot. If svc.startd
         * is somehow restarted, Early Manifest Import should not run again.
         * Use the Early Manifest Import service's state to figure out whether
         * Early Manifest Import has successfully completed earlier and bail
         * out if it did.
         */
        if ((svc_state = smf_get_state(SCF_INSTANCE_EMI)) != NULL) {
                if (strcmp(svc_state, SCF_STATE_STRING_ONLINE) == 0) {
                        free(svc_state);
                        return;
                }
                free(svc_state);
        }

        /*
         * Attempt to set Early Manifest Import service's state and log file.
         * If emi_set_state fails, set log file again in the next call to
         * emi_set_state.
         */
        setemilog = emi_set_state(RESTARTER_STATE_OFFLINE, B_TRUE);

        /* Don't go further if /usr isn't available */
        if (access(SVCCFG_PATH, F_OK)) {
                log_framework(LOG_NOTICE, "Early Manifest Import is not "
                    "supported on systems with a separate /usr filesystem.\n");
                return;
        }

fork_retry:
        log_framework(LOG_DEBUG, "Starting Early Manifest Import\n");

        /*
         * If we're retrying, we will have an old contract lying around
         * from the failure.  Since we're going to be creating a new
         * contract shortly, we abandon the old one now.
         */
        if (ctid != -1)
                contract_abandon(ctid);
        ctid = -1;

        pid = fork_common(SCF_INSTANCE_EMI, SCF_INSTANCE_EMI,
            MAX_EMI_RETRIES, &ctid, 0, 0, 0, 0, EMI_COOKIE);

        if (pid != 0) {
                int exitstatus;

                /*
                 * NOTE(review): a waitpid() failure is reported but not
                 * retried; once fork_sulogin() returns, control continues
                 * to the completion path below.
                 */
                if (waitpid(pid, &exitstatus, 0) == -1) {
                        fork_sulogin(B_FALSE, "waitpid on %s failed: "
                            "%s\n", SCF_INSTANCE_EMI, strerror(errno));
                } else if (WIFEXITED(exitstatus)) {
                        if (WEXITSTATUS(exitstatus)) {
                                fork_sulogin(B_FALSE, "%s exited with status "
                                    "%d \n", SCF_INSTANCE_EMI,
                                    WEXITSTATUS(exitstatus));
                                goto fork_retry;
                        }
                } else if (WIFSIGNALED(exitstatus)) {
                        char signame[SIG2STR_MAX];

                        /* Fall back to the number if the name is unknown. */
                        if (sig2str(WTERMSIG(exitstatus), signame))
                                (void) snprintf(signame, SIG2STR_MAX,
                                    "signum %d", WTERMSIG(exitstatus));

                        fork_sulogin(B_FALSE, "%s signalled: %s\n",
                            SCF_INSTANCE_EMI, signame);
                        goto fork_retry;
                } else {
                        fork_sulogin(B_FALSE, "%s non-exit condition: 0x%x\n",
                            SCF_INSTANCE_EMI, exitstatus);
                        goto fork_retry;
                }

                log_framework(LOG_DEBUG, "%s completed successfully\n",
                    SCF_INSTANCE_EMI);

                /*
                 * Once Early Manifest Import completed, the Early Manifest
                 * Import service must have been imported so set log file and
                 * state properties. Since this information is required for
                 * late manifest import and common admin operations, failing to
                 * set these properties should result in su login so admin can
                 * correct the problem.  The log file is only set again here
                 * if the earlier emi_set_state() call returned non-zero.
                 */
                (void) emi_set_state(RESTARTER_STATE_ONLINE,
                    setemilog ? B_TRUE : B_FALSE);

                return;
        }

        /* child */

        /*
         * Set our per-process core file path to leave core files in
         * /etc/svc/volatile directory, named after the PID to aid in debugging.
         */
        (void) snprintf(corepath, sizeof (corepath),
            "/etc/svc/volatile/core.emi.%%p");
        (void) core_set_process_path(corepath, strlen(corepath) + 1, getpid());

        /*
         * Similar to running legacy services, we need to manually set
         * log files here and environment variables.
         */
        setlog(EMI_LOG);

        /* Two variables plus the terminating NULL left by the zeroed alloc. */
        envp = startd_zalloc(sizeof (char *) * 3);
        np = envp;

        /* sizeof of the literal already accounts for the terminating NUL. */
        sz = sizeof ("SMF_FMRI=") + strlen(SCF_INSTANCE_EMI);
        *np = startd_zalloc(sz);
        (void) snprintf(*np, sz, "SMF_FMRI=%s", SCF_INSTANCE_EMI);
        np++;

        /*
         * The fallback is only read, so a string literal is enough; no
         * allocation that could fail or leak is needed.
         */
        emipath = getenv("PATH");
        if (emipath == NULL)
                emipath = "/usr/sbin:/usr/bin";

        sz = sizeof ("PATH=") + strlen(emipath);
        *np = startd_zalloc(sz);
        (void) snprintf(*np, sz, "PATH=%s", emipath);

        log_framework(LOG_DEBUG, "executing Early Manifest Import\n");
        /* The argument-list terminator of a variadic exec must be a pointer. */
        (void) execle(EMI_PATH, EMI_PATH, (char *)NULL, envp);

        /*
         * Status code is used above to identify Early Manifest Import
         * exec failure.
         */
        exit(1);
}

extern char **environ;

/*
 * A local variation on system(3c) which accepts a timeout argument.  This
 * allows us to better ensure that the system will actually shut down.
 *
 * cmd is run as "/bin/sh -c cmd" with the caller's environment and with all
 * file descriptors above stderr closed.
 *
 * gracetime specifies an amount of time in seconds which the routine must wait
 * after the command exits, to allow for asynchronous effects (like sent
 * signals) to take effect.  This can be zero.  The grace period is only
 * observed when the command's wait status is 0.
 *
 * timeout specifies, in seconds, how long the command may run before it is
 * sent SIGTERM followed by SIGKILL.  The child is polled every 100ms.
 *
 * SIGCHLD is blocked in the calling thread for the duration of the call and
 * the previous signal mask is restored before returning.  Failures are
 * reported with uu_warn(); nothing is returned to the caller.
 */
void
fork_with_timeout(const char *cmd, uint_t gracetime, uint_t timeout)
{
        int err;
        int attr_valid = 0;
        int factions_valid = 0;
        pid_t pid;
        char *argv[4];
        posix_spawnattr_t attr;
        posix_spawn_file_actions_t factions;

        sigset_t mask, savemask;
        uint_t msec_timeout;
        uint_t msec_spent = 0;
        uint_t msec_gracetime;
        int status;

        msec_timeout = timeout * 1000;
        msec_gracetime = gracetime * 1000;

        /*
         * See also system(3c) in libc.  This is very similar, except
         * that we avoid some unneeded complexity.
         */
        err = posix_spawnattr_init(&attr);
        if (err == 0) {
                attr_valid = 1;
                err = posix_spawnattr_setflags(&attr,
                    POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF |
                    POSIX_SPAWN_NOSIGCHLD_NP | POSIX_SPAWN_WAITPID_NP |
                    POSIX_SPAWN_NOEXECERR_NP);
        }

        /*
         * We choose to close fd's above 2, a deviation from system.
         */
        if (err == 0) {
                err = posix_spawn_file_actions_init(&factions);
                if (err == 0)
                        factions_valid = 1;
        }
        if (err == 0)
                err = posix_spawn_file_actions_addclosefrom_np(&factions,
                    STDERR_FILENO + 1);

        (void) sigemptyset(&mask);
        (void) sigaddset(&mask, SIGCHLD);
        (void) thr_sigsetmask(SIG_BLOCK, &mask, &savemask);

        argv[0] = "/bin/sh";
        argv[1] = "-c";
        argv[2] = (char *)cmd;
        argv[3] = NULL;

        if (err == 0)
                err = posix_spawn(&pid, "/bin/sh", &factions, &attr,
                    (char *const *)argv, (char *const *)environ);

        /* Only tear down the objects that were successfully initialized. */
        if (attr_valid)
                (void) posix_spawnattr_destroy(&attr);
        if (factions_valid)
                (void) posix_spawn_file_actions_destroy(&factions);

        if (err != 0) {
                uu_warn("Failed to spawn %s: %s\n", cmd, strerror(err));
        } else {
                for (;;) {
                        int w;

                        w = waitpid(pid, &status, WNOHANG);
                        if (w == -1 && errno != EINTR)
                                break;
                        if (w > 0) {
                                /*
                                 * Command succeeded, so give it gracetime
                                 * seconds for it to have an effect.
                                 */
                                if (status == 0 && msec_gracetime != 0)
                                        (void) poll(NULL, 0, msec_gracetime);
                                break;
                        }

                        /* Not done yet; sleep 100ms before polling again. */
                        (void) poll(NULL, 0, 100);
                        msec_spent += 100;
                        /*
                         * If we timed out, kill off the process, then try to
                         * wait for it-- it's possible that we could accumulate
                         * a zombie here since we don't allow waitpid to hang,
                         * but it's better to let that happen and continue to
                         * make progress.
                         */
                        if (msec_spent >= msec_timeout) {
                                uu_warn("'%s' timed out after %u "
                                    "seconds.  Killing.\n", cmd,
                                    timeout);
                                (void) kill(pid, SIGTERM);
                                (void) poll(NULL, 0, 100);
                                (void) kill(pid, SIGKILL);
                                (void) poll(NULL, 0, 100);
                                (void) waitpid(pid, &status, WNOHANG);
                                break;
                        }
                }
        }

        /*
         * Put the caller's mask back exactly as it was.  SIG_BLOCK would only
         * add savemask to the current mask and leave SIGCHLD blocked.
         */
        (void) thr_sigsetmask(SIG_SETMASK, &savemask, NULL);
}