root/usr/src/cmd/fs.d/ufs/fsck/setup.c
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
/*        All Rights Reserved   */

/*
 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that: (1) source distributions retain this entire copyright
 * notice and comment, and (2) distributions including binaries display
 * the following acknowledgement:  ``This product includes software
 * developed by the University of California, Berkeley and its contributors''
 * in the documentation or other materials provided with the distribution
 * and in all advertising materials mentioning features or use of this
 * software. Neither the name of the University nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

#define DKTYPENAMES
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <malloc.h>
#include <limits.h>
#include <wait.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/mntent.h>
#include <sys/dkio.h>
#include <sys/filio.h>
#include <sys/isa_defs.h>       /* for ENDIAN defines */
#include <sys/int_const.h>
#include <sys/fs/ufs_fs.h>
#include <sys/vnode.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_log.h>
#include <sys/stat.h>
#include <sys/fcntl.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/vfstab.h>
#include "roll_log.h"
#include "fsck.h"

/*
 * The size of a cylinder group is calculated by CGSIZE. The maximum size
 * is limited by the fact that cylinder groups are at most one block.
 * Its size is derived from the size of the maps maintained in the
 * cylinder group and the (struct cg) size.
 */
#define CGSIZE(fs) \
        /* base cg */     (sizeof (struct cg) + \
        /* blktot size */ (fs)->fs_cpg * sizeof (int32_t) + \
        /* blks size */   (fs)->fs_cpg * (fs)->fs_nrpos * sizeof (short) + \
        /* inode map */   howmany((fs)->fs_ipg, NBBY) + \
        /* block map */   howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY))

#define altsblock (*asblk.b_un.b_fs)
#define POWEROF2(num)   (((num) & ((num) - 1)) == 0)

/*
 * Methods of determining where alternate superblocks should
 * be.  MAX_SB_STYLES must be the last one, and the others need
 * to be positive.
 */
typedef enum {
        MKFS_STYLE = 1, NEWFS_STYLE, MAX_SB_STYLES
} calcsb_t;

static caddr_t calcsb_names[] = {
        "<UNKNOWN>", "MKFS", "NEWFS", "<OUT OF RANGE>"
};

fsck_ino_t lfdir;
int64_t numacls, aclmax, aclplast;
int64_t numdirs, listmax, inplast;
char havesb;
int fsreadfd;
int isdirty;
int pid;
int secsize;
size_t dev_bsize;
struct bufarea sblk;
static struct bufarea asblk;    /* alternate superblock */
struct inoinfo **inphead, **inpsort;
struct shadowclientinfo *shadowclientinfo = NULL;
struct shadowclientinfo *attrclientinfo = NULL;
int maxshadowclients = 1024;    /* allocation size, not limit  */

static void badsb(int, caddr_t);
static int calcsb(calcsb_t, caddr_t, int, struct fs *);
static int checksb(int);
static void flush_fs(void);
static void sblock_init(void);
static void uncreate_maps(void);

static int
read_super_block(int listerr)
{
        int fd;
        caddr_t err;

        if (mount_point != NULL) {
                fd = open(mount_point, O_RDONLY);
                if (fd == -1) {
                        errexit("fsck: open mount point error: %s",
                            strerror(errno));
                        /* NOTREACHED */
                }
                /* get the latest super block */
                if (ioctl(fd, _FIOGETSUPERBLOCK, &sblock)) {
                        errexit("fsck: ioctl _FIOGETSUPERBLOCK error: %s",
                            strerror(errno));
                        /* NOTREACHED */
                }
                (void) close(fd);
        } else {
                (void) fsck_bread(fsreadfd, (caddr_t)&sblock,
                        bflag != 0 ? (diskaddr_t)bflag : (diskaddr_t)SBLOCK,
                        SBSIZE);
        }

        /*
         * Don't let trash from the disk trip us up later
         * in ungetsummaryinfo().
         */
        sblock.fs_u.fs_csp = NULL;

        /*
         * Rudimentary consistency checks.  Can't really call
         * checksb() here, because there may be outstanding
         * deltas that still need to be applied.
         */
        if ((sblock.fs_magic != FS_MAGIC) &&
            (sblock.fs_magic != MTB_UFS_MAGIC)) {
                err = "MAGIC NUMBER WRONG";
                goto fail;
        }
        if (sblock.fs_magic == FS_MAGIC &&
                (sblock.fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
                sblock.fs_version != UFS_VERSION_MIN)) {
                err = "UNRECOGNIZED VERSION";
                goto fail;
        }
        if (sblock.fs_magic == MTB_UFS_MAGIC &&
                (sblock.fs_version > MTB_UFS_VERSION_1 ||
                sblock.fs_version < MTB_UFS_VERSION_MIN)) {
                err = "UNRECOGNIZED VERSION";
                goto fail;
        }
        if (sblock.fs_ncg < 1) {
                err = "NCG OUT OF RANGE";
                goto fail;
        }
        if (sblock.fs_cpg < 1) {
                err = "CPG OUT OF RANGE";
                goto fail;
        }
        if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl ||
                (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) {
                err = "NCYL IS INCONSISTENT WITH NCG*CPG";
                goto fail;
        }
        if (sblock.fs_sbsize < 0 || sblock.fs_sbsize > SBSIZE) {
                err = "SIZE OUT OF RANGE";
                goto fail;
        }

        return (1);

fail:
        badsb(listerr, err);
        return (0);
}

static void
flush_fs()
{
        int fd;

        if (mount_point != NULL) {
                fd = open(mount_point, O_RDONLY);
                if (fd == -1) {
                        errexit("fsck: open mount point error: %s",
                            strerror(errno));
                        /* NOTREACHED */
                }
                if (ioctl(fd, _FIOFFS, NULL)) { /* flush file system */
                        errexit("fsck: ioctl _FIOFFS error: %s",
                            strerror(errno));
                        /* NOTREACHED */
                }
                (void) close(fd);
        }
}

/*
 * Roll the embedded log, if any, and set up the global variables
 * islog and islogok.
 */
static int
logsetup(caddr_t devstr)
{
        void            *buf;
        extent_block_t  *ebp;
        ml_unit_t       *ul;
        ml_odunit_t     *ud;
        void            *ud_buf;
        int             badlog;

        islog = islogok = 0;
        if (bflag != 0)
                return (1); /* can't roll log while alternate sb specified */

        /*
         * Roll the log, if any.  A bad sb implies we'll be using
         * an alternate sb as far as logging goes, so just fail back
         * to the caller if we can't read the default sb.  Suppress
         * complaints, because the caller will be reading the same
         * superblock again and running full verification on it, so
         * whatever is bad will be reported then.
         */
        sblock.fs_logbno = 0;
        badlog = 0;
        if (!read_super_block(0))
                return (1);

        /*
         * Roll the log in 3 cases:
         * 1. If it's unmounted (mount_point == NULL) and it's not marked
         *    as fully rolled (sblock.fs_rolled != FS_ALL_ROLLED)
         * 2. If it's mounted and anything other than a sanity
         *    check fsck (mflag) is being done, as we have the current
         *    super block. Note, only a sanity check is done for
         *    root/usr at boot. If a roll were done then the expensive
         *    ufs_flush() gets called, leading to a slower boot.
         * 3. If anything other then a sanity check (mflag) is being done
         *    to a mounted filesystem while it is in read-only state
         *    (e.g. root during early boot stages) we have to detect this
         *    and have to roll the log as well. NB. the read-only mount
         *    will flip fs_clean from FSLOG to FSSTABLE and marks the
         *    log as FS_NEED_ROLL.
         */
        if (sblock.fs_logbno &&
            (((mount_point == NULL) && (sblock.fs_rolled != FS_ALL_ROLLED)) ||
            ((mount_point != NULL) && !mflag))) {
                int roll_log_err = 0;

                if (sblock.fs_ronly && (sblock.fs_clean == FSSTABLE) &&
                        (sblock.fs_state + sblock.fs_time == FSOKAY)) {
                        /*
                         * roll the log without a mount
                         */
                        flush_fs();
                }
                if (sblock.fs_clean == FSLOG &&
                        (sblock.fs_state + sblock.fs_time == FSOKAY)) {
                        if (rl_roll_log(devstr) != RL_SUCCESS)
                                roll_log_err = 1;
                }
                if (roll_log_err) {
                        (void) printf("Can't roll the log for %s.\n", devstr);
                        /*
                         * There are two cases where we want to set
                         * an error code and return:
                         *  - We're preening
                         *  - We're not on a live root and the user
                         *    chose *not* to ignore the log
                         * Otherwise, we want to mark the log as bad
                         * and continue to check the filesystem.  This
                         * has the side effect of destroying the log.
                         */
                        if (preen || (!hotroot &&
                            reply(
                        "DISCARDING THE LOG MAY DISCARD PENDING TRANSACTIONS.\n"
                                        "DISCARD THE LOG AND CONTINUE") == 0)) {
                                exitstat = EXERRFATAL;
                                return (0);
                        }
                        ++badlog;
                }
        }

        /* Logging UFS may be enabled */
        if (sblock.fs_logbno) {
                ++islog;

                /* log is not okay; check the fs */
                if (FSOKAY != (sblock.fs_state + sblock.fs_time))
                        return (1);

                /*
                 * If logging or (stable and mounted) then continue
                 */
                if (!((sblock.fs_clean == FSLOG) ||
                    (sblock.fs_clean == FSSTABLE) && (mount_point != NULL)))
                        return (1);

                /* get the log allocation block */
                buf = malloc(dev_bsize);
                if (buf == NULL) {
                        return (1);
                }
                ud_buf = malloc(dev_bsize);
                if (ud_buf == NULL) {
                        free(buf);
                        return (1);
                }
                (void) fsck_bread(fsreadfd, buf,
                    logbtodb(&sblock, sblock.fs_logbno),
                    dev_bsize);
                ebp = (extent_block_t *)buf;

                /* log allocation block is not okay; check the fs */
                if (ebp->type != LUFS_EXTENTS) {
                        free(buf);
                        free(ud_buf);
                        return (1);
                }

                /* get the log state block(s) */
                if (fsck_bread(fsreadfd, ud_buf,
                    (logbtodb(&sblock, ebp->extents[0].pbno)),
                    dev_bsize)) {
                        (void) fsck_bread(fsreadfd, ud_buf,
                            (logbtodb(&sblock, ebp->extents[0].pbno)) + 1,
                            dev_bsize);
                }
                ud = (ml_odunit_t *)ud_buf;
                ul = (ml_unit_t *)malloc(sizeof (*ul));
                if (ul == NULL) {
                        free(buf);
                        free(ud_buf);
                        return (1);
                }
                ul->un_ondisk = *ud;

                /* log state is okay; don't need to check the fs */
                if ((ul->un_chksum == ul->un_head_ident + ul->un_tail_ident) &&
                    (ul->un_version == LUFS_VERSION_LATEST) &&
                    (ul->un_badlog == 0) && (!badlog)) {
                        ++islogok;
                }
                free(ud_buf);
                free(buf);
                free(ul);
        }

        return (1);
}

/*
 * - given a pathname, determine the pathname to actually check
 * - if a directory
 *   - if it is in mnttab, set devstr to the special (block) name
 *   - if it is in vfstab, set devstr to the special (block) name
 *   - if it has not been found, bail
 * - a file is used as-is, clear rflag
 * - a device is converted to block version (so can search mnttab)
 */
static void
derive_devstr(const caddr_t dev, caddr_t devstr, size_t str_size)
{
        mode_t mode;
        struct stat statb;

        if (stat(dev, &statb) < 0) {
                exitstat = EXNOSTAT;
                errexit("fsck: could not stat %s: %s", dev, strerror(errno));
        }

        mode = statb.st_mode & S_IFMT;
        switch (mode) {
        case S_IFDIR:
                /*
                 * The check_*() routines update devstr with the name.
                 */
                devstr[0] = '\0';
                if (!(check_mnttab(dev, devstr, str_size) ||
                    check_vfstab(dev, devstr, str_size))) {
                        exitstat = EXBADPARM;
                        errexit(
                    "fsck: could not find mountpoint %s in mnttab nor vfstab",
                            dev);
                }
                break;
        case S_IFREG:
                rflag = 0;
                (void) strlcpy(devstr, dev, str_size);
                break;
        case S_IFCHR:
        case S_IFBLK:
                (void) strlcpy(devstr, unrawname(dev), str_size);
                break;
        default:
                exitstat = EXBADPARM;
                errexit("fsck: %s must be a mountpoint, device, or file", dev);
                /* NOTREACHED */
        }
}

/*
 * Reports the index of the magic filesystem that mntp names.
 * If it does not correspond any of them, returns zero (hence
 * the backwards loop).
 */
static int
which_corefs(const caddr_t mntp)
{
        int corefs;

        for (corefs = MAGIC_LIMIT - 1; corefs > 0; corefs--)
                if (strcmp(mntp, magic_fs[corefs]) == 0)
                        break;

        return (corefs);
}

/*
 * - set mount_point to NULL
 * - if name is mounted (search mnttab)
 *   - if it is a device, clear rflag
 *   - if mounted on /, /usr, or /var, set corefs
 *   - if corefs and read-only, set hotroot and continue
 *   - if errorlocked, continue
 *   - if preening, bail
 *   - ask user whether to continue, bail if not
 * - if it is a device and not mounted and rflag, convert
 *   name to raw version
 */
static int
check_mount_state(caddr_t devstr, size_t str_size)
{
        int corefs = 0;
        int is_dev = 0;
        struct stat statb;

        if (stat(devstr, &statb) < 0) {
                exitstat = EXNOSTAT;
                errexit("fsck: could not stat %s: %s", devstr, strerror(errno));
        }
        if (S_ISCHR(statb.st_mode) || S_ISBLK(statb.st_mode))
                is_dev = 1;

        /*
         * mounted() will update mount_point when returning true.
         */
        mount_point = NULL;
        if ((mountedfs = mounted(devstr, devstr, str_size)) != M_NOMNT) {
                if (is_dev)
                        rflag = 0;
                corefs = which_corefs(mount_point);
                if (corefs && (mountedfs == M_RO)) {
                        hotroot++;
                } else if (errorlocked) {
                        goto carry_on;
                } else if (preen) {
                        exitstat = EXMOUNTED;
                        pfatal("%s IS CURRENTLY MOUNTED%s.",
                            devstr, mountedfs == M_RW ? " READ/WRITE" : "");
                } else {
                        if (!nflag && !mflag) {
                                pwarn("%s IS CURRENTLY MOUNTED READ/%s.",
                                    devstr, mountedfs == M_RW ? "WRITE" :
                                    "ONLY");
                                if (reply("CONTINUE") == 0) {
                                        exitstat = EXMOUNTED;
                                        errexit("Program terminated");
                                }
                        }
                }
        } else if (is_dev && rflag) {
                (void) strlcpy(devstr, rawname(devstr), str_size);
        }

carry_on:
        return (corefs);
}

static int
open_and_intro(caddr_t devstr, int corefs)
{
        int retval = 0;

        if ((fsreadfd = open64(devstr, O_RDONLY)) < 0) {
                (void) printf("Can't open %s: %s\n", devstr, strerror(errno));
                exitstat = EXNOSTAT;
                retval = -1;
                goto finish;
        }
        if (!preen || debug != 0)
                (void) printf("** %s", devstr);

        if (errorlocked) {
                if (debug && elock_combuf != NULL)
                        (void) printf(" error-lock comment: \"%s\" ",
                            elock_combuf);
                fflag = 1;
        }
        pid = getpid();
        if (nflag || (fswritefd = open64(devstr, O_WRONLY)) < 0) {
                fswritefd = -1;
                if (preen && !debug)
                        pfatal("(NO WRITE ACCESS)\n");
                (void) printf(" (NO WRITE)");
        }
        if (!preen)
                (void) printf("\n");
        else if (debug)
                (void) printf(" pid %d\n", pid);
        if (debug && (hotroot || (mountedfs != M_NOMNT))) {
                (void) printf("** %s", devstr);
                if (hotroot)
                        (void) printf(" is %s fs", magic_fs[corefs]);
                if (mountedfs != M_NOMNT)
                        (void) printf(" and is mounted read-%s",
                            (mountedfs == M_RO) ? "only" : "write");
                if (errorlocked)
                        (void) printf(" and is error-locked");

                (void) printf(".\n");
        }

finish:
        return (retval);
}

static int
find_superblock(caddr_t devstr)
{
        int cg = 0;
        int retval = 0;
        int first;
        int found;
        calcsb_t style;
        struct fs proto;

        /*
         * Check the superblock, looking for alternates if necessary.
         * In more-recent times, some UFS instances get created with
         * only the first ten and last ten superblock backups.  Since
         * if we can't get the necessary information from any of those,
         * the odds are also against us for the ones in between, we'll
         * just look at those twenty to save time.
         */
        if (!read_super_block(1) || !checksb(1)) {
                if (bflag || preen) {
                        retval = -1;
                        goto finish;
                }
                for (style = MKFS_STYLE; style < MAX_SB_STYLES; style++) {
                        if (reply("LOOK FOR ALTERNATE SUPERBLOCKS WITH %s",
                            calcsb_names[style]) == 0)
                                continue;
                        first = 1;
                        found = 0;
                        if (!calcsb(style, devstr, fsreadfd, &proto)) {
                                cg = proto.fs_ncg;
                                continue;
                        }
                        if (debug) {
                                (void) printf(
                            "debug: calcsb(%s) gave fpg %d, cgoffset %d, ",
                                    calcsb_names[style],
                                    proto.fs_fpg, proto.fs_cgoffset);
                                (void) printf("cgmask 0x%x, sblk %d, ncg %d\n",
                                    proto.fs_cgmask, proto.fs_sblkno,
                                    proto.fs_ncg);
                        }
                        for (cg = 0; cg < proto.fs_ncg; cg++) {
                                bflag = fsbtodb(&proto, cgsblock(&proto, cg));
                                if (debug)
                                        (void) printf(
                                            "debug: trying block %lld\n",
                                            (longlong_t)bflag);
                                if (read_super_block(0) && checksb(0)) {
                                        (void) printf(
                                    "FOUND ALTERNATE SUPERBLOCK %d WITH %s\n",
                                            bflag, calcsb_names[style]);
                                        if (reply(
                                            "USE ALTERNATE SUPERBLOCK") == 1) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (first && (cg >= 9)) {
                                        first = 0;
                                        if (proto.fs_ncg <= 9)
                                                cg = proto.fs_ncg;
                                        else if (proto.fs_ncg <= 19)
                                                cg = 9;
                                        else
                                                cg = proto.fs_ncg - 10;
                                }
                        }

                        if (found)
                                break;
                }

                /*
                 * Didn't find one?  Try to fake it.
                 */
                if (style >= MAX_SB_STYLES) {
                        pwarn("SEARCH FOR ALTERNATE SUPERBLOCKS FAILED.\n");
                        for (style = MKFS_STYLE; style < MAX_SB_STYLES;
                            style++) {
                                if (reply("USE GENERIC SUPERBLOCK FROM %s",
                                    calcsb_names[style]) == 1 &&
                                    calcsb(style, devstr, fsreadfd, &sblock)) {
                                        break;
                                }
                        }
                        /*
                         * We got something from mkfs/newfs, so use it.
                         */
                        if (style < MAX_SB_STYLES) {
                                proto.fs_ncg = sblock.fs_ncg;
                                bflag = 0;
                        }
                }

                /*
                 * Still no luck?  Tell the user they're on their own.
                 */
                if (style >= MAX_SB_STYLES) {
                        pwarn("SEARCH FOR ALTERNATE SUPERBLOCKS FAILED. "
                            "YOU MUST USE THE -o b OPTION\n"
                            "TO FSCK TO SPECIFY THE LOCATION OF A VALID "
                            "ALTERNATE SUPERBLOCK TO\n"
                            "SUPPLY NEEDED INFORMATION; SEE fsck(8).\n");
                        bflag = 0;
                        retval = -1;
                        goto finish;
                }

                /*
                 * Need to make sure a human really wants us to use
                 * this.  -y mode could've gotten us this far, so
                 * we need to ask something that has to be answered
                 * in the negative.
                 *
                 * Note that we can't get here when preening.
                 */
                if (!found) {
                        pwarn("CALCULATED GENERIC SUPERBLOCK WITH %s\n",
                            calcsb_names[style]);
                } else {
                        pwarn("FOUND ALTERNATE SUPERBLOCK AT %d USING %s\n",
                            bflag, calcsb_names[style]);
                }
                pwarn("If filesystem was created with manually-specified ");
                pwarn("geometry, using\nauto-discovered superblock may ");
                pwarn("result in irrecoverable damage to\nfilesystem and ");
                pwarn("user data.\n");
                if (reply("CANCEL FILESYSTEM CHECK") == 1) {
                        if (cg >= 0) {
                                pwarn("Please verify that the indicated block "
                                    "contains a proper\nsuperblock for the "
                                    "filesystem (see fsdb(8)).\n");
                                if (yflag)
                                        pwarn("\nFSCK was running in YES "
                                            "mode.  If you wish to run in "
                                            "that mode using\nthe alternate "
                                            "superblock, run "
                                            "`fsck -y -o b=%d %s'.\n",
                                            bflag, devstr);
                        }
                        retval = -1;
                        goto finish;
                }

                /*
                 * Pretend we found it as an alternate, so everything
                 * gets updated when we clean up at the end.
                 */
                if (!found) {
                        havesb = 1;
                        sblk.b_bno = fsbtodb(&sblock, cgsblock(&sblock, 0));
                        bwrite(fswritefd, (caddr_t)&sblock, SBLOCK, SBSIZE);
                        write_altsb(fswritefd);
                }
        }

finish:
        return (retval);
}

/*
 * Check and potentially fix certain fields in the super block.
 */
static void
fixup_superblock(void)
{
        /*
         * Kernel looks for FS_OPTTIME, and assumes that if that's not
         * what's there, it must be FS_OPTSPACE, so not fixing does not
         * require setting iscorrupt.
         */
        if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) {
                pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK");
                if (reply("SET TO DEFAULT") == 1) {
                        sblock.fs_optim = FS_OPTTIME;
                        sbdirty();
                }
        }
        if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) {
                pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK",
                    sblock.fs_minfree);
                if (reply("SET TO DEFAULT") == 1) {
                        sblock.fs_minfree = 10;
                        sbdirty();
                } else if (sblock.fs_minfree < 0) {
                        /*
                         * Kernel uses minfree without verification,
                         * and a negative value would do bad things.
                         */
                        iscorrupt = 1;
                }
        }
}

static int
initial_error_state_adjust(void)
{
        int retval = 0;

        /* do this right away to prevent any other fscks on this fs */
        switch (sblock.fs_clean) {
        case FSBAD:
                break;
        case FSFIX:
                if (preen)
                        errexit("ERROR-LOCKED; MARKED \"FSFIX\"\n");
                if (reply("marked FSFIX, CONTINUE") == 0) {
                        retval = -1;
                        goto finish;
                }
                break;
        case FSCLEAN:
                if (preen)
                        errexit("ERROR-LOCKED; MARKED \"FSCLEAN\"\n");
                if (reply("marked FSCLEAN, CONTINUE") == 0) {
                        retval = -1;
                        goto finish;
                }
                break;
        default:
                if (preen) {
                        if (debug)
                                pwarn("ERRORLOCKED; NOT MARKED \"FSBAD\"\n");
                        else
                                errexit("ERRORLOCKED; NOT MARKED \"FSBAD\"\n");
                } else {
                        (void) printf("error-locked but not marked \"FSBAD\";");
                        if (reply(" CONTINUE") == 0) {
                                retval = -1;
                                goto finish;
                        }
                }
                break;
        }

        if (!do_errorlock(LOCKFS_ELOCK)) {
                if (preen) {
                        retval = -1;
                        goto finish;
                }
                if (reply("error-lock reset failed; CONTINUE") == 0) {
                        retval = -1;
                        goto finish;
                }
        }

        sblock.fs_state = FSOKAY - (long)sblock.fs_time;
        sblock.fs_clean = FSFIX;
        sbdirty();
        write_altsb(fswritefd);

finish:
        return (retval);
}

static void
getsummaryinfo(void)
{
        size_t size;
        int failed;
        int asked;
        int i, j;
        caddr_t sip;

        /*
         * read in the summary info.
         */
        sblock.fs_u.fs_csp = calloc(1, sblock.fs_cssize);
        if (sblock.fs_u.fs_csp == NULL)
                errexit(
            "cannot allocate %u bytes for cylinder group summary info\n",
                    (unsigned)sblock.fs_cssize);
        sip = (caddr_t)sblock.fs_u.fs_csp;
        asked = 0;
        for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
                size = sblock.fs_cssize - i < sblock.fs_bsize ?
                        sblock.fs_cssize - i : sblock.fs_bsize;
                failed = fsck_bread(fsreadfd, sip,
                    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
                    size);
                if (failed && !asked) {
                        pfatal("BAD SUMMARY INFORMATION");
                        if (reply("CONTINUE") == 0) {
                                ckfini();
                                exit(EXFNDERRS);
                        }
                        asked = 1;
                }
                sip += size;
        }
}

/*
 * Reverses the effects of getsummaryinfo().
 */
static void
ungetsummaryinfo(void)
{
        if ((sblk.b_un.b_fs != NULL) &&
            (sblk.b_un.b_fs->fs_u.fs_csp != NULL)) {
                free(sblk.b_un.b_fs->fs_u.fs_csp);
                sblk.b_un.b_fs->fs_u.fs_csp = NULL;
        }
}

/*
 * Allocate and initialize the global tables.
 * It is the responsibility of the caller to clean up and allocations
 * if an error is returned.
 */
static int
create_and_init_maps(void)
{
        int64_t bmapsize;
        int retval = 0;

        maxfsblock = sblock.fs_size;
        maxino = sblock.fs_ncg * sblock.fs_ipg;

        bmapsize = roundup(howmany((uint64_t)maxfsblock, NBBY),
            sizeof (short));
        blockmap = calloc((size_t)bmapsize, sizeof (char));
        if (blockmap == NULL) {
                (void) printf("cannot alloc %lld bytes for blockmap\n",
                    (longlong_t)bmapsize);
                retval = -1;
                goto finish;
        }
        statemap = calloc((size_t)(maxino + 1), sizeof (*statemap));
        if (statemap == NULL) {
                (void) printf("cannot alloc %lld bytes for statemap\n",
                    (longlong_t)(maxino + 1) * sizeof (*statemap));
                retval = -1;
                goto finish;
        }
        lncntp = (short *)calloc((size_t)(maxino + 1), sizeof (short));
        if (lncntp == NULL) {
                (void) printf("cannot alloc %lld bytes for lncntp\n",
                    (longlong_t)(maxino + 1) * sizeof (short));
                retval = -1;
                goto finish;
        }

        /*
         * If we had to fake up a superblock, it won't show that there
         * are any directories at all.  This causes problems when we
         * use numdirs to calculate hash keys, so use something at least
         * vaguely plausible.
         */
        numdirs = sblock.fs_cstotal.cs_ndir;
        if (numdirs == 0)
                numdirs = sblock.fs_ipg * sblock.fs_ncg / 2;
        listmax = numdirs + 10;
        inpsort = (struct inoinfo **)calloc((unsigned)listmax,
            sizeof (struct inoinfo *));
        inphead = (struct inoinfo **)calloc((unsigned)numdirs,
            sizeof (struct inoinfo *));
        if (inpsort == NULL || inphead == NULL) {
                (void) printf("cannot alloc %lld bytes for inphead\n",
                    (longlong_t)numdirs * sizeof (struct inoinfo *));
                retval = -1;
                goto finish;
        }
        if (debug) {
                if (listmax > ULONG_MAX)
                        errexit("create_and_init_maps: listmax overflowed\n");
                if (numdirs > ULONG_MAX)
                        errexit("create_and_init_maps: numdirs overflowed\n");
        }

        numacls = numdirs;
        aclmax = numdirs + 10;
        aclpsort = (struct inoinfo **)calloc((unsigned)aclmax,
            sizeof (struct inoinfo *));
        aclphead = (struct inoinfo **)calloc((unsigned)numacls,
            sizeof (struct inoinfo *));
        if (aclpsort == NULL || aclphead == NULL) {
                (void) printf("cannot alloc %lld bytes for aclphead\n",
                    (longlong_t)numacls * sizeof (struct inoinfo *));
                retval = -1;
                goto finish;
        }
        if (debug) {
                if (aclmax > ULONG_MAX)
                        errexit("create_and_init_maps: aclmax overflowed\n");
                if (numacls > ULONG_MAX)
                        errexit("create_and_init_maps: numacls overflowed\n");
        }
        aclplast = 0L;
        inplast = 0L;

finish:
        return (retval);
}

caddr_t
setup(caddr_t dev)
{
        int corefs;
        static char devstr[MAXPATHLEN + 1];

        havesb = 0;
        devname = devstr;

        derive_devstr(dev, devstr, sizeof (devstr));
        errorlocked = is_errorlocked(devstr);
        corefs = check_mount_state(devstr, sizeof (devstr));

        sblock_init();

        if (open_and_intro(devstr, corefs) == -1)
                goto cleanup;

        if (mflag && mounted(devstr, devstr,
            sizeof (devstr)) == M_RW)
                return (devstr);

        /*
         * Check log state
         */
        if (!logsetup(devstr))
                goto cleanup;

        /*
         * Flush fs if we're going to do anything other than a sanity check.
         * Note, if logging then the fs was already flushed in logsetup().
         */
        if (!islog && !mflag)
                flush_fs();

        if (find_superblock(devstr) == -1)
                goto cleanup;

        fixup_superblock();

        if (errorlocked &&
            (initial_error_state_adjust() == -1))
                goto cleanup;

        /*
         * asblk could be dirty because we found a mismatch between
         * the primary superblock and one of its backups in checksb().
         */
        if (asblk.b_dirty && !bflag) {
                (void) memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize);
                flush(fswritefd, &asblk);
        }

        getsummaryinfo();

        /*
         * if not error-locked, using the standard superblock,
         *   not bad log, not forced, preening, and is clean;
         *   stop checking
         */
        if (!errorlocked && (bflag == 0) &&
            ((!islog || islogok) &&
            (fflag == 0) && preen &&
            (FSOKAY == (sblock.fs_state + sblock.fs_time)) &&
            ((sblock.fs_clean == FSLOG && islog) ||
            ((sblock.fs_clean == FSCLEAN) || (sblock.fs_clean == FSSTABLE))))) {
                iscorrupt = 0;
                printclean();
                goto cleanup;
        }

        if (create_and_init_maps() == -1)
                goto nomaps;

        bufinit();
        return (devstr);

nomaps:
        ckfini();
        exitstat = EXERRFATAL;
        /* FALLTHROUGH */

cleanup:
        unbufinit();
        uncreate_maps();
        ungetsummaryinfo();

        /*
         * Can't get rid of the superblock buffer, because our
         * caller references it to generate the summary statistics.
         */

        return (NULL);
}

/*
 * Undoes the allocations in create_and_init_maps()
 */
static void
uncreate_maps(void)
{
        /*
         * No ordering dependency amongst these, so they are here in
         * the same order they were calculated.
         */
        if (blockmap != NULL)
                free(blockmap);
        if (statemap != NULL)
                free(statemap);
        if (lncntp != NULL)
                free(lncntp);
        if (inpsort != NULL)
                free(inpsort);
        if (inphead != NULL)
                free(inphead);
        if (aclpsort != NULL)
                free(aclpsort);
        if (aclphead != NULL)
                free(aclphead);
}

/*
 *  mkfs limits the size of the inode map to be no more than a third of
 *  the cylinder group space.  We'll use that value for sanity checking
 *  the superblock's inode per group value.
 */
#define MAXIpG  (roundup(sblock.fs_bsize * NBBY / 3, sblock.fs_inopb))

/*
 * Check the super block and its summary info.
 */
static int
checksb(int listerr)
{
        caddr_t err;

        /*
         * When the fs check is successfully completed, the alternate super
         * block at sblk.b_bno will be overwritten by ckfini() with the
         * repaired super block.
         */
        sblk.b_bno = bflag ? bflag : (SBOFF / dev_bsize);
        sblk.b_size = SBSIZE;

        /*
         * Sanity-check some of the values we are going to use later
         * in allocation requests.
         */
        if (sblock.fs_cstotal.cs_ndir < 1 ||
            sblock.fs_cstotal.cs_ndir > sblock.fs_ncg * sblock.fs_ipg) {
                if (verbose)
                        (void) printf(
            "Found %d directories, should be between 1 and %d inclusive.\n",
                            sblock.fs_cstotal.cs_ndir,
                            sblock.fs_ncg * sblock.fs_ipg);
                err = "NUMBER OF DIRECTORIES OUT OF RANGE";
                goto failedsb;
        }

        if (sblock.fs_nrpos <= 0 || sblock.fs_postbloff < 0 ||
            sblock.fs_cpc < 0 ||
            (sblock.fs_postbloff +
            (sblock.fs_nrpos * sblock.fs_cpc * sizeof (short))) >
            sblock.fs_sbsize) {
                err = "ROTATIONAL POSITION TABLE SIZE OUT OF RANGE";
                goto failedsb;
        }

        if (sblock.fs_cssize !=
            fragroundup(&sblock, sblock.fs_ncg * sizeof (struct csum))) {
                err = "SIZE OF CYLINDER GROUP SUMMARY AREA WRONG";
                goto failedsb;
        }

        if (sblock.fs_inopb != (sblock.fs_bsize / sizeof (struct dinode))) {
                err = "INOPB NONSENSICAL RELATIVE TO BSIZE";
                goto failedsb;
        }

        if (sblock.fs_bsize > MAXBSIZE) {
                err = "BLOCK SIZE LARGER THAN MAXIMUM SUPPORTED";
                goto failedsb;
        }

        if (sblock.fs_bsize != (sblock.fs_frag * sblock.fs_fsize)) {
                err = "FRAGS PER BLOCK OR FRAG SIZE WRONG";
                goto failedsb;
        }

        if (sblock.fs_dsize >= sblock.fs_size) {
                err = "NUMBER OF DATA BLOCKS OUT OF RANGE";
                goto failedsb;
        }

#if 0
        if (sblock.fs_size >
            (sblock.fs_nsect * sblock.fs_ntrak * sblock.fs_ncyl)) {
                err = "FILESYSTEM SIZE LARGER THAN DEVICE";
                goto failedsb;
        }
#endif

        /*
         *  Check that the number of inodes per group isn't less than or
         *  equal to zero.  Also makes sure it isn't more than the
         *  maximum number mkfs enforces.
         */
        if (sblock.fs_ipg <= 0 || sblock.fs_ipg > MAXIpG) {
                err = "INODES PER GROUP OUT OF RANGE";
                goto failedsb;
        }

        if (sblock.fs_cgsize > sblock.fs_bsize) {
                err = "CG HEADER LARGER THAN ONE BLOCK";
                goto failedsb;
        }

        /*
         * Set all possible fields that could differ, then do check
         * of whole super block against an alternate super block.
         * When an alternate super-block is specified this check is skipped.
         */
        (void) getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1),
            (size_t)sblock.fs_sbsize);
        if (asblk.b_errs != 0) {
                brelse(&asblk);
                return (0);
        }
        if (bflag != 0) {
                /*
                 * Invalidate clean flag and state information.
                 * Note that we couldn't return until after the
                 * above getblk(), because we're going to want to
                 * update asblk when everything's done.
                 */
                sblock.fs_clean = FSACTIVE;
                sblock.fs_state = (long)sblock.fs_time;
                sblock.fs_reclaim = 0;
                sbdirty();
                havesb = 1;
                return (1);
        }
        altsblock.fs_link = sblock.fs_link;
        altsblock.fs_rolled = sblock.fs_rolled;
        altsblock.fs_time = sblock.fs_time;
        altsblock.fs_state = sblock.fs_state;
        altsblock.fs_cstotal = sblock.fs_cstotal;
        altsblock.fs_cgrotor = sblock.fs_cgrotor;
        altsblock.fs_fmod = sblock.fs_fmod;
        altsblock.fs_clean = sblock.fs_clean;
        altsblock.fs_ronly = sblock.fs_ronly;
        altsblock.fs_flags = sblock.fs_flags;
        altsblock.fs_maxcontig = sblock.fs_maxcontig;
        altsblock.fs_minfree = sblock.fs_minfree;
        altsblock.fs_optim = sblock.fs_optim;
        altsblock.fs_rotdelay = sblock.fs_rotdelay;
        altsblock.fs_maxbpg = sblock.fs_maxbpg;
        altsblock.fs_logbno = sblock.fs_logbno;
        altsblock.fs_reclaim = sblock.fs_reclaim;
        altsblock.fs_si = sblock.fs_si;
        (void) memmove((void *)altsblock.fs_fsmnt, (void *)sblock.fs_fsmnt,
            sizeof (sblock.fs_fsmnt));
        /*
         * The following should not have to be copied.
         */
        (void) memmove((void *)altsblock.fs_u.fs_csp_pad,
            (void *)sblock.fs_u.fs_csp_pad, sizeof (sblock.fs_u.fs_csp_pad));
        altsblock.fs_fsbtodb = sblock.fs_fsbtodb;
        altsblock.fs_npsect = sblock.fs_npsect;
        altsblock.fs_nrpos = sblock.fs_nrpos;
        if (memcmp((void *)&sblock, (void *)&altsblock,
            (size_t)sblock.fs_sbsize) != 0) {
                err = "BAD VALUES IN SUPER BLOCK";
                goto failedsb;
        }
        havesb = 1;
        return (1);

failedsb:
        badsb(listerr, err);
        return (0);
}

static void
badsb(int listerr, caddr_t s)
{
        if (!listerr)
                return;
        if (preen)
                (void) printf("%s: ", devname);
        (void) printf("BAD SUPERBLOCK AT BLOCK %d: %s\n",
            bflag != 0 ? bflag : SBLOCK, s);
        if (preen) {
                pwarn(
            "USE AN ALTERNATE SUPERBLOCK TO SUPPLY NEEDED INFORMATION;\n");
                pwarn("e.g. fsck [-F ufs] -o b=# [special ...] \n");
                exitstat = EXERRFATAL;
                pfatal(
            "where # is the alternate super block. SEE fsck_ufs(8). \n");
        }
        /* we're expected to return if not preening */
}

/*
 * Write out the super block into each of the alternate super blocks.
 */
void
write_altsb(int fd)
{
        int cylno;

        for (cylno = 0; cylno < sblock.fs_ncg; cylno++)
                bwrite(fd, (caddr_t)&sblock, fsbtodb(&sblock,
                    cgsblock(&sblock, cylno)), sblock.fs_sbsize);
}

static void
sblock_init(void)
{
        fsmodified = 0;
        if (errorlocked)
                isdirty = 1;
        lfdir = 0;
        initbarea(&sblk);
        initbarea(&asblk);

        /*
         * May have buffer left over from previous filesystem check.
         */
        if (sblk.b_un.b_buf == NULL)
                sblk.b_un.b_buf = calloc(1, SBSIZE);
        if (asblk.b_un.b_buf == NULL)
                asblk.b_un.b_buf = calloc(1, SBSIZE);
        if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL)
                errexit("cannot allocate space for superblock\n");
        /*
         * Could get the actual sector size from the device here,
         * but considering how much would need to change in the rest
         * of the system before it'd be a problem for us, it's not
         * worth worrying about right now.
         */
        dev_bsize = secsize = DEV_BSIZE;
}

/*
 * Calculate a prototype superblock based on information in the disk label.
 * When done the cgsblock macro can be calculated and the fs_ncg field
 * can be used. Do NOT attempt to use other macros without verifying that
 * their needed information is available!
 *
 * In BSD, the disk label includes all sorts of useful information,
 * like cpg.  Solaris doesn't have that, and deriving it (as well as
 * some other parameters) is difficult.  Rather than duplicate the
 * code, just ask mkfs what it would've come up with by default.
 * Ideally, we'd just link in the code, but given the source base
 * involved, it's more practical to just get a binary dump.
 *
 * The one minor drawback to the above approach is that newfs and mkfs
 * will produce vastly different layouts for the same partition if
 * they're allowed to default everything.  So, if the superblock that
 * mkfs gives us doesn't work for guessing where the alternates are,
 * we need to try newfs.
 */
static int
calcsb(calcsb_t style, caddr_t dev, int devfd, struct fs *fs)
{
#define FROM_CHILD      0
#define TO_FSCK         1
#define CMD_IDX         0
#define DEV_IDX         3
#define SIZE_IDX        4

        int child_pipe[2];
        caddr_t mkfsline[] = {
                "",             /* CMD_IDX */
                "-o",
                "calcbinsb,N",
                NULL,           /* DEV_IDX */
                NULL,           /* SIZE_IDX */
                NULL
        };
        caddr_t newfsline[] = {
                "",             /* CMD_IDX */
                "-B",
                "-N",
                NULL,           /* DEV_IDX */
                NULL
        };
        int pending, transferred;
        caddr_t *cmdline;
        caddr_t target;
        caddr_t sizestr = NULL;
        caddr_t path_old, path_new, mkfs_dir, mkfs_path, newfs_path;
        caddr_t slash;
        diskaddr_t size;
        int devnull;

        switch (style) {
        case MKFS_STYLE:
                if (debug)
                        (void) printf("calcsb() going with style MKFS\n");
                cmdline = mkfsline;
                break;
        case NEWFS_STYLE:
                if (debug)
                        (void) printf("calcsb() going with style NEWFS\n");
                cmdline = newfsline;
                break;
        default:
                if (debug)
                        (void) printf("calcsb() doesn't undestand style %d\n",
                            style);
                return (0);
        }

        cmdline[DEV_IDX] = dev;

        /*
         * Normally, only use the stock versions of the utilities.
         * However, if we're debugging, the odds are that we're
         * using experimental versions of them as well, so allow
         * some flexibility.
         */
        mkfs_path = getenv("MKFS_PATH");
        if (!debug || (mkfs_path == NULL))
                mkfs_path = MKFS_PATH;

        newfs_path = getenv("NEWFS_PATH");
        if (!debug || (newfs_path == NULL))
                newfs_path = NEWFS_PATH;

        if (style == MKFS_STYLE) {
                cmdline[CMD_IDX] = mkfs_path;

                size = getdisksize(dev, devfd);
                if (size == 0)
                        return (0);

                (void) fsck_asprintf(&sizestr, "%lld", (longlong_t)size);
                cmdline[SIZE_IDX] = sizestr;
        } else if (style == NEWFS_STYLE) {
                /*
                 * Make sure that newfs will find the right version of mkfs.
                 */
                cmdline[CMD_IDX] = newfs_path;
                path_old = getenv("PATH");
                /* mkfs_path is always initialized, despite lint's concerns */
                mkfs_dir = strdup(mkfs_path);
                if (mkfs_dir == NULL)
                        return (0);
                /*
                 * If no location data for mkfs, don't need to do
                 * anything about PATH.
                 */
                slash = strrchr(mkfs_dir, '/');
                if (slash != NULL) {
                        /*
                         * Just want the dir, so discard the executable name.
                         */
                        *slash = '\0';

                        /*
                         * newfs uses system() to find mkfs, so make sure
                         * that the one we want to use is first on the
                         * list.  Don't free path_new upon success, as it
                         * has become part of the environment.
                         */
                        (void) fsck_asprintf(&path_new, "PATH=%s:%s",
                            mkfs_dir, path_old);
                        if (putenv(path_new) != 0) {
                                free(mkfs_dir);
                                free(path_new);
                                return (0);
                        }
                }
                free(mkfs_dir);
        } else {
                /*
                 * Bad search style, quietly return failure.
                 */
                if (debug) {
                        (void) printf("calcsb: got bad style number %d\n",
                            (int)style);
                }
                return (0);
        }

        if (pipe(child_pipe) < 0) {
                pfatal("calcsb: could not create pipe: %s\n", strerror(errno));
                if (sizestr != NULL)
                        free(sizestr);
                return (0);
        }

        switch (fork()) {
        case -1:
                pfatal("calcsb: fork failed: %s\n", strerror(errno));
                if (sizestr != NULL)
                        free(sizestr);
                return (0);
        case 0:
                if (dup2(child_pipe[TO_FSCK], fileno(stdout)) < 0) {
                        (void) printf(
                            "calcsb: could not rename file descriptor: %s\n",
                            strerror(errno));
                        exit(EXBADPARM);
                }
                devnull = open("/dev/null", O_WRONLY);
                if (devnull == -1) {
                        (void) printf("calcsb: could not open /dev/null: %s\n",
                            strerror(errno));
                        exit(EXBADPARM);
                }
                if (dup2(devnull, fileno(stderr)) < 0) {
                        (void) printf(
                            "calcsb: could not rename file descriptor: %s\n",
                            strerror(errno));
                        exit(EXBADPARM);
                }
                (void) close(child_pipe[FROM_CHILD]);
                (void) execv(cmdline[CMD_IDX], cmdline);
                (void) printf("calcsb: could not exec %s: %s\n",
                    cmdline[CMD_IDX], strerror(errno));
                exit(EXBADPARM);
                /* NOTREACHED */
        default:
                break;
        }

        (void) close(child_pipe[TO_FSCK]);
        if (sizestr != NULL)
                free(sizestr);

        pending = sizeof (struct fs);
        target = (caddr_t)fs;
        do {
                transferred = read(child_pipe[FROM_CHILD], target, pending);
                pending -= transferred;
                target += transferred;
        } while ((pending > 0) && (transferred > 0));

        if (pending > 0) {
                if (transferred < 0)
                        pfatal(
                    "calcsb: binary read of superblock from %s failed: %s\n",
                            (style == MKFS_STYLE) ? "mkfs" : "newfs",
                            (transferred < 0) ? strerror(errno) : "");
                else
                        pfatal(
                    "calcsb: short read of superblock from %s\n",
                            (style == MKFS_STYLE) ? "mkfs" : "newfs");
                return (0);
        }

        (void) close(child_pipe[FROM_CHILD]);
        (void) wait(NULL);

        if ((fs->fs_magic != FS_MAGIC) &&
            (fs->fs_magic != MTB_UFS_MAGIC))
                return (0);

        return (1);
}