root/usr/src/cmd/filesync/anal.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
 *
 * module:
 *      anal.c
 *
 * purpose:
 *      routines to analyze the file trees and figure out what has changed
 *      and queue files for reconciliation.  It also contains tree enumeration
 *      routines to for other purposes (pruning and link location).
 *
 * contents:
 *
 *  change analysis:
 *      analyze .... (top level) analyze all files in the tree for changes
 *      summary .... print out change/reconciliation statistics for each base
 *      check_file . (static) look for changes and queue file for reconciliation
 *      check_changes (static) figure out if a particular file has changed
 *      queue_file . (static) add a file to the reconciliation list
 *
 *  other tree enumeration functions:
 *      prune_file . (static) recursive descent and actual pruning
 *      prune ...... (top level) initiate pruning analysis for nonexistant files
 *      find_link .. look for other files to which a file may be a link
 *      link_update. propagate changed stat info to all other links
 *      same_name .. (static) figure out if two nodes describe same file
 *
 *  misc:
 *      push_name .. maintain a running full pathname as we descend
 *      pop_name ... maintain a running full pathname as we pop back
 *      get_name ... return full pathname for the current file
 *
 * notes:
 *      analysis is limited to files that were evaluated in the previous
 *      pass ... since we don't have complete information about files that
 *      were not evaluated in the previous pass.
 */

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

#include "messages.h"
#include "filesync.h"
#include "database.h"
#include "debug.h"

/*
 * routines:
 */
void push_name(const char *);
void pop_name();
char *get_name(struct file *);
static errmask_t check_file(struct file *fp);
static diffmask_t check_changes(struct file *fp, int first, int second);
static int prune_file(struct file *fp);
static void queue_file(struct file *fp);

/*
 * globals
 */
static struct file *changes;    /* list of files to be reconciled       */

static long total_files;        /* total number of files being considered  */
static long est_deletes;        /* estimated number of files to be deleted */
static long est_rmdirs;         /* est rmdirs of non-empty directories     */

int inum_changes;               /* LISTed directories whose I#s changed    */

/*
 * routine:
 *      analyze
 *
 * purpose:
 *      top level routine for the analysis/reconciliation process
 *
 * parameters:
 *      none
 *
 * returns:
 *      error mask
 *
 * notes:
 *      a critical side effect of this routine is the creation of
 *      the reconciliation list, an ordered list of files that
 *      needed to be processed in the subsequent reconciliation pass
 */
errmask_t
analyze()
{       struct base *bp;
        struct file *fp;
        int errs = 0;
        int err;
        int percentage;
        bool_t aborted = FALSE;
        char msgbuf[MAX_LINE];

        /*
         * run through all bases and directories looking for files
         * that have been renamed.  This must be done before the
         * difference analysis because a directory rename can introduce
         * radical restructuring into a name-based tree.
         */
        for (bp = bases; bp; bp = bp->b_next) {
                for (fp = bp->b_files; fp; fp = fp->f_next)
                        if (fp->f_flags & F_EVALUATE)
                                errs |= find_renames(fp);
        }

        /*
         * run through all bases and files looking for candidates
         * note, however that we only descend into trees that have
         * the evaluate flag turned on.  As a result of new rules or
         * restriction arguments, we may be deliberatly ignoring
         * large amounts of the baseline.   This means we won't do
         * any stats to update the information in those nodes, and
         * they will be written back just as they were.
         *
         * note that there is code to prune out baseline nodes for
         * files that no longer exist, but that code is in reconcile
         * and will never get a chance to run on nodes that aren't
         * analyzed.
         *
         * we also want to run though all nodes with STAT errors
         * so that we can put them on the reconciliation list.
         */
        for (bp = bases; bp; bp = bp->b_next) {
                for (fp = bp->b_files; fp; fp = fp->f_next)
                        if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
                                errs |= check_file(fp);
        }

        /*
         * my greatest fear is that someday, somehow, by messing with
         * variables or baselines or who-knows-what, that someone will
         * run a reconciliation against a large tree that doesn't correspond
         * to the baseline, and I will infer that a bazillion files have
         * been deleted and will propagate the slaughter before anyone
         * can say somebody stop that maniac.
         *
         * in order to prevent such a possibility, we have a few different
         * sanity checks.  There is, of course, a tradeoff here between
         * danger and irritation.  The current set of heuristics for whether
         * or not to generate a warning are (any of)
         *
         *      at least CONFIRM_MIN files have been deleted AND
         *      CONFIRM_PCT of all files have been deleted
         *
         *      the inode number on a LISTed directory has changed
         *
         *      a non-empty directory has been deleted.
         */
        msgbuf[0] = 0;

        percentage = (est_deletes * 100) / (total_files ? total_files : 1);
        if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
                sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
        else if (inum_changes > 0)
                sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
        else if (est_rmdirs)
                sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);

        if (msgbuf[0])
                confirm(msgbuf);

        /*
         * TRICK:
         *      the change list contains both files that have changed
         *      (and probably warrant reconciliation) and files that
         *      we couldn't get up-to-date stat information on.  The
         *      latter files should just be flagged as being in conflict
         *      so they can be reported in the summary.  The same is
         *      true of all subsequent files if we abort reconciliation.
         */
        for (fp = changes; fp; fp = fp->f_rnext)
                if (aborted || (fp->f_flags & F_STAT_ERROR)) {
                        fp->f_flags |= F_CONFLICT;
                        /* if it isn't in the baseline yet, don't add it */
                        if ((fp->f_flags & F_IN_BASELINE) == 0)
                                fp->f_flags |= F_REMOVE;
                        fp->f_problem = aborted ? PROB_aborted : PROB_restat;
                        (fp->f_base)->b_unresolved++;
                        errs |= ERR_UNRESOLVED;
                        if (opt_verbose)
                                fprintf(stdout,
                                        gettext(aborted ? V_suppressed
                                                        : V_nostat),
                                        fp->f_fullname);
                } else {
                        err = reconcile(fp);
                        errs |= err;
                        if (opt_halt && (err & ERR_ABORT)) {
                                fprintf(stderr, gettext(ERR_abort_h));
                                aborted = TRUE;
                        }
                }

        return (errs);
}

/*
 * routine:
 *      prune_file
 *
 * purpose:
 *      to look for file entries that should be pruned from baseline
 *      prune the current file if it needs pruning, and recursively
 *      descend if it is a directory.
 *
 * parameters:
 *      pointer to file node
 */
static int
prune_file(struct file *fp)
{       struct file *cp;
        int prunes = 0;

        /* if node hasn't been evaluated, mark it for removal   */
        if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
                fp->f_flags |= F_REMOVE;
                prunes++;
                if (opt_debug & DBG_ANAL)
                        fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
        }

        /* now check our children                               */
        for (cp = fp->f_files; cp; cp = cp->f_next)
                prunes += prune_file(cp);

        return (prunes);
}

/*
 * routine:
 *      prune
 *
 * purpose:
 *      to prune the baseline of entries that no longer correspond to
 *      existing rules.
 *
 * notes:
 *      This routine just calls prune_file on the top of each base tree.
 */
int
prune()
{       struct base *bp;
        struct file *fp;
        int prunes = 0;

        for (bp = bases; bp; bp = bp->b_next) {
                for (fp = bp->b_files; fp; fp = fp->f_next)
                        prunes += prune_file(fp);

                if ((bp->b_flags & F_EVALUATE) == 0)
                        bp->b_flags |= F_REMOVE;
        }

        return (prunes);
}

/*
 * routine:
 *      summary
 *
 * purpose:
 *      to print out statics and conflict lists
 */
void
summary()
{       struct base *bp;
        struct file *fp;
        extern bool_t need_super;

        (void) fflush(stdout);

        for (bp = bases; bp; bp = bp->b_next) {

                /* see if this base was irrelevant      */
                if ((bp->b_flags & F_EVALUATE) == 0)
                        continue;

                /* print out a summary for this base    */
                fprintf(stderr, gettext(SUM_hd),
                        bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
                fprintf(stderr, gettext(SUM_dst),
                        bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
                fprintf(stderr, gettext(SUM_src),
                        bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
                if (bp->b_unresolved)
                        fprintf(stderr, gettext(SUM_unresolved),
                                bp->b_unresolved);


                /* print out a list of unreconciled files for this base */
                for (fp = changes; fp; fp = fp->f_rnext) {
                        if (fp->f_base != bp)
                                continue;
                        if ((fp->f_flags & F_CONFLICT) == 0)
                                continue;
                        fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
                                fp->f_problem ? fp->f_problem : "???");
                }

                fprintf(stderr, "\n");
        }

        if (need_super)
                fprintf(stderr, gettext(WARN_super));
}

/*
 * routine:
 *      check_file
 *
 * purpose:
 *      figure out if a file requires reconciliation and recursively
 *      descend into all sub-files and directories
 *
 * parameters:
 *      base pointer
 *      file pointer
 *
 * returns:
 *      error mask
 *      built up changes needed list
 *      updated statistics
 *
 * notes:
 *      this routine builds up a path name as it descends through
 *      the tree (see push_name, pop_name, get_name).
 */
static errmask_t
check_file(struct file *fp)
{       struct file *cp;
        int errs = 0;

        if ((fp->f_flags & F_STAT_ERROR) == 0) {
                /* see if the source has changed        */
                fp->f_info[OPT_BASE].f_modtime  = fp->f_s_modtime;
                fp->f_info[OPT_BASE].f_ino      = fp->f_s_inum;
                fp->f_info[OPT_BASE].f_d_maj    = fp->f_s_maj;
                fp->f_info[OPT_BASE].f_d_min    = fp->f_s_min;
                fp->f_info[OPT_BASE].f_nlink    = fp->f_s_nlink;
                fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);

                /* see if the destination has changed   */
                fp->f_info[OPT_BASE].f_modtime  = fp->f_d_modtime;
                fp->f_info[OPT_BASE].f_ino      = fp->f_d_inum;
                fp->f_info[OPT_BASE].f_d_maj    = fp->f_d_maj;
                fp->f_info[OPT_BASE].f_d_min    = fp->f_d_min;
                fp->f_info[OPT_BASE].f_nlink    = fp->f_d_nlink;
                fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);

                /* if nobody thinks the file exists, baseline needs pruning */
                if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
                        fp->f_srcdiffs |= D_DELETE;
                        fp->f_dstdiffs |= D_DELETE;
                }

                /* keep track of possible deletions to look for trouble */
                if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
                        est_deletes++;

                        /* see if file is (or has been) a non-empty directory */
                        if (fp->f_files)
                                est_rmdirs++;
                }
        }

        /* if we found differences, queue the file for reconciliation   */
        if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
                queue_file(fp);

                if (opt_debug & DBG_ANAL) {
                        fprintf(stderr, "ANAL: src=%s",
                                showflags(diffmap, fp->f_srcdiffs));
                        fprintf(stderr, " dst=%s",
                                showflags(diffmap, fp->f_dstdiffs));
                        fprintf(stderr, " flgs=%s",
                                showflags(fileflags, fp->f_flags));
                        fprintf(stderr, " name=%s\n", fp->f_fullname);
                }
        }

        /* bump the total file count    */
        fp->f_base->b_totfiles++;
        total_files++;

        /* if this is not a directory, we're done       */
        if (fp->f_files == 0)
                return (errs);

        /*
         * If this is a directory, we need to recursively analyze
         * our children, but only children who have been evaluated.
         * If a node has not been evaluated, then we don't have
         * updated stat information and there is nothing to analyze.
         *
         * we also want to run though all nodes with STAT errors
         * so that we can put them on the reconciliation list.
         * If a directory is unreadable on one side, all files
         * under that directory (ON BOTH SIDES) must be marked as
         * blocked by stat errors.
         */
        push_name(fp->f_name);

        for (cp = fp->f_files; cp; cp = cp->f_next) {
                if (fp->f_flags & F_STAT_ERROR)
                        cp->f_flags |= F_STAT_ERROR;
                if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
                        errs |= check_file(cp);
        }

        pop_name();

        return (errs);
}

/*
 * routine:
 *      check_changes
 *
 * purpose:
 *      to figure out what has changed for a specific file
 *
 * parameters:
 *      file pointer
 *      the reference info
 *      the info to be checked for changes
 *
 * returns:
 *      diff mask
 *
 * notes:
 *      this routine doesn't pretend to understand what happened.
 *      it merely enumerates the ways in which the files differ.
 */
static diffmask_t
check_changes(struct file *fp, int ref, int new)
{       struct fileinfo *rp, *np;
        int mask = 0;
        int type;

        rp = &fp->f_info[ref];
        np = &fp->f_info[new];

        if (np->f_uid != rp->f_uid)
                mask |= D_UID;

        if (np->f_gid != rp->f_gid)
                mask |= D_GID;

        if (np->f_mode != rp->f_mode)
                mask |= D_PROT;

        type = np->f_type;
        if (type != rp->f_type) {
                if (type == 0)
                        mask |= D_DELETE;
                else if (rp->f_type == 0)
                        mask |= D_CREATE;
                else
                        mask |= D_TYPE;
        } else if (type == S_IFBLK || type == S_IFCHR) {
                /*
                 * for special files, we only look at the maj/min
                 */
                if (np->f_rd_maj != rp->f_rd_maj)
                        mask |= D_SIZE;
                if (np->f_rd_min != rp->f_rd_min)
                        mask |= D_SIZE;
        } else if (type != S_IFDIR) {
                /*
                 * for directories, we don't look directly at
                 * the contents, so these fields don't mean
                 * anything.  If the directories have changed
                 * in any interesting way, we'll find it by
                 * walking the tree.
                 */
                if (np->f_modtime > rp->f_modtime)
                        mask |= D_MTIME;

                if (np->f_size != rp->f_size)
                        mask |= D_SIZE;

                if (np->f_nlink != rp->f_nlink)
                        mask |= D_LINKS;
        }

        if (cmp_acls(rp, np) == 0)
                mask |= D_FACLS;

        return (mask);
}

/*
 * routine:
 *      same_name
 *
 * purpose:
 *      to figure out whether or not two databsae nodes actually refer to
 *      the same file.
 *
 * parameters:
 *      pointers to two file description nodes
 *      which side we should check
 *
 * returns:
 *      TRUE/FALSE
 *
 * notes:
 *      if a single directory is specified in multiple base pairs, it
 *      is possible to have multiple nodes in the database describing
 *      the same file.  This routine is supposed to detect those cases.
 *
 *      what should be a trivial string comparison is complicated by
 *      the possibility that the two nodes might describe the same file
 *      from base directories at different depths.  Thus, rather than
 *      comparing two strings, we really want to compare the concatenation
 *      of two pairs of strings.  Unfortunately calling full_name would
 *      be awkward right now, so instead we have our own comparison
 *      routine that automatically skips from the first string to
 *      the second.
 */
static bool_t
same_name(struct file *f1, struct file *f2, side_t srcdst)
{
        char *s1, *s2, *x1, *x2;

        if (srcdst == OPT_SRC) {
                s1 = (f1->f_base)->b_src_name;
                s2 = (f2->f_base)->b_src_name;
        } else {
                s1 = (f1->f_base)->b_dst_name;
                s2 = (f2->f_base)->b_dst_name;
        }
        x1 = f1->f_fullname;
        x2 = f2->f_fullname;

        /*
         * Compare the two names, and if they differ before they end
         * this is a non-match.  If they both end at the same time,
         * this is a match.
         *
         * The trick here is that each string is actually the logical
         * concatenation of two strings, and we need to automatically
         * wrap from the first to the second string in each pair.  There
         * is no requirement that the two (concatenated) strings be
         * broken at the same point, so we have a slightly baroque
         * comparsion loop.
         */
        while (*s1 && *s1 == *s2) {

                /*
                 * strings have been identical so far, so advance the
                 * pointers and continue the comparison.  The trick
                 * is that when either string ends, we have to wrap
                 * over to its extension.
                 */
                s1++; s2++;
                if (*s1 && *s2)
                        continue;

                /*
                 * at least one of the strings has ended.
                 * there is an implicit slash between the string
                 * and its extension, and this has to be matched
                 * against the other string.
                 */
                if (*s1 != *s2) {
                        if (*s1 == 0 && *s2 == '/')
                                s2++;
                        else if (*s2 == 0 && *s1 == '/')
                                s1++;
                        else
                                /* the disagreement doesn't come at a slash */
                                break;
                }

                /*
                 * if either string has ended, wrap to its extension
                 */
                if (*s1 == 0 && x1 != 0) {
                        s1 = x1;
                        x1 = 0;
                }
                if (*s2 == 0 && x2 != 0) {
                        s2 = x2;
                        x2 = 0;
                }
        }

        return (*s1 == *s2);
}

/*
 * routine:
 *      find_link
 *
 * purpose:
 *      to figure out if there is a file to which we should
 *      be creating a link (rather than making a copy)
 *
 * parameters:
 *      file node for the file to be created (that we hope is merely a link)
 *      which side is to be changed (src/dst)
 *
 * return:
 *      0       no link is appropriate
 *      else    pointer to file node for link referent
 *
 * notes:
 *      there are a few strange heuristics in this routine and I
 *      wouldn't bet my soul that I got all of them right.  The general
 *      theory is that when a new file is created, we look to see if it
 *      is a link to another file on the changed side, and if it is, we
 *      find the corresponding file on the unchanged side.
 *
 *      cases we want to be able to handle:
 *          1.  one or more links are created to a prexisting file
 *          2.  a preexisting only link is renamed
 *          3.  a rename of one of multiple links to a preexisting file
 *          4.  a single file is created with multiple links
 */
struct file *
find_link(struct file *fp, side_t srcdst)
{       struct file *lp;
        side_t chgside, tgtside;
        struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;

        /* chg = side on which the change was noticed           */
        /* tgt = side to which the change is to be propagated   */
        chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
        tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
        fcp = &fp->f_info[chgside];
        ftp = &fp->f_info[tgtside];

        /*
         * cases 1 and 3
         *
         * When a new link is created, we should be able to find
         * another file in the changed hierarchy that has the same
         * I-node number.  We expect it to be on the changed list
         * because the link count will have gone up or because all
         * of the copies are new.  If we find one, then the new file
         * on the receiving file should be a link to the corresponding
         * existing file.
         *
         * case 4
         *
         * the first link will be dealt with as a copy, but all
         * subsequent links should find an existing file analogous
         * to one of the links on the changed side, and create
         * corresponding links on the other side.
         *
         * in each of these cases, there should be multiple links
         * on the changed side.  If the linkcount on the changed
         * side is one, we needn't bother searching for other links.
         */
        if (fcp->f_nlink > 1)
        for (lp = changes; lp; lp = lp->f_rnext) {
                /* finding the same node doesn't count  */
                if (fp == lp)
                        continue;

                tgtp = &lp->f_info[tgtside];
                chgp = &lp->f_info[chgside];

                /*
                 * if the file doesn't already exist on the target side
                 * we cannot make a link to it
                 */
                if (tgtp->f_mode == 0)
                        continue;

                /*
                 * if this is indeed a link, then the prospective file on
                 * the changed side will have the same dev/inum as the file
                 * we are looking for
                 */
                if (fcp->f_d_maj != chgp->f_d_maj)
                        continue;
                if (fcp->f_d_min != chgp->f_d_min)
                        continue;
                if (fcp->f_ino != chgp->f_ino)
                        continue;

                /*
                 * if the target side is already a link to this file,
                 * then there is no new link to be created
                 * FIX: how does this interact with copies over links
                 */
                if ((ftp->f_d_maj == tgtp->f_d_maj) &&
                    (ftp->f_d_min == tgtp->f_d_min) &&
                    (ftp->f_ino   == tgtp->f_ino))
                        continue;

                /*
                 * there is a pathological situation where a single file
                 * might appear under multiple base directories.  This is
                 * damned awkward to detect in any other way, so we must
                 * check to see if we have just found another database
                 * instance for the same file (on the changed side).
                 */
                if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
                        continue;

                if (opt_debug & DBG_ANAL)
                        fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
                                fp->f_fullname, lp->f_fullname);

                return (lp);
        }

        /*
         * case 2: a simple rename of the only link
         *
         * In this case, there may not be any other existing file on
         * the changed side that has the same I-node number.  There
         * might, however, be a record of such a file in the baseline.
         * If we can find an identical file with a different name that
         * has recently disappeared, we have a likely rename.
         */
        for (lp = changes; lp; lp = lp->f_rnext) {

                /* finding the same node doesn't count                  */
                if (fp == lp)
                        continue;

                tgtp = &lp->f_info[tgtside];
                chgp = &lp->f_info[chgside];

                /*
                 * if the file still exists on the changed side this is
                 * not a simple rename, and in fact the previous pass
                 * would have found it.
                 */
                if (chgp->f_mode != 0)
                        continue;

                /*
                 * the inode number for the new link on the changed
                 * side must match the inode number for the old link
                 * from the baseline.
                 */
                if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
                                                        : lp->f_s_maj))
                        continue;
                if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
                                                        : lp->f_s_min))
                        continue;
                if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
                                                        : lp->f_s_inum))
                        continue;

                /* finding a file we are already linked to doesn't help */
                if ((ftp->f_d_maj == tgtp->f_d_maj) &&
                    (ftp->f_d_min == tgtp->f_d_min) &&
                    (ftp->f_ino   == tgtp->f_ino))
                        continue;

                /*
                 * there is a danger that we will confuse an
                 * inode reallocation with a rename.  We should
                 * only consider this to be a rename if the
                 * new file is identical to the old one
                 */
                basp = &lp->f_info[OPT_BASE];
                if (fcp->f_type != basp->f_type)
                        continue;
                if (fcp->f_size != basp->f_size)
                        continue;
                if (fcp->f_mode != basp->f_mode)
                        continue;
                if (fcp->f_uid != basp->f_uid)
                        continue;
                if (fcp->f_gid != basp->f_gid)
                        continue;

                if (opt_debug & DBG_ANAL)
                        fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
                                fp->f_fullname, lp->f_fullname);

                return (lp);
        }

        return (0);
}

/*
 * routine:
 *      has_other_links
 *
 * purpose:
 *      to determine whether or not there is more that one link to a
 *      particular file.  We are willing to delete a link to a file
 *      that has changed if we will still have other links to it.
 *      The trick here is that we only care about links under our
 *      dominion.
 *
 * parameters:
 *      file pointer to node we are interested in
 *      which side we are looking to additional links on
 *
 * returns:
 *      TRUE if there are multiple links
 *      FALSE if this is the only one we know of
 */
bool_t
has_other_links(struct file *fp, side_t srcdst)
{       struct file *lp;
        struct fileinfo *fip, *lip;

        fip = &fp->f_info[srcdst];

        /* if the link count is one, there couldn't be others   */
        if (fip->f_nlink < 2)
                return (FALSE);

        /* look for any other files for the same inode          */
        for (lp = changes; lp; lp = lp->f_rnext) {
                /* finding the same node doesn't count  */
                if (fp == lp)
                        continue;

                lip = &lp->f_info[srcdst];

                /*
                 * file must still exist on this side
                 */
                if (lip->f_mode == 0)
                        continue;

                /*
                 * if this is indeed a link, then the prospective file on
                 * the changed side will have the same dev/inum as the file
                 * we are looking for
                 */
                if (lip->f_d_maj != fip->f_d_maj)
                        continue;
                if (lip->f_d_min != fip->f_d_min)
                        continue;
                if (lip->f_ino != fip->f_ino)
                        continue;

                /*
                 * we have found at least one other link
                 */
                return (TRUE);
        }

        return (FALSE);
}

/*
 * routine:
 *      link_update
 *
 * purpose:
 *      to propoagate a stat change to all other file nodes that
 *      correspond to the same I-node on the changed side
 *
 * parameters:
 *      file pointer for the updated file
 *      which side was changed
 *
 * returns:
 *      void
 *
 * notes:
 *      if we have copied onto a file, we have copied onto all
 *      of its links, but since we do all stats before we do any
 *      copies, the stat information recently collected for links
 *      is no longer up-to-date, and this would result in incorrect
 *      reconciliation (redundant copies).
 *
 *      There is an assumption here that all links to a changed
 *      file will be in the change list.  This is true for almost
 *      all cases not involving restriction.  If we do fail to
 *      update the baseline for a file that was off the change list,
 *      the worst that is likely to happen is that we will think
 *      it changed later (but will almost surely find that both
 *      copies agree).
 */
void
link_update(struct file *fp, side_t which)
{       struct file *lp;

        for (lp = changes; lp; lp = lp->f_rnext) {
                /* finding the current entry doesn't count      */
                if (lp == fp)
                        continue;

                /* look for same i#, maj, min on changed side   */
                if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
                        continue;
                if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
                        continue;
                if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
                        continue;

                /*
                 * this appears to be another link to the same file
                 * so the updated stat information for one must be
                 * correct for the other.
                 */
                lp->f_info[which].f_type        = fp->f_info[which].f_type;
                lp->f_info[which].f_size        = fp->f_info[which].f_size;
                lp->f_info[which].f_mode        = fp->f_info[which].f_mode;
                lp->f_info[which].f_uid         = fp->f_info[which].f_uid;
                lp->f_info[which].f_gid         = fp->f_info[which].f_gid;
                lp->f_info[which].f_modtime     = fp->f_info[which].f_modtime;
                lp->f_info[which].f_modns       = fp->f_info[which].f_modns;
                lp->f_info[which].f_nlink       = fp->f_info[which].f_nlink;
                lp->f_info[which].f_rd_maj      = fp->f_info[which].f_rd_maj;
                lp->f_info[which].f_rd_min      = fp->f_info[which].f_rd_min;

                if (opt_debug & DBG_STAT)
                        fprintf(stderr,
                                "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
                                lp->f_name, lp->f_info[which].f_modtime,
                                lp->f_info[which].f_modns);
        }
}

/*
 * routine:
 *      queue_file
 *
 * purpose:
 *      append a file to the list of needed reconciliations
 *
 * parameters:
 *      pointer to file
 *
 * notes:
 *      when a request is appended to the reconciliation list,
 *      we fill in the full name.  We delayed this in hopes that
 *      it wouldn't be necessary (saving cycles and memory)
 *
 *      There is some funny business with modification times.
 *      In general, we queue files in order of the latest modification
 *      time so that propagations preserve relative ordering.  There
 *      are, however, a few important exceptions:
 *          1.  all directory creations happen at time zero,
 *              so that they are created before any files can
 *              be added to them.
 *          2.  all directory deletions happen at time infinity-depth,
 *              so that everything else can be removed before the
 *              directories themselves are removed.
 *          3.  all file deletions happen at time infinity-depth
 *              so that (in renames) the links will preceed the unlinks.
 */
static void
queue_file(struct file *fp)
{       struct file **pp, *np;

#define TIME_ZERO       0L              /* the earliest possible time   */
#define TIME_LONG       0x7FFFFFFF      /* the latest possible time     */

        /*
         * figure out the modification time for sequencing purposes
         */
        if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
                /*
                 * deletions are performed last, and depth first
                 */
                fp->f_modtime = TIME_LONG - fp->f_depth;
        } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
            fp->f_info[OPT_DST].f_type != S_IFDIR) {
                /*
                 * for most files we use the latest mod time
                 */
                fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
                fp->f_modns   = fp->f_info[OPT_SRC].f_modns;
                if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
                        fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
                        fp->f_modns   = fp->f_info[OPT_DST].f_modns;
                }
        } else {
                /*
                 * new directory creations need to happen before anything
                 * else and are automatically sequenced in traversal order
                 */
                fp->f_modtime = TIME_ZERO;
        }

        /*
         * insertion is time ordered, and for equal times,
         * insertions is in (pre-order) traversal order
         */
        for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
                if (fp->f_modtime > np->f_modtime)
                        continue;
                if (fp->f_modtime < np->f_modtime)
                        break;
                if (fp->f_modns < np->f_modns)
                        break;
        }

        fp->f_fullname = strdup(get_name(fp));
        fp->f_rnext = np;
        *pp = fp;
}


/*
 * routines:
 *      push_name/pop_name/get_name
 *
 * purpose:
 *      maintain a name stack so we can form name of a particular file
 *      as the concatenation of all of the names between it and the
 *      (know to be fully qualified) base directory.
 *
 * notes:
 *      we go to this trouble because most files never change and
 *      so we don't need to associate full names with every one.
 *      This stack is maintained during analysis, and if we decide
 *      to add a file to the reconciliation list, we can use the
 *      stack to generate a fully qualified name at that time.
 *
 *      we compress out '/./' when we return a name.  Given that the
 *      stack was built by a tree walk, the only place a /./ should
 *      appear is at the first level after the base ... but there
 *      are legitimate ways for them to appear there.
 *
 *      these names can get deep, so we dynamically size our name buffer
 */
static const char *namestack[ MAX_DEPTH + 1 ];
static int namedepth = 0;
static int namelen = 0;

void
push_name(const char *name)
{
        namestack[ namedepth++ ] = name;
        namelen += 2 + strlen(name);

        /* make sure we don't overflow our name stack   */
        if (namedepth >= MAX_DEPTH) {
                fprintf(stderr, gettext(ERR_deep), name);
                exit(ERR_OTHER);
        }
}

void
pop_name(void)
{
        namelen -= 2 + strlen(namestack[--namedepth]);
        namestack[ namedepth ] = 0;

#ifdef  DBG_ERRORS
        /* just a little sanity check here      */
        if (namedepth <= 0) {
                if (namedepth < 0) {
                        fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
                        exit(ERR_OTHER);
                } else if (namelen != 0) {
                        fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
                        exit(ERR_OTHER);
                }
        }
#endif
}

char
*get_name(struct file *fp)
{       int i;
        static char *namebuf = 0;
        static int buflen = 0;

        /* make sure we have an adequate buffer */
        i = namelen + 1 + strlen(fp->f_name);
        if (buflen < i) {
                for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
                namebuf = (char *) realloc(namebuf, buflen);
        }

        /* assemble the name    */
        namebuf[0] = 0;
        for (i = 0; i < namedepth; i++) {
                if (strcmp(namestack[i], ".")) {
                        strcat(namebuf, namestack[i]);
                        strcat(namebuf, "/");
                }
        }

        strcat(namebuf, fp->f_name);

        return (namebuf);
}