root/usr/src/cmd/filesync/recon.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
 *
 * module:
 *      recon.c
 *
 * purpose:
 *      process the reconciliation list, figure out exactly what the
 *      changes were, and what we should do about them.
 *
 * contents:
 *      reconcile ... (top level) process the reconciliation list
 *      samedata .... (static) do two files have the same contents
 *      samestuff ... (static) do two files have the same ownership/protection
 *      samecompare . (static) actually read and compare the contents
 *      samelink .... (static) do two symlinks have the same contents
 *      truncated ... (static) was one of the two copies truncated
 *      older ....... (static) which copy is older
 *      newer ....... (static) which copy is newer
 *      full_name ... generate a full path name for a file
 *
 * notes:
 *      If you only study one routine in this whole program, reconcile
 *      is that routine.  Everything else is just book keeping.
 *
 *      things were put onto the reconciliation list because analyze
 *      thought that they might have changed ... but up until now
 *      nobody has figured out what the changes really were, or even
 *      if there really were any changes.
 *
 *      queue_file has ordered the reconciliation list with directory
 *      creations first (depth ordered) and deletions last (inversely
 *      depth ordered).  all other changes have been ordered by mod time.
 */
#ident  "%W%    %E% SMI"

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>

#include "filesync.h"
#include "database.h"
#include "messages.h"
#include "debug.h"

/*
 * local routines to figure out how the files really differ
 *
 * All of these are private to this file.  The ones that take a
 * struct file pointer examine the per-side information in
 * fp->f_info[]; samelink takes no arguments and works from the
 * srcname/dstname globals declared below.
 */
static bool_t samedata(struct file *);          /* same contents?       */
static bool_t samestuff(struct file *);         /* same owner/mode/ACL? */
static bool_t samecompare(struct file *);       /* byte-wise compare    */
static bool_t truncated(struct file *);         /* one copy emptied?    */
static bool_t samelink();                       /* same symlink text?   */
static side_t newer(struct file *);             /* side with later mod  */
static side_t older(struct file *);             /* side with earlier mod */

/*
 * globals
 *
 * Both names are (re)assigned at the top of reconcile from full_name,
 * and are read in this file by samecompare and samelink.  They point
 * into buffers owned by full_name, so they must not be freed here.
 */
char    *srcname;       /* file we are emulating                */
char    *dstname;       /* file we are updating                 */

/*
 * routine:
 *      reconcile
 *
 * purpose:
 *      to perform the reconciliation action associated with a file
 *
 * parameters:
 *      file pointer
 *
 * returns:
 *      built up error mask
 *      updated statistics
 *
 * notes:
 *      The switch statement handles the obvious stuff.
 *      The TRUE side of the samedata test handles minor differences.
 *      The interesting stuff is in the FALSE side of the samedata test.
 *
 *      The desperation heuristics (in the diffmask&CONTENTS test) are
 *      not rigorously correct ... but they always try to do the right
 *      thing with data, and only lose mode/ownership changes in relatively
 *      pathological cases.  But I claim that the benefits outweigh the
 *      risks, and most users will be pleased with the resulting decisions.
 *
 *      Another trick is in the deletion cases of the switch.  We
 *      normally won't allow an unlink that conflicts with data
 *      changes.  If there are multiple links to the file, however,
 *      we can make the changes and do the deletion.
 *
 *      The action routines do_{remove,rename,like,copy} handle all
 *      of their own statistics and status updating.  This routine
 *      only has to handle its own reconciliation failures (when we
 *      can't decide what to do).
 *
 *      As a side effect, the globals srcname and dstname are set to
 *      the full source-side and destination-side names of this file.
 *
 *      Control flow: every path that has chosen an action jumps to
 *      "done"; every path that cannot decide either jumps to or falls
 *      into "cant", which marks the conflict and then continues into
 *      "done".  The two early "return (0)" statements bypass both.
 */
errmask_t
reconcile(struct file *fp)
{       errmask_t errs = 0;
        diffmask_t diffmask;

        if (opt_debug & DBG_RECON)
                fprintf(stderr, "RECO: %s flgs=%s, mtime=%08lx.%08lx\n",
                        fp->f_fullname,
                        showflags(fileflags, fp->f_flags),
                        fp->f_modtime, fp->f_modns);

        /*
         * form the fully qualified names for both files
         *
         * NOTE(review): full_name reallocates all of its buffers when
         * a longer name is needed, so the second call could in principle
         * move the buffer that srcname points at -- confirm whether
         * names that long can actually occur.
         */
        srcname = full_name(fp, OPT_SRC, OPT_SRC);
        dstname = full_name(fp, OPT_DST, OPT_DST);

        /*
         * because they are so expensive to read and so troublesome
         * to set, we try to put off reading ACLs as long as possible.
         * If we haven't read them yet, we must read them now (so that
         * samestuff can compare them).
         *
         * A non-zero return from get_acls is recorded as an ACL
         * difference (D_FACLS) on the corresponding side.
         */
        if (opt_acls == 0 && fp->f_info[ OPT_BASE ].f_numacls == 0) {
                if (get_acls(srcname, &fp->f_info[ OPT_SRC ]))
                        fp->f_srcdiffs |= D_FACLS;
                if (get_acls(dstname, &fp->f_info[ OPT_DST ]))
                        fp->f_dstdiffs |= D_FACLS;
        }

        /*
         * If a rename has been detected, we don't have to figure
         * it out, since both the rename-to and rename-from files
         * have already been designated.  When we encounter a rename-to
         * we should carry it out.  When we encounter a rename-from
         * we can ignore it, since it should be dealt with as a side
         * effect of processing the rename-to.
         */
        if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_FROM)
                return (0);

        if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_TO) {

                if (opt_verbose)
                        fprintf(stdout, gettext(V_renamed),
                                fp->f_previous->f_fullname, fp->f_name);

                /*
                 * carry out the rename on the side that has not seen
                 * it yet, then discard every difference bit on the
                 * renamed side other than D_MTIME and D_SIZE
                 */
                if (fp->f_srcdiffs & D_RENAME_TO) {
                        errs = do_rename(fp, OPT_DST);
                        fp->f_srcdiffs &= D_MTIME | D_SIZE;
                } else if (fp->f_dstdiffs & D_RENAME_TO) {
                        errs = do_rename(fp, OPT_SRC);
                        fp->f_dstdiffs &= D_MTIME | D_SIZE;
                }

                /* only an ERR_RESOLVABLE result lets us keep going     */
                if (errs != ERR_RESOLVABLE)
                        goto done;

                /*
                 * if any differences remain, then we may be dealing
                 * with contents changes in addition to a rename
                 */
                if ((fp->f_srcdiffs | fp->f_dstdiffs) == 0)
                        goto done;

                /*
                 * fall through to reconcile the data changes
                 */
        }

        /*
         * pull off the easy cases (non-conflict creations & deletions)
         */
        switch (fp->f_flags & (F_WHEREFOUND)) {
                case F_IN_BASELINE:     /* only exists in baseline      */
                case 0:                 /* only exists in rules         */
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_nomore),
                                        fp->f_fullname);
                        fp->f_flags |= F_REMOVE;        /* fix baseline */
                        return (0);

                case F_IN_BASELINE|F_IN_SOURCE: /* deleted from dest    */
                        /*
                         * the basic principle here is that we are willing
                         * to do the deletion if:
                         *      no changes were made on the other side
                         * OR
                         *      we have been told to force in this direction
                         *
                         * we do, however, make an exception for files that
                         * will still have other links.  In this case, the
                         * (changed) data will still be accessible through
                         * another link and so we are willing to do the unlink
                         * in spite of conflicting changes (which may well
                         * have been introduced through another link).
                         *
                         * The jury is still out on this one
                         */
                        if (((fp->f_srcdiffs&D_IMPORTANT) == 0) ||
                                (opt_force == OPT_DST)          ||
                                has_other_links(fp, OPT_SRC)) {
                                if (opt_verbose)
                                        fprintf(stdout, gettext(V_deleted),
                                                fp->f_fullname, "dst");
                                errs = do_remove(fp, OPT_SRC);
                                goto done;
                        }

                        /* a deletion combined with changes             */
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_delconf),
                                        fp->f_fullname);

                        /* if we are to resolve in favor of source      */
                        if (opt_force == OPT_SRC) {
                                errs = do_copy(fp, OPT_DST);
                                goto done;
                        }

                        fp->f_problem = gettext(PROB_del_change);
                        goto cant;

                case F_IN_BASELINE|F_IN_DEST:   /* deleted from src     */
                        /* just like previous case, w/sides reversed    */
                        if (((fp->f_dstdiffs&D_IMPORTANT) == 0) ||
                                (opt_force == OPT_SRC)          ||
                                has_other_links(fp, OPT_DST)) {
                                if (opt_verbose)
                                        fprintf(stdout, gettext(V_deleted),
                                                fp->f_fullname, "src");
                                errs = do_remove(fp, OPT_DST);
                                goto done;
                        }

                        /* a deletion combined with changes             */
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_delconf),
                                        fp->f_fullname);

                        /* if we are to resolve in favor of destination */
                        if (opt_force == OPT_DST) {
                                errs = do_copy(fp, OPT_SRC);
                                goto done;
                        }

                        fp->f_problem = gettext(PROB_del_change);
                        goto cant;

                /*
                 * if something new shows up, and for some reason we cannot
                 * propagate it to the other side, we should suppress the
                 * file from the baseline, so it will show up as a new
                 * creation next time too.
                 */
                case F_IN_SOURCE:               /* created in src       */
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_created),
                                        fp->f_fullname, "src");
                        errs = do_copy(fp, OPT_DST);
                        goto done;

                case F_IN_DEST:                 /* created in dest      */
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_created),
                                        fp->f_fullname, "dst");
                        errs = do_copy(fp, OPT_SRC);
                        goto done;

                case F_IN_SOURCE|F_IN_DEST:     /* not in baseline      */
                        /*
                         * since we don't have a baseline, we cannot
                         * know which of the two copies should prevail
                         */
                        break;

                case F_IN_BASELINE|F_IN_SOURCE|F_IN_DEST:
                        /*
                         * we have a baseline where the two copies agreed,
                         * so maybe we can determine that only one of the
                         * two copies have changed ... but before we decide
                         * who should be the winner we should determine
                         * that the two copies are actually different.
                         */
                        break;
        }

        /*
         * if we have fallen out of the case statement, it is because
         * we have discovered a non-obvious situation where potentially
         * changed versions of the file exist on both sides.
         *
         * if the two copies turn out to be identical, this is simple
         */
        if (samedata(fp)) {
                if (samestuff(fp)) {
                        /* files are identical, just update baseline    */
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_unchanged),
                                        fp->f_fullname);
                        update_info(fp, OPT_SRC);
                        goto done;
                } else {
                        /*
                         * contents agree but ownership/protection does
                         * not agree, so we have to bring these into
                         * agreement.  We can pick a winner if one
                         * side hasn't changed, or if the user has
                         * specified a force flag.
                         *
                         * (samestuff has already set fp->f_problem, which
                         * is what gets reported if neither test below
                         * can pick a winner.)
                         */
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_modes),
                                        fp->f_fullname);

                        if (((fp->f_srcdiffs & D_ADMIN) == 0) ||
                                        (opt_force == OPT_DST)) {
                                errs = do_like(fp, OPT_SRC, TRUE);
                                goto done;
                        }

                        if (((fp->f_dstdiffs & D_ADMIN) == 0) ||
                                        (opt_force == OPT_SRC)) {
                                errs = do_like(fp, OPT_DST, TRUE);
                                goto done;
                        }
                }
                /* falls down to cant   */
        } else {
                /*
                 * The two files have different contents, so we have
                 * a potential conflict here.  If we know that only one
                 * side has changed, we go with that side.
                 */
                if (fp->f_dstdiffs == 0 || fp->f_srcdiffs == 0) {
                        if (opt_verbose)
                                fprintf(stdout, gettext(V_changed),
                                        fp->f_fullname);
                        errs = do_copy(fp, fp->f_srcdiffs ? OPT_DST : OPT_SRC);
                        goto done;
                }

                /*
                 * Both sides have changed, so we have a real conflict.
                 */
                if (opt_verbose)
                        fprintf(stdout,
                                gettext(truncated(fp) ?
                                                V_trunconf : V_different),
                                fp->f_fullname);

                /*
                 * See if the user has given us explicit instructions
                 * on how to resolve conflicts.  We may have been told
                 * to favor the older, the newer, the source, or the
                 * destination ... but the default is to leave the
                 * conflict unresolved.
                 *
                 * Note that the side passed to do_copy is the side to
                 * be overwritten, which is why favoring the older copy
                 * passes newer(fp) and vice versa.
                 */
                if (opt_force == OPT_OLD) {
                        errs = do_copy(fp, newer(fp));
                        goto done;
                }

                if (opt_force == OPT_NEW) {
                        errs = do_copy(fp, older(fp));
                        goto done;
                }

                if (opt_force != 0) {
                        errs = do_copy(fp, (opt_force == OPT_SRC) ?
                                                        OPT_DST : OPT_SRC);
                        goto done;
                }


                /*
                 * This is our last chance before giving up.
                 *
                 * We know that the files have different contents and
                 * that there were changes on both sides.  The only way
                 * we can safely handle this is if there were pure contents
                 * changes on one side and pure ownership changes on the
                 * other side.  In this case we can propagate the ownership
                 * one way and the contents the other way.
                 *
                 * We decide whether or not this is possible by ANDing
                 * together the changes on the two sides, and seeing
                 * if the changes were all orthogonal (none of the same
                 * things changed on both sides).
                 */
                diffmask = fp->f_srcdiffs & fp->f_dstdiffs;
                if ((diffmask & D_CONTENTS) == 0) {
                        /*
                         * if ownership changes were only made on one side
                         * (presumably the side that didn't have data changes)
                         * we can handle them separately.  In this case,
                         * ownership changes must be fixed first, because
                         * the subsequent do_copy will overwrite them.
                         */
                        if ((diffmask & D_ADMIN) == 0)
                                errs |= do_like(fp, (fp->f_srcdiffs&D_ADMIN) ?
                                                        OPT_DST : OPT_SRC,
                                                TRUE);

                        /*
                         * Now we can deal with the propagation of the data
                         * changes.  Note that any ownership/protection
                         * changes (from the other side) that have not been
                         * propagated yet are about to be lost.  The cases
                         * in which this might happen are all pathological
                         * and the consequences of losing the protection
                         * changes are (IMHO) minor when compared to the
                         * obviously correct data propagation.
                         */
                        errs |= do_copy(fp, (fp->f_srcdiffs&D_CONTENTS) ?
                                                OPT_DST : OPT_SRC);
                        goto done;
                }

                /*
                 * there are conflicting changes, nobody has told us how to
                 * resolve conflicts, and we cannot figure out how to merge
                 * the differences.
                 */
                fp->f_problem = gettext(PROB_different);
        }

cant:
        /*
         * I'm not smart enough to resolve this conflict automatically,
         * so I have no choice but to bounce it back to the user.
         */
        fp->f_flags |= F_CONFLICT;
        fp->f_base->b_unresolved++;
        errs |= ERR_UNRESOLVED;

done:
        /*
         * if we have a conflict and the file is not in the baseline,
         * then there was never any point at which the two copies were
         * in agreement, and we want to preserve the conflict for future
         * resolution.
         *
         * (the else below pairs with the inner f_files test, not with
         * the outer ERR_UNRESOLVED test)
         */
        if ((errs&ERR_UNRESOLVED) && (fp->f_flags & F_IN_BASELINE) == 0)
                if (fp->f_files == 0)
                        /*
                         * in most cases, this is most easily done by just
                         * excluding the file in question from the baseline
                         */
                        fp->f_flags |= F_REMOVE;
                else
                        /*
                         * but ... if the file in question is a directory
                         * with children, excluding it from the baseline
                         * would keep all of its children (even those with
                         * no conflicts) out of the baseline as well.  In
                         * this case, it is better to tell a lie and to
                         * manufacture a point of imaginary agreement
                         * in the baseline ... but one that is absurd enough
                         * that we will still see conflicts each time we run.
                         *
                         * recording a type of directory, and everything
                         * else as zero should be absurd enough.
                         */
                        fp->f_info[ OPT_BASE ].f_type = S_IFDIR;

        if (opt_debug & DBG_MISC)
                fprintf(stderr, "MISC: %s ERRS=%s\n", fp->f_fullname,
                        showflags(errmap, errs));

        return (errs);
}

/*
 * routine:
 *      newer
 *
 * purpose:
 *      pick the side whose copy carries the later modification time
 *
 * parameters:
 *      file pointer (the OPT_SRC and OPT_DST entries of f_info are read)
 *
 * returns:
 *      OPT_SRC or OPT_DST
 *
 * notes:
 *      f_modtime is compared first and f_modns is only consulted
 *      when the f_modtime values are equal.  If both fields are
 *      equal, the source side is reported.
 */
static side_t
newer(struct file *fp)
{
        struct fileinfo *src = &fp->f_info[OPT_SRC];
        struct fileinfo *dst = &fp->f_info[OPT_DST];

        /* the coarse time stamp decides unless the two are tied */
        if (src->f_modtime != dst->f_modtime)
                return ((src->f_modtime > dst->f_modtime) ?
                                                OPT_SRC : OPT_DST);

        /* tie-break on f_modns, an exact tie goes to the source */
        return ((src->f_modns < dst->f_modns) ? OPT_DST : OPT_SRC);
}

/*
 * routine:
 *      older
 *
 * purpose:
 *      pick the side whose copy carries the earlier modification time
 *
 * parameters:
 *      file pointer (the OPT_SRC and OPT_DST entries of f_info are read)
 *
 * returns:
 *      OPT_SRC or OPT_DST
 *
 * notes:
 *      f_modtime is compared first and f_modns is only consulted
 *      when the f_modtime values are equal.  If both fields are
 *      equal, the source side is reported.
 */
static side_t
older(struct file *fp)
{
        struct fileinfo *src = &fp->f_info[OPT_SRC];
        struct fileinfo *dst = &fp->f_info[OPT_DST];

        /* the coarse time stamp decides unless the two are tied */
        if (src->f_modtime != dst->f_modtime)
                return ((src->f_modtime < dst->f_modtime) ?
                                                OPT_SRC : OPT_DST);

        /* tie-break on f_modns, an exact tie goes to the source */
        return ((src->f_modns > dst->f_modns) ? OPT_DST : OPT_SRC);
}

/*
 * routine:
 *      samedata
 *
 * purpose:
 *      decide whether the source and destination copies hold the
 *      same data, using the cheapest test that can settle it
 *
 * parameters:
 *      file pointer (the OPT_SRC and OPT_DST entries of f_info are read)
 *
 * returns:
 *      TRUE    the two copies are considered to have the same data
 *      FALSE   they differ (or could not be compared)
 *
 * notes:
 *      what "same data" means depends on the file type:
 *              differing types ....... never the same
 *              directories ........... always the same
 *              block/char specials ... same f_rd_maj and f_rd_min
 *              symbolic links ........ decided by samelink
 *              everything else ....... same f_size, and then a
 *                                      successful samecompare
 */
static bool_t
samedata(struct file *fp)
{
        struct fileinfo *src = &fp->f_info[OPT_SRC];
        struct fileinfo *dst = &fp->f_info[OPT_DST];

        /* objects of different types can never match */
        if (src->f_type != dst->f_type)
                return (FALSE);

        /* types agree, so dispatch on the (common) type */
        switch (src->f_type) {
        case S_IFDIR:
                /* directories are treated as having no data to differ */
                return (TRUE);

        case S_IFBLK:
        case S_IFCHR:
                /* device nodes are identified by their maj/min pair */
                if (src->f_rd_maj == dst->f_rd_maj &&
                    src->f_rd_min == dst->f_rd_min)
                        return (TRUE);
                return (FALSE);

        case S_IFLNK:
                /* compare what the two links point at */
                return (samelink());

        default:
                break;
        }

        /* differing sizes spare us the expensive comparison */
        if (src->f_size != dst->f_size)
                return (FALSE);

        /* last resort: read both copies and compare them */
        return ((samecompare(fp) == 0) ? FALSE : TRUE);
}

/*
 * routine:
 *      samestuff
 *
 * purpose:
 *      decide whether the source and destination copies agree on
 *      mode, user id, group id and ACLs
 *
 * parameters:
 *      file pointer (the OPT_SRC and OPT_DST entries of f_info are read)
 *
 * returns:
 *      TRUE    all four attributes agree
 *      FALSE   at least one differs, and fp->f_problem has been set
 *
 * notes:
 *      when reporting a difference, a uid, gid or ACL mismatch is
 *      described as an ownership problem; only a pure mode mismatch
 *      is described as a protection problem.
 */
static bool_t
samestuff(struct file *fp)
{
        struct fileinfo *src = &fp->f_info[OPT_SRC];
        struct fileinfo *dst = &fp->f_info[OPT_DST];
        int owner_differs, mode_differs, acls_match;

        owner_differs = (src->f_uid != dst->f_uid) ||
                        (src->f_gid != dst->f_gid);
        mode_differs = (src->f_mode != dst->f_mode);
        acls_match = cmp_acls(src, dst);

        /* ownership style differences take precedence in the report */
        if (owner_differs || !acls_match) {
                fp->f_problem = gettext(PROB_ownership);
                return (FALSE);
        }

        /* owner and ACLs agree, so all that can differ is the mode */
        if (mode_differs) {
                fp->f_problem = gettext(PROB_protection);
                return (FALSE);
        }

        return (TRUE);
}

/*
 * routine:
 *      samecompare
 *
 * purpose:
 *      do a byte-for-byte comparison of two files
 *
 * parameters:
 *      struct file (only used for the name in the debug message;
 *      the files that are opened are the srcname/dstname globals)
 *
 * returns:
 *      TRUE    both files were read to end-of-file and every byte matched
 *      FALSE   the contents differ, the lengths differ, or either file
 *              could not be opened or read
 *
 * notes:
 *      A failure to open or read either file is reported as "different"
 *      rather than "same", so that an unreadable copy can never be
 *      mistaken for an identical one.
 *
 *      The two files are read in lock-step, COPY_BSIZE bytes at a time,
 *      and the two reads of each step are required to return the same
 *      count.  The comparison only ends successfully when both files
 *      report end-of-file in the same step, so extra trailing data on
 *      either side is detected.
 */
static bool_t
samecompare(struct file *fp)
{
        int sfd, dfd;
        ssize_t scount, dcount;
        char srcbuf[ COPY_BSIZE ], dstbuf[ COPY_BSIZE ];
        bool_t same = TRUE;

        sfd = open(srcname, O_RDONLY);
        if (sfd < 0)
                return (FALSE);

        dfd = open(dstname, O_RDONLY);
        if (dfd < 0) {
                close(sfd);
                return (FALSE);
        }

        for (;;) {
                /* a read error on the source cannot mean "identical"   */
                scount = read(sfd, srcbuf, COPY_BSIZE);
                if (scount < 0) {
                        same = FALSE;
                        break;
                }

                /*
                 * do a matching read (even at source end-of-file, so
                 * that left over destination data is noticed); a read
                 * error here shows up as a count mismatch
                 */
                dcount = read(dfd, dstbuf, COPY_BSIZE);
                if (dcount != scount) {
                        same = FALSE;
                        break;
                }

                /* both files ended together with no differences        */
                if (scount == 0)
                        break;

                /* do the comparison for this block                     */
                if (memcmp(srcbuf, dstbuf, (size_t)scount) != 0) {
                        same = FALSE;
                        break;
                }
        }

        if (opt_debug & DBG_ANAL)
                fprintf(stderr, "ANAL: SAME=%d %s\n", same, fp->f_fullname);

        close(sfd);
        close(dfd);
        return (same);
}

/*
 * routine:
 *      truncated
 *
 * purpose:
 *      to determine whether or not a file has been truncated
 *
 * parameters:
 *      pointer to file structure
 *
 * returns:
 *      true/false
 */
static bool_t
truncated(struct file *fp)
{
        /* either source or destination must now be zero length */
        if (fp->f_info[OPT_SRC].f_size && fp->f_info[OPT_DST].f_size)
                return (FALSE);

        /* file must have originally had a non-zero length      */
        if (fp->f_info[OPT_BASE].f_size == 0)
                return (FALSE);

        /* file type must "normal" all around           */
        if (fp->f_info[OPT_BASE].f_type != S_IFREG)
                return (FALSE);
        if (fp->f_info[OPT_SRC].f_type != S_IFREG)
                return (FALSE);
        if (fp->f_info[OPT_DST].f_type != S_IFREG)
                return (FALSE);


        return (TRUE);
}

/*
 * routine:
 *      samelink
 *
 * purpose:
 *      to determine whether or not two symbolic links agree
 *
 * parameters:
 *      none (the links examined are the srcname/dstname globals)
 *
 * returns:
 *      TRUE    both links were read and returned identical text
 *      FALSE   either readlink failed, or the text differs
 *
 * notes:
 *      each link is read into a MAX_PATH byte buffer, and the
 *      comparison covers exactly the bytes readlink returned
 */
static bool_t
samelink()
{
        char src_target[ MAX_PATH ], dst_target[ MAX_PATH ];
        int src_len, dst_len;

        /* fetch what each link points at */
        src_len = readlink(srcname, src_target, sizeof (src_target));
        dst_len = readlink(dstname, dst_target, sizeof (dst_target));

        /* a link we cannot read cannot be declared the same */
        if (src_len < 0 || dst_len < 0)
                return (FALSE);

        /* differing lengths settle it without looking at the text */
        if (src_len != dst_len)
                return (FALSE);

        /* same length, so the text itself decides */
        return ((memcmp(src_target, dst_target, src_len) == 0) ?
                                                        TRUE : FALSE);
}

/*
 * routine:
 *      full_name
 *
 * purpose:
 *      to figure out the fully qualified path name to a file on the
 *      reconciliation list.
 *
 * parameters:
 *      pointer to the file structure
 *      side indication for which base to use
 *              (OPT_SRC selects b_src_name, anything else b_dst_name)
 *      side indication for which buffer to use
 *
 * returns:
 *      pointer to a clobberable buffer holding "<base>/<f_fullname>"
 *
 * notes:
 *      the zero'th buffer is used for renames and links, where
 *      we need the name of another file on the same side.
 *
 *      the three buffers are static and are kept at one common size.
 *      A later call with the same buffer index overwrites the earlier
 *      name.  A later call that needs more room reallocates all three
 *      buffers, so pointers returned earlier may no longer be valid
 *      after such a call.
 */
char *
full_name(struct file *fp, side_t srcdst, side_t whichbuf)
{
        static char *buffers[3];
        static int buflen = 0;
        char *base_dir, *out;
        int needed, i;

        /* pick the base directory for the requested side */
        if (srcdst == OPT_SRC)
                base_dir = fp->f_base->b_src_name;
        else
                base_dir = fp->f_base->b_dst_name;

        /* room for the base, a '/', the relative name and the null */
        needed = strlen(base_dir) + strlen(fp->f_fullname) + 2;

        if (needed > buflen) {
                /* start at MAX_PATH and grow in MAX_NAME sized steps */
                buflen = MAX_PATH;
                while (buflen < needed)
                        buflen += MAX_NAME;

                /* keep all of the buffers at the same (new) size */
                for (i = 0; i < 3; i++) {
                        buffers[i] = (char *) realloc(buffers[i], buflen);
                        if (buffers[i] == 0)
                                nomem("full name");
                }
        }

        /* assemble the name in the chosen buffer and return it */
        out = buffers[whichbuf];
        strcpy(out, base_dir);
        strcat(out, "/");
        strcat(out, fp->f_fullname);
        return (out);
}