root/usr.bin/diff/diffreg_new.c
/*
 * Copyright (c) 2018 Martin Pieuchot
 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/capsicum.h>
#ifndef DIFF_NO_MMAP
#include <sys/mman.h>
#endif
#include <sys/stat.h>

#include <capsicum_helpers.h>
#include <err.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "pr.h"
#include "diff.h"
#include <arraylist.h>
#include <diff_main.h>
#include <diff_output.h>

const char *format_label(const char *, struct stat *);

enum diffreg_algo {
        DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
        DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1,
        DIFFREG_ALGO_PATIENCE = 2,
        DIFFREG_ALGO_NONE = 3,
};

int              diffreg_new(char *, char *, int, int);
FILE *           openfile(const char *, char **, struct stat *);

static const struct diff_algo_config myers_then_patience;
static const struct diff_algo_config myers_then_myers_divide;
static const struct diff_algo_config patience;
static const struct diff_algo_config myers_divide;

static const struct diff_algo_config myers_then_patience = {
        .impl = diff_algo_myers,
        .permitted_state_size = 1024 * 1024 * sizeof(int),
        .fallback_algo = &patience,
};

static const struct diff_algo_config myers_then_myers_divide =
        (struct diff_algo_config){
        .impl = diff_algo_myers,
        .permitted_state_size = 1024 * 1024 * sizeof(int),
        .fallback_algo = &myers_divide,
};

static const struct diff_algo_config patience = {
        .impl = diff_algo_patience,
        /* After subdivision, do Patience again: */
        .inner_algo = &patience,
        /* If subdivision failed, do Myers Divide et Impera: */
        .fallback_algo = &myers_then_myers_divide,
};

static const struct diff_algo_config myers_divide = {
        .impl = diff_algo_myers_divide,
        /* When division succeeded, start from the top: */
        .inner_algo = &myers_then_myers_divide,
        /* (fallback_algo = NULL implies diff_algo_none). */
};

static const struct diff_algo_config none = {
        .impl = diff_algo_none,
};

/* If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Myers-divide. */
static const struct diff_config diff_config_myers_then_myers_divide = {
        .atomize_func = diff_atomize_text_by_line,
        .algo = &myers_then_myers_divide,
};

/* If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Patience. */
static const struct diff_config diff_config_myers_then_patience = {
        .atomize_func = diff_atomize_text_by_line,
        .algo = &myers_then_patience,
};

/* Directly force Patience as a first divider of the source file. */
static const struct diff_config diff_config_patience = {
        .atomize_func = diff_atomize_text_by_line,
        .algo = &patience,
};

/* Directly force Patience as a first divider of the source file. */
static const struct diff_config diff_config_none = {
        .atomize_func = diff_atomize_text_by_line,
        .algo = &none,
};

const char *
format_label(const char *oldlabel, struct stat *stb)
{
        const char *time_format = "%Y-%m-%d %H:%M:%S";
        char *newlabel;
        char buf[256];
        char end[10];
        struct tm tm, *tm_ptr;
        int nsec = stb->st_mtim.tv_nsec;
        size_t newlabellen, timelen, endlen;
        tm_ptr = localtime_r(&stb->st_mtime, &tm);

        timelen = strftime(buf, 256, time_format, tm_ptr);
        endlen = strftime(end, 10, "%z", tm_ptr);

        /*
         * The new label is the length of the time, old label, timezone,
         * 9 characters for nanoseconds, and 4 characters for a period
         * and for formatting.
         */
        newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4;
        newlabel = calloc(newlabellen, sizeof(char));

        snprintf(newlabel, newlabellen ,"%s\t%s.%.9d %s\n",
                oldlabel, buf, nsec, end);

        return newlabel;
}

int
diffreg_new(char *file1, char *file2, int flags, int capsicum)
{
        char *str1, *str2;
        FILE *f1, *f2;
        struct pr *pr = NULL;
        struct stat st1, st2;
        struct diff_input_info info;
        struct diff_data left = {}, right = {};
        struct diff_result *result = NULL;
        bool force_text, have_binary;
        int rc, atomizer_flags, rflags, diff_flags = 0;
        int context_lines = diff_context;
        const struct diff_config *cfg;
        enum diffreg_algo algo;
        cap_rights_t rights_ro;
        int ret;

        algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;

        switch (algo) {
        default:
        case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
                cfg = &diff_config_myers_then_myers_divide;
                break;
        case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
                cfg = &diff_config_myers_then_patience;
                break;
        case DIFFREG_ALGO_PATIENCE:
                cfg = &diff_config_patience;
                break;
        case DIFFREG_ALGO_NONE:
                cfg = &diff_config_none;
                break;
        }

        f1 = openfile(file1, &str1, &st1);
        f2 = openfile(file2, &str2, &st2);

        if (flags & D_PAGINATION)
                pr = start_pr(file1, file2);

        if (capsicum) {
                cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
                if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
                        err(2, "unable to limit rights on: %s", file1);
                if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
                        err(2, "unable to limit rights on: %s", file2);
                if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
                        /* stdin has already been limited */
                        if (caph_limit_stderr() == -1)
                                err(2, "unable to limit stderr");
                        if (caph_limit_stdout() == -1)
                                err(2, "unable to limit stdout");
                } else if (caph_limit_stdio() == -1)
                                err(2, "unable to limit stdio");
                caph_cache_catpages();
                caph_cache_tzdata();
                if (caph_enter() < 0)
                        err(2, "unable to enter capability mode");
        }
        /*
         * If we have been given a label use that for the paths, if not format
         * the path with the files modification time.
         */
        info.flags = 0;
        info.left_path = (label[0] != NULL) ?
                label[0] : format_label(file1, &stb1);
        info.right_path = (label[1] != NULL) ?
                label[1] : format_label(file2, &stb2);

        if (flags & D_FORCEASCII)
                diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
        if (flags & D_IGNOREBLANKS)
                diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
        if (flags & D_PROTOTYPE)
                diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;

        ret = diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size,
            diff_flags);
        if (ret != DIFF_RC_OK) {
                warnc(ret, "%s", file1);
                rc = D_ERROR;
                status |= 2;
                goto done;
        }
        ret = diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size,
            diff_flags);
        if (ret != DIFF_RC_OK) {
                warnc(ret, "%s", file2);
                rc = D_ERROR;
                status |= 2;
                goto done;
        }

        result = diff_main(cfg, &left, &right);
        if (result->rc != DIFF_RC_OK) {
                rc = D_ERROR;
                status |= 2;
                goto done;
        }
        /*
         * If there wasn't an error, but we don't have any printable chunks
         * then the files must match.
         */
        if (!diff_result_contains_printable_chunks(result)) {
                rc = D_SAME;
                goto done;
        }

        atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
        rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
        force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
        have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);

        if (have_binary && !force_text) {
                rc = D_BINARY;
                status |= 1;
                goto done;
        }

        if (color)
                diff_output_set_colors(color, del_code, add_code);
        if (diff_format == D_NORMAL) {
                rc = diff_output_plain(NULL, stdout, &info, result, false);
        } else if (diff_format == D_EDIT) {
                rc = diff_output_edscript(NULL, stdout, &info, result);
        } else {
                rc = diff_output_unidiff(NULL, stdout, &info, result,
                    context_lines);
        }
        if (rc != DIFF_RC_OK) {
                rc = D_ERROR;
                status |= 2;
        } else {
                rc = D_DIFFER;
                status |= 1;
        }
done:
        if (pr != NULL)
                stop_pr(pr);
        diff_result_free(result);
        diff_data_free(&left);
        diff_data_free(&right);
#ifndef DIFF_NO_MMAP
        if (str1)
                munmap(str1, st1.st_size);
        if (str2)
                munmap(str2, st2.st_size);
#endif
        fclose(f1);
        fclose(f2);

        return rc;
}

FILE *
openfile(const char *path, char **p, struct stat *st)
{
        FILE *f = NULL;

        if (strcmp(path, "-") == 0)
                f = stdin;
        else
                f = fopen(path, "r");

        if (f == NULL)
                err(2, "%s", path);

        if (fstat(fileno(f), st) == -1)
                err(2, "%s", path);

#ifndef DIFF_NO_MMAP
        *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0);
        if (*p == MAP_FAILED)
#endif
                *p = NULL; /* fall back on file I/O */

        return f;
}

bool
can_libdiff(int flags)
{
        /* libdiff's atomizer can only deal with files */
        if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode))
                return false;

        /* Is this one of the supported input/output modes for diffreg_new? */
        if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
                ignore_pats == NULL && (
                diff_format == D_NORMAL ||
#if 0
                diff_format == D_EDIT ||
#endif
                diff_format == D_UNIFIED) &&
                (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
                return true;
        }

        /* Fallback to using stone. */
        return false;
}