root/usr.bin/locate/locate/util.c
/*      $OpenBSD: util.c,v 1.17 2023/04/28 20:22:35 tb Exp $
 *
 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
 * Copyright (c) 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * James A. Woods.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <stdio.h>
#include <limits.h>

#include "locate.h"

char    **colon(char **, char*, char*);
char    *patprep(char *);
int     getwm(caddr_t);
int     getwf(FILE *);
int     check_bigram_char(int);

/*
 * Validate bigram chars. If the test failed the database is corrupt
 * or the database is obviously not a locate database.
 */
int
check_bigram_char(int ch)
{
        /* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
        if (ch == 0 ||
            (ch >= ASCII_MIN && ch <= ASCII_MAX))
                return(ch);

        (void)fprintf(stderr, "locate database header corrupt, bigram ");
        (void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
            ASCII_MIN, ASCII_MAX, ch);
        exit(1);
}

/* split a colon separated string into a char vector
 *
 * "bla:foo" -> {"bla", "foo"}
 * "bla:"    -> {"bla", dot}
 * "bla"     -> {"bla"}
 * ""        -> do nothing
 *
 */
char **
colon(char **dbv, char *path, char *dot)
{
        int vlen, slen;
        char *c, *ch, *p;
        char **pv;

        if (dbv == NULL) {
                if ((dbv = malloc(sizeof(*dbv))) == NULL)
                        err(1, "malloc");
                *dbv = NULL;
        }

        /* empty string */
        if (*path == '\0') {
                (void)fprintf(stderr, "empty database name, ignored\n");
                return(dbv);
        }

        /* length of string vector */
        for (vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++)
                ;

        for (ch = c = path; ; ch++) {
                if (*ch == ':' ||
                    (!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) {
                        char **newdbv;

                        /* single colon -> dot */
                        if (ch == c)
                                p = dot;
                        else {
                                /* a string */
                                slen = ch - c;
                                if ((p = calloc(sizeof(char), slen + 1))
                                    == NULL)
                                        err(1, "malloc");
                                bcopy(c, p, slen);
                                *(p + slen) = '\0';
                        }
                        /* increase dbv with element p */
                        if ((newdbv = reallocarray(dbv, vlen + 2,
                            sizeof(*newdbv))) == NULL)
                                err(1, "realloc");
                        dbv = newdbv;
                        *(dbv + vlen) = p;
                        *(dbv + ++vlen) = NULL;
                        c = ch + 1;
                }
                if (*ch == '\0')
                        break;
        }
        return (dbv);
}


/*
 * extract last glob-free subpattern in name for fast pre-match; prepend
 * '\0' for backwards match; return end of new pattern
 */
static char globfree[100];

char *
patprep(char *name)
{
        char *endmark, *p, *subp;

        subp = globfree;
        *subp++ = '\0';   /* set first element to '\0' */
        p = name + strlen(name) - 1;

        /* skip trailing metacharacters */
        for (; p >= name; p--)
                if (strchr(LOCATE_REG, *p) == NULL)
                        break;

        /*
         * check if maybe we are in a character class
         *
         * 'foo.[ch]'
         *        |----< p
         */
        if (p >= name &&
            (strchr(p, '[') != NULL || strchr(p, ']') != NULL)) {
                for (p = name; *p != '\0'; p++)
                        if (*p == ']' || *p == '[')
                                break;
                p--;

                /*
                 * cannot find a non-meta character, give up
                 * '*\*[a-z]'
                 *    |-------< p
                 */
                if (p >= name && strchr(LOCATE_REG, *p) != NULL)
                        p = name - 1;
        }

        if (p < name)
                /* only meta chars: "???", force '/' search */
                *subp++ = '/';

        else {
                for (endmark = p; p >= name; p--)
                        if (strchr(LOCATE_REG, *p) != NULL)
                                break;
                for (++p;
                    (p <= endmark) && subp < (globfree + sizeof(globfree));)
                        *subp++ = *p++;
        }
        *subp = '\0';
        return(--subp);
}


/*
 * Read integer from mmap pointer.
 * Essential a simple  ``return *(int *)p'' but avoid sigbus
 * for integer alignment.
 *
 * Convert network byte order to host byte order if necessary.
 */

int
getwm(caddr_t p)
{
        union {
                char buf[sizeof(int)];
                int i;
        } u;
        int i;

        for (i = 0; i < sizeof(int); i++)
                u.buf[i] = *p++;

        i = u.i;

        if (i > PATH_MAX || i < -(PATH_MAX)) {
                i = ntohl(i);
                if (i > PATH_MAX || i < -(PATH_MAX)) {
                        (void)fprintf(stderr,
                            "integer out of +-PATH_MAX (%d): %d\n",
                            PATH_MAX, i);
                        exit(1);
                }
        }
        return(i);
}

/*
 * Read integer from stream.
 *
 * Convert network byte order to host byte order if necessary.
 * So we can read on FreeBSD/i386 (little endian) a locate database
 * which was built on SunOS/sparc (big endian).
 */

int
getwf(FILE *fp)
{
        int word;

        word = getw(fp);

        if (word > PATH_MAX || word < -(PATH_MAX)) {
                word = ntohl(word);
                if (word > PATH_MAX || word < -(PATH_MAX)) {
                        (void)fprintf(stderr,
                            "integer out of +-PATH_MAX (%d): %d\n",
                            PATH_MAX, word);
                        exit(1);
                }
        }
        return(word);
}