root/usr/src/cmd/filesync/files.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
 *
 * module:
 *      files.c
 *
 * purpose:
 *      routines to examine and manipulate file names
 *
 * contents:
 *      qualify ... ensure that a name is fully qualified
 *      expand  ... expand env variables within a string or file name
 *      noblanks .. ensure that a name contains no embedded unescaped blanks
 *      lex ....... a lexer that can handle escaped/embedded blanks
 *      wildcards . see whether or not a name contains wild cards
 *      prefix .... does one string begin with another
 *      suffix .... does one string end with another
 *      contains .. does one string contain another
 *
 *      cannonize (static) ...  compress redundant "." and ".." out of name
 *
 * notes:
 *      we are interested in embedded blanks because international
 *      character sets and non-unix file systems can both contain
 *      the byte 0x20.  Thus, whenever we record a filename in a
 *      file, we must be careful to escape any embedded blanks that
 *      would cause trouble when we re-lex that file later.
 */
#ident  "%W%    %E% SMI"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>

#include "filesync.h"
#include "messages.h"

static void cannonize(char *name);

/*
 * routine:
 *      qualify
 *
 * purpose:
 *      to turn a name into a fully qualified (absolute) path name
 *
 * parameters:
 *      name to be qualified (cleaned up in place when it is returned
 *      as is)
 *
 * returns:
 *      the original pointer, if the name begins with '/' or '$'
 *      otherwise a newly allocated (strdup) copy of the current
 *      working directory, a slash, and the name
 *
 * notes:
 *      names that begin with a variable reference are not qualified;
 *      they are merely run through cannonize, just like names that
 *      are already absolute.
 *
 *      if the working directory cannot be obtained, or if the
 *      combined name would not fit in MAX_PATH bytes, a message is
 *      written to stderr and the program exits with ERR_OTHER.
 *
 *      the caller cannot tell from the return value alone whether
 *      or not it received a new buffer; it must compare the result
 *      with the pointer it passed in.
 */
char *
qualify(char *name)
{
        char fullpath[ MAX_PATH ];
        size_t dirlen;
        size_t needed;

        /* absolute names and names led by a variable are only cleaned up */
        if (*name == '/' || *name == '$') {
                cannonize(name);
                return (name);
        }

        /* everything else is taken relative to the working directory */
        if (getcwd(fullpath, sizeof (fullpath)) == 0) {
                fprintf(stderr, gettext(ERR_nocwd), name);
                exit(ERR_OTHER);
        }

        /* directory + separator + name + null must fit in the buffer */
        dirlen = strlen(fullpath);
        needed = dirlen + strlen(name) + 2;
        if (needed >= sizeof (fullpath)) {
                fprintf(stderr, gettext(ERR_longname), name);
                exit(ERR_OTHER);
        }

        /* glue the name onto the end of the directory */
        fullpath[dirlen] = '/';
        strcpy(&fullpath[dirlen + 1], name);

        /* squeeze out redundant dots */
        cannonize(fullpath);

        if (opt_debug & DBG_VARS)
                fprintf(stderr, "VARS: QUALIFY %s to %s\n", name, fullpath);

        /* hand back a heap copy, since our buffer is on the stack */
        return (strdup(fullpath));
}

/*
 * routine:
 *      expand
 *
 * purpose:
 *      to expand environment variable references within a string
 *
 * parameters:
 *      string to be expanded.  Variable references always begin
 *      with a $ and may be delimited by parens or curly braces
 *      ($NAME, $(NAME) and ${NAME} are all accepted).
 *
 * returns:
 *      the original pointer, if the string contains no '$'
 *      a newly allocated (strdup) copy of the expanded string
 *      0 if a referenced variable is undefined or empty (a message
 *      has already been written to stderr)
 *
 * notes:
 *      the caller cannot tell from the return value alone whether
 *      or not it received a new buffer; it must compare the result
 *      with the pointer it passed in.
 *
 *      there are no escape conventions for embedding $(){} in
 *      file names.
 *
 *      the expanded name is run through cannonize, but it is not
 *      fully qualified.
 *
 *      The expanded string (including its terminating null) must fit
 *      in MAX_PATH bytes and each variable name in MAX_NAME bytes.
 *      If either limit is exceeded a message is written to stderr
 *      and the program exits with ERR_OTHER.  Every byte stored in
 *      the output buffer is bounds checked, including the bytes
 *      that come from the value of a variable.
 */
char *
expand(char *name)
{       const char *s;
        char *p, *v;
        char delim;
        char namebuf[ MAX_PATH ];
        char varbuf[ MAX_NAME ];
        /* last byte of namebuf, always kept free for the null  */
        char *const last = &namebuf[ MAX_PATH - 1 ];

        /* first see if there are no variables to be bound */
        if (strchr(name, '$') == 0)
                return (name);

        /* move through the string, copying and expanding       */
        for (s = name, p = namebuf; *s; s++) {

                /* normal characters, we just copy (if they fit) */
                if (*s != '$') {
                        if (p >= last) {
                                fprintf(stderr, gettext(ERR_longname), name);
                                exit(ERR_OTHER);
                        }
                        *p++ = *s;
                        continue;
                }

                /* figure out how the variable name is delimited */
                delim = *++s;
                if (delim == '(') {
                        delim = ')';
                        s++;
                } else if (delim == '{') {
                        delim = '}';
                        s++;
                } else
                        delim = 0;

                /*
                 * copy the variable name up to the closing delimiter.
                 * Without a delimiter the name ends at the first byte
                 * that is neither alphanumeric nor an underscore.
                 */
                for (v = varbuf; *s; s++) {
                        /* ctype routines require unsigned char values */
                        if (isalnum((unsigned char)*s) || (*s == '_') ||
                                (delim && *s != delim))
                                *v++ = *s;
                        else
                                break;

                        /* make sure we don't overflow var name buffer  */
                        if (v >= &varbuf[MAX_NAME - 1]) {
                                *v = 0;
                                fprintf(stderr, gettext(ERR_longname), varbuf);
                                exit(ERR_OTHER);
                        }
                }

                *v = 0;

                /*
                 * the outer loop is about to bump s.  If s is sitting
                 * on a closing delimiter that bump consumes it;
                 * otherwise s is on a byte that still has to be
                 * processed, so we back up one.
                 */
                if (delim == 0 || *s != delim)
                        s--;

                /* look up the variable                         */
                v = getenv(varbuf);
                if (v == 0 || *v == 0) {
                        fprintf(stderr, gettext(ERR_undef), varbuf);
                        return (0);
                }

                /* copy the value into the buffer, checking every byte */
                while (*v) {
                        if (p >= last) {
                                fprintf(stderr, gettext(ERR_longname), name);
                                exit(ERR_OTHER);
                        }
                        *p++ = *v++;
                }
        }

        /* null terminate the copy (p can be no further than last) */
        *p = 0;

        /* compress out any redundant dots and dot-dots */
        cannonize(namebuf);

        if (opt_debug & DBG_VARS)
                fprintf(stderr, "VARS: EXPAND %s to %s\n", name, namebuf);

        /* and return a newly malloc'd copy     */
        return (strdup(namebuf));
}

/*
 * routine:
 *      noblanks
 *
 * purpose:
 *      to ensure that a name contains no unescaped embedded blanks
 *
 * parameters:
 *      pointer to name
 *
 * returns:
 *      pointer to name or pointer to buffer containing escaped version of name
 *
 * notes:
 *      this routine can be called on full file names, and so can
 *      conceivably require an arbitrarily large buffer.
 */
const char *
noblanks(const char *name)
{
        const char *s;
        char *p;
        static char *namebuf = 0;
        static int buflen = 0;
        int l;

        /* first see if there are no embedded blanks    */
        for (s = name; *s && *s != ' '; s++);
        if (*s == 0)
                return (name);

        /* make sure we have a buffer large enough for the worst case   */
        l = 4 + (2*strlen(name));
        for (buflen = MAX_PATH; buflen < l; buflen += MAX_NAME);
        namebuf = (char *) realloc(namebuf, buflen);

        /* quote the name, and copy it, escaping quotes */
        p = namebuf;
        *p++ = '"';

        for (s = name; *s; s++) {
                if (*s == '"' || *s == '\\')
                        *p++ = '\\';
                *p++ = *s;
        }

        *p++ = '"';
        *p = 0;

        return (namebuf);
}

/*
 * routine:
 *      lex
 *
 * purpose:
 *      my own version of strtok that handles quoting and escaping
 *
 * parameters:
 *      FILE structure for file to read
 *      (0 to get the next token from the line that was read last)
 *
 * returns:
 *      pointer to next token (in a static buffer that is overwritten
 *      by the next call)
 *      0 at end of file, or when the current line has no more tokens
 *
 * notes:
 *      tokens are separated by white space.  A token that begins
 *      with a single or double quote runs to the matching quote.
 *      A backslash causes the following byte to be taken literally,
 *      inside or outside of quotes.
 *
 *      this routine makes no changes to the string it is passed,
 *      copying tokens into a static buffer.
 *
 *      this routine handles continuation lines (lines that end in a
 *      backslash) after reading and before the lexing even starts.
 *      This limits continued lines to a length of MAX_LINE, but
 *      keeps everything else very simple.  We also, therefore,
 *      limit tokens to a maximum length of MAX_LINE.
 *
 *      lex_linenum is incremented for every physical line read.
 *
 *      bytes are converted to unsigned char before being handed to
 *      isspace, because file names may contain bytes with the high
 *      bit set and the ctype routines are not defined for negative
 *      values.
 */
int lex_linenum;                /* line number in current input file    */

char *
lex(FILE *file)
{       char c, delim;
        char *p;
        char *s;
        static char *savep;
        static char namebuf[ MAX_LINE ];
        static char inbuf[ MAX_LINE ];

        if (file) {                     /* read a new line              */
                p = inbuf + sizeof (inbuf);

                /* read the next input line, with all continuations     */
                for (s = inbuf;
                    (savep = fgets(s, (int)(p - s), file)) != 0; ) {
                        lex_linenum++;

                        /* go find the last character of the input line */
                        while (*s && s[1])
                                s++;

                        /* step back over the newline, if there is one  */
                        if (*s == '\n') {
                                /* nothing in front of it to look at    */
                                if (s == inbuf)
                                        break;
                                s--;
                        }

                        /*
                         * a trailing backslash means the line continues;
                         * s is left on the backslash so that the next
                         * read overwrites it.
                         */
                        if (*s != '\\')
                                break;
                }

                if (savep == 0)
                        return (0);

                s = inbuf;
        } else {                        /* continue with old line       */
                if (savep == 0)
                        return (0);
                s = savep;
        }
        savep = 0;

        /* skip over leading white space        */
        while (isspace((unsigned char)*s))
                s++;
        if (*s == 0)
                return (0);

        /* see if this is a quoted string       */
        c = *s;
        if (c == '\'' || c == '"') {
                delim = c;
                s++;
        } else
                delim = 0;

        /* copy the token into the buffer       */
        for (p = namebuf; (c = *s) != 0; s++) {
                /* literal escape               */
                if (c == '\\') {
                        s++;

                        /*
                         * a backslash with nothing after it has nothing
                         * to escape; stop here rather than stepping
                         * over the terminating null.
                         */
                        if (*s == 0)
                                break;

                        *p++ = *s;
                        continue;
                }

                /* closing delimiter            */
                if (c == delim) {
                        s++;
                        break;
                }

                /* delimiting white space       */
                if (delim == 0 && isspace((unsigned char)c))
                        break;

                /* ordinary characters          */
                *p++ = *s;
        }


        /* remember where we left off           */
        savep = *s ? s : 0;

        /* null terminate and return the buffer */
        *p = 0;
        return (namebuf);
}

/*
 * routine:
 *      wildcards
 *
 * purpose:
 *      determine whether or not there are any wild cards in a name
 *
 * parameters:
 *      name to be checked
 *
 * returns:
 *      TRUE    name contains an unquoted, unescaped * [ { or ?
 *      FALSE   otherwise
 *
 * notes:
 *      we use this to take shortcuts
 *
 *      single quotes toggle a literal mode in which wild card
 *      characters are ignored.  A backslash hides the byte that
 *      follows it, both inside and outside of quotes.
 *
 *      a backslash that is the last byte of the name hides nothing;
 *      the scan simply ends there, so that we never step over the
 *      terminating null.
 */
bool_t
wildcards(const char *name)
{       const char *s;
        int literal = 0;

        for (s = name; *s; s++) {
                switch (*s) {
                case '\'':      /* start or end of literal string */
                        literal = !literal;
                        break;

                case '\\':      /* escape next character */
                        if (s[1] == 0)
                                return (FALSE);
                        s++;
                        break;

                case '*':
                case '[':
                case '{':
                case '?':
                        /* outside of quotes, any of these is a wild card */
                        if (!literal)
                                return (TRUE);
                        break;

                default:
                        break;
                }
        }

        return (FALSE);
}

/*
 * routine:
 *      cannonize
 *
 * purpose:
 *      to compress redundant dots out of a path
 *
 * parameters:
 *      file name in an editable buffer
 *
 * returns:
 *      void (the name is rewritten in place)
 *
 * notes:
 *      because we compress the string in place, there is no danger
 *      of our overflowing any fixed sized buffer.
 *
 *      three things are removed:
 *              any number of leading "./"
 *              every embedded "/./"
 *              every "/xxx/../", together with the directory
 *              component (xxx) that precedes it
 *
 *      after a dot-dot has been removed the scan resumes at the
 *      point of the removal, so that a run like "/a/b/../../c"
 *      collapses all the way down to "/c".
 *
 *      the source and destination of every copy overlap, so the
 *      copies are done with memmove (strcpy is not defined for
 *      overlapping strings).
 *
 *      a trailing "/." or "/.." (with no slash after it) is left
 *      alone.
 *
 *      NOTE(review): if no slash precedes the component being
 *      removed, everything from the start of the name is dropped
 *      ("a/../b" becomes "/b").  The callers visible in this file
 *      pass absolute names, expanded names, or names that begin
 *      with '$' -- confirm that this is acceptable for the latter.
 */
static void
cannonize(char *name)
{       char *s, *p;

        /* leading dot-slashes  */
        while (name[0] == '.' && name[1] == '/')
                memmove(name, &name[2], strlen(&name[2]) + 1);

        s = name;
        while (*s) {
                /* interesting things happen after slashes      */
                if (*s != '/') {
                        s++;
                        continue;
                }

                /* embedded dot-slashes */
                while (s[1] == '.' && s[2] == '/')
                        memmove(&s[1], &s[3], strlen(&s[3]) + 1);

                /* embedded slash-dot-dot-slash */
                if (strncmp(s, "/../", 4) == 0) {
                        /*
                         * scan backwards to the slash that starts the
                         * previous directory (or to the start of name)
                         */
                        for (p = s; p > name; ) {
                                p--;
                                if (*p == '/')
                                        break;
                        }

                        /* slide the tail (from the last slash) down */
                        memmove(p, &s[3], strlen(&s[3]) + 1);

                        /* p is a slash again, re-examine it    */
                        s = p;
                        continue;
                }

                s++;
        }
}

/*
 * routine:
 *      prefix
 *
 * purpose:
 *      determine whether or not one string begins with another
 *
 * parameters:
 *      string to be tested
 *      suspected prefix
 *
 * returns:
 *      no      0
 *      yes     pointer to the first character after the prefix
 *              (the string itself, if the prefix is empty)
 */
const char *
prefix(const char *s, const char *p)
{
        size_t plen = strlen(p);

        /* the comparison stops at a mismatch or at the end of s */
        if (strncmp(s, p, plen) != 0)
                return (0);

        return (s + plen);
}

/*
 * routine:
 *      suffix
 *
 * purpose:
 *      determine whether or not one string ends with another
 *
 * parameters:
 *      string to be tested
 *      suspected suffix
 *
 * returns:
 *      true/false
 *
 * notes:
 *      every string ends with the empty suffix.
 *
 *      the lengths are compared before any pointer is computed, so
 *      that a suffix longer than the string never makes us form an
 *      address in front of the string.
 */
bool_t
suffix(const char *str, const char *suf)
{
        size_t len = strlen(str);
        size_t suflen = strlen(suf);

        /* a suffix cannot be longer than the string itself */
        if (suflen > len)
                return (FALSE);

        /* see if the string ends with the suffix */
        if (strcmp(str + (len - suflen), suf) != 0)
                return (FALSE);

        return (TRUE);
}

/*
 * routine:
 *      contains
 *
 * purpose:
 *      determine whether or not one string contains another
 *
 * parameters:
 *      string to be checked
 *      pattern we are seeking
 *
 * returns:
 *      true/false
 *
 * notes:
 *      the pattern is an ordinary string, not a wild card or
 *      regular expression.
 *
 *      an empty pattern is never considered to be contained in
 *      anything; this routine has always returned FALSE for it.
 *
 *      patterns of a single character are handled like any other
 *      pattern.  (The hand written search this replaces could not
 *      match them, and could read beyond the end of the pattern
 *      while trying.)
 */
bool_t
contains(const char *str, const char *pat)
{
        /* strstr would say that everything contains "" */
        if (*pat == 0)
                return (FALSE);

        if (strstr(str, pat) == 0)
                return (FALSE);

        return (TRUE);
}