root/usr.bin/vi/ex/ex_subst.c
/*      $OpenBSD: ex_subst.c,v 1.31 2023/06/23 15:06:45 millert Exp $   */

/*-
 * Copyright (c) 1992, 1993, 1994
 *      The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
 *      Keith Bostic.  All rights reserved.
 *
 * See the LICENSE file for redistribution information.
 */

#include "config.h"

#include <sys/queue.h>
#include <sys/time.h>

#include <bitstring.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "../common/common.h"
#include "../vi/vi.h"

/*
 * MAXIMUM --
 *      Yield the larger of two values.  Each argument appears more than
 *      once in the expansion, so it may be evaluated more than once; do
 *      not pass expressions with side effects.
 */
#define MAXIMUM(a, b)   (((a) > (b)) ? (a) : (b))

/*
 * Flags handed to s() in its "flags" argument.
 *
 * SUB_FIRST:
 *      Set by ex_s() when the command supplied a new pattern.  s() then
 *      rejects the 'r' flag with an error message.
 * SUB_MUSTSETR:
 *      Passed by ex_s() when the character following the command is
 *      alphanumeric or a backslash (i.e. not a delimiter).  s() reports
 *      a usage error unless the 'r' flag was given.
 */
#define SUB_FIRST       0x01            /* The 'r' flag isn't reasonable. */
#define SUB_MUSTSETR    0x02            /* The 'r' flag is required. */

/* File-local helpers. */
static int re_conv(SCR *, char **, size_t *, int *);
/* Called by s() to append the replacement text for one match. */
static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]);
static int re_tag_conv(SCR *, char **, size_t *, int *);
/* Common substitution engine used by ex_s(), ex_subagain(), ex_subtilde(). */
static int s(SCR *, EXCMD *, char *, regex_t *, u_int);

/*
 * ex_s --
 *      [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
 *
 *      Substitute on lines matching a pattern.
 *
 *      Parses the delimiter, the pattern and the replacement out of the
 *      first argument, records them in the screen (sp->re, sp->subre,
 *      sp->repl) and passes whatever text follows to s() as the flag
 *      string.  With no argument, or an all-blank one, this behaves as
 *      ex_subagain().
 *
 *      Returns 1 on error, otherwise the value returned by s() or
 *      ex_subagain().
 *
 * PUBLIC: int ex_s(SCR *, EXCMD *);
 */
int
ex_s(SCR *sp, EXCMD *cmdp)
{
        regex_t *re;
        size_t blen, len;
        u_int flags;
        int delim;
        char *bp, *nrepl, *ptrn, *rep, *p, *t;

        /*
         * Skip leading white space.
         *
         * !!!
         * Historic vi allowed any non-alphanumeric to serve as the
         * substitution command delimiter.
         *
         * !!!
         * If the arguments are empty, it's the same as &, i.e. we
         * repeat the last substitution.
         */
        if (cmdp->argc == 0)
                return (ex_subagain(sp, cmdp));
        for (p = cmdp->argv[0]->bp,
            len = cmdp->argv[0]->len; len > 0; --len, ++p) {
                /*
                 * The <ctype.h> functions require a value representable
                 * as an unsigned char; a plain char may be negative.
                 */
                if (!isblank((unsigned char)*p))
                        break;
        }
        if (len == 0)
                return (ex_subagain(sp, cmdp));

        /*
         * Keep delim as the plain char value so that the comparisons
         * against p[0]/p[1] below behave as before; only the <ctype.h>
         * call sees the unsigned char conversion.
         */
        delim = *p++;
        if (isalnum((unsigned char)delim) || delim == '\\')
                return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));

        /*
         * !!!
         * The full-blown substitute command reset the remembered
         * state of the 'c' and 'g' suffices.
         */
        sp->c_suffix = sp->g_suffix = 0;

        /*
         * Get the pattern string, toss escaping characters.
         *
         * !!!
         * Historic vi accepted any of the following forms:
         *
         *      :s/abc/def/             change "abc" to "def"
         *      :s/abc/def              change "abc" to "def"
         *      :s/abc/                 delete "abc"
         *      :s/abc                  delete "abc"
         *
         * QUOTING NOTE:
         *
         * Only toss an escaping character if it escapes a delimiter.
         * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
         * would be nice to be more regular, i.e. for each layer of
         * escaping a single escaping character is removed, but that's
         * not how the historic vi worked.
         */
        for (ptrn = t = p;;) {
                if (p[0] == '\0' || p[0] == delim) {
                        if (p[0] == delim)
                                ++p;
                        /*
                         * !!!
                         * Nul terminate the pattern string -- it's passed
                         * to regcomp which doesn't understand anything else.
                         */
                        *t = '\0';
                        break;
                }
                if (p[0] == '\\') {
                        if (p[1] == delim)
                                ++p;
                        else if (p[1] == '\\')
                                *t++ = *p++;
                }
                *t++ = *p++;
        }

        /*
         * If the pattern string is empty, use the last RE (not just the
         * last substitution RE).
         */
        if (*ptrn == '\0') {
                if (sp->re == NULL) {
                        ex_emsg(sp, NULL, EXM_NOPREVRE);
                        return (1);
                }

                /* Re-compile the RE if necessary. */
                if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
                    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
                        return (1);
                flags = 0;
        } else {
                /*
                 * !!!
                 * Compile the RE.  Historic practice is that substitutes set
                 * the search direction as well as both substitute and search
                 * RE's.  We compile the RE twice, as we don't want to bother
                 * ref counting the pattern string and (opaque) structure.
                 */
                if (re_compile(sp, ptrn, t - ptrn,
                    &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
                        return (1);
                if (re_compile(sp, ptrn, t - ptrn,
                    &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
                        return (1);

                flags = SUB_FIRST;
                sp->searchdir = FORWARD;
        }
        re = &sp->re_c;

        /*
         * Get the replacement string.
         *
         * The special character & (\& if O_MAGIC not set) matches the
         * entire RE.  No handling of & is required here, it's done by
         * re_sub().
         *
         * The special character ~ (\~ if O_MAGIC not set) inserts the
         * previous replacement string into this replacement string.
         * Count ~'s to figure out how much space we need.  We could
         * special case nonexistent last patterns or whether or not
         * O_MAGIC is set, but it's probably not worth the effort.
         *
         * QUOTING NOTE:
         *
         * Only toss an escaping character if it escapes a delimiter or
         * if O_MAGIC is set and it escapes a tilde.
         *
         * !!!
         * If the entire replacement pattern is "%", then use the last
         * replacement pattern.  This semantic was added to vi in System
         * V and then percolated elsewhere, presumably around the time
         * that it was added to their version of ed(1).
         */
        if (p[0] == '\0' || p[0] == delim) {
                if (p[0] == delim)
                        ++p;
                free(sp->repl);
                sp->repl = NULL;
                sp->repl_len = 0;
        } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
                p += p[1] == delim ? 2 : 1;
        else {
                /* Upper bound on the expanded length: every ~ may expand. */
                for (rep = p, len = 0;
                    p[0] != '\0' && p[0] != delim; ++p, ++len)
                        if (p[0] == '~')
                                len += sp->repl_len;
                GET_SPACE_RET(sp, bp, blen, len);
                for (t = bp, len = 0, p = rep;;) {
                        if (p[0] == '\0' || p[0] == delim) {
                                if (p[0] == delim)
                                        ++p;
                                break;
                        }
                        if (p[0] == '\\') {
                                if (p[1] == delim)
                                        ++p;
                                else if (p[1] == '\\') {
                                        *t++ = *p++;
                                        ++len;
                                } else if (p[1] == '~') {
                                        ++p;
                                        if (!O_ISSET(sp, O_MAGIC))
                                                goto tilde;
                                }
                        } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
tilde:                          ++p;
                                /*
                                 * With no previous replacement sp->repl may
                                 * be NULL; don't hand that to memcpy even
                                 * for a zero-length copy.
                                 */
                                if (sp->repl_len != 0) {
                                        memcpy(t, sp->repl, sp->repl_len);
                                        t += sp->repl_len;
                                        len += sp->repl_len;
                                }
                                continue;
                        }
                        *t++ = *p++;
                        ++len;
                }

                /*
                 * Allocate the new replacement before releasing the old
                 * one, so that an allocation failure leaves sp->repl and
                 * sp->repl_len describing the same (previous) string
                 * rather than a NULL pointer with a non-zero length.
                 */
                if (len != 0) {
                        if ((nrepl = malloc(len)) == NULL) {
                                msgq(sp, M_SYSERR, NULL);
                                FREE_SPACE(sp, bp, blen);
                                return (1);
                        }
                        memcpy(nrepl, bp, len);
                        free(sp->repl);
                        sp->repl = nrepl;
                }
                sp->repl_len = len;
                FREE_SPACE(sp, bp, blen);
        }
        return (s(sp, cmdp, p, re, flags));
}

/*
 * ex_subagain --
 *      [line [,line]] & [cgr] [count] [#lp]
 *
 *      Substitute using the last substitute RE and replacement pattern.
 *
 * PUBLIC: int ex_subagain(SCR *, EXCMD *);
 */
int
ex_subagain(SCR *sp, EXCMD *cmdp)
{
        char *args;

        /* There has to be a remembered substitute RE to repeat. */
        if (sp->subre == NULL) {
                ex_emsg(sp, NULL, EXM_NOPREVRE);
                return (1);
        }

        /* Compile the saved pattern unless SC_RE_SUBST is already set. */
        if (!F_ISSET(sp, SC_RE_SUBST)) {
                if (re_compile(sp, sp->subre, sp->subre_len,
                    NULL, NULL, &sp->subre_c, RE_C_SUBST))
                        return (1);
        }

        /* Any argument text is handed to s() as the flag string. */
        if (cmdp->argc)
                args = cmdp->argv[0]->bp;
        else
                args = NULL;

        return (s(sp, cmdp, args, &sp->subre_c, 0));
}

/*
 * ex_subtilde --
 *      [line [,line]] ~ [cgr] [count] [#lp]
 *
 *      Substitute using the last RE and last substitute replacement pattern.
 *
 * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
 */
int
ex_subtilde(SCR *sp, EXCMD *cmdp)
{
        char *args;

        /* There has to be a remembered RE of some kind. */
        if (sp->re == NULL) {
                ex_emsg(sp, NULL, EXM_NOPREVRE);
                return (1);
        }

        /* Compile the saved pattern unless SC_RE_SEARCH is already set. */
        if (!F_ISSET(sp, SC_RE_SEARCH)) {
                if (re_compile(sp, sp->re, sp->re_len,
                    NULL, NULL, &sp->re_c, RE_C_SEARCH))
                        return (1);
        }

        /* Any argument text is handed to s() as the flag string. */
        if (cmdp->argc)
                args = cmdp->argv[0]->bp;
        else
                args = NULL;

        return (s(sp, cmdp, args, &sp->re_c, 0));
}

/*
 * s --
 * Do the substitution.  This stuff is *really* tricky.  There are lots of
 * special cases, and general nastiness.  Don't mess with it unless you're
 * pretty confident.
 * 
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're *damned*
 * confident.
 */
/*
 * NEEDNEWLINE --
 *      Make sure sp->newl has room for one more entry.  When the array is
 *      full (newl_len == newl_cnt) it is grown by 25 size_t slots through
 *      REALLOCARRAY.  If sp->newl is NULL afterwards, newl_len is reset to
 *      0 and the ENCLOSING FUNCTION returns 1.
 *
 *      Expands to a bare brace block, not a do/while(0) statement.
 */
#define NEEDNEWLINE(sp) {                                               \
        if ((sp)->newl_len == (sp)->newl_cnt) {                         \
                (sp)->newl_len += 25;                                   \
                REALLOCARRAY((sp), (sp)->newl,                          \
                    (sp)->newl_len, sizeof(size_t));                    \
                if ((sp)->newl == NULL) {                               \
                        (sp)->newl_len = 0;                             \
                        return (1);                                     \
                }                                                       \
        }                                                               \
}

/*
 * BUILD --
 *      Append len bytes starting at l to the build buffer.  Relies on the
 *      enclosing function's locals: lb (buffer), lbclen (bytes used) and
 *      lblen (bytes allocated).  If the data doesn't fit, lblen grows by
 *      MAXIMUM(lbclen + len, 256) and lb is resized with REALLOC; if lb is
 *      NULL afterwards, lbclen is reset to 0 and the ENCLOSING FUNCTION
 *      returns 1.
 *
 *      The len argument is expanded several times, so it must not have
 *      side effects.  Expands to a bare brace block, not a do/while(0)
 *      statement; some call sites in s() omit the trailing semicolon.
 */
#define BUILD(sp, l, len) {                                             \
        if (lbclen + (len) > lblen) {                                   \
                lblen += MAXIMUM(lbclen + (len), 256);                  \
                REALLOC((sp), lb, lblen);                               \
                if (lb == NULL) {                                       \
                        lbclen = 0;                                     \
                        return (1);                                     \
                }                                                       \
        }                                                               \
        memcpy(lb + lbclen, (l), (len));                                \
        lbclen += (len);                                                \
}

/*
 * NEEDSP --
 *      Make sure the build buffer can take len more bytes, without copying
 *      anything.  Uses the same lb/lbclen/lblen locals and the same growth
 *      and failure handling as BUILD (on failure lbclen is reset to 0 and
 *      the ENCLOSING FUNCTION returns 1).  When the buffer is resized, pnt
 *      is re-pointed at lb + lbclen, since lb may have moved; when no
 *      resize is needed, pnt is left untouched.
 *
 *      The len argument is expanded more than once.  Expands to a bare
 *      brace block, not a do/while(0) statement.
 */
#define NEEDSP(sp, len, pnt) {                                          \
        if (lbclen + (len) > lblen) {                                   \
                lblen += MAXIMUM(lbclen + (len), 256);                  \
                REALLOC((sp), lb, lblen);                               \
                if (lb == NULL) {                                       \
                        lbclen = 0;                                     \
                        return (1);                                     \
                }                                                       \
                (pnt) = lb + lbclen;                                    \
        }                                                               \
}

static int
s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags)
{
        EVENT ev;
        MARK from, to;
        TEXTH tiq;
        recno_t elno, lno, slno;
        regmatch_t match[10];
        size_t blen, cnt, last, lbclen, lblen, len, llen;
        size_t offset, saved_offset, scno;
        int lflag, nflag, pflag, rflag;
        int didsub, do_eol_match, eflags, nempty, eval;
        int linechanged, matched, quit, rval;
        unsigned long ul;
        char *bp, *lb;

        NEEDFILE(sp, cmdp);

        slno = sp->lno;
        scno = sp->cno;

        /*
         * !!!
         * Historically, the 'g' and 'c' suffices were always toggled as flags,
         * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
         * not set, they were initialized to 0 for all substitute commands.  If
         * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
         * specified substitute/replacement patterns (see ex_s()).
         */
        if (!O_ISSET(sp, O_EDCOMPATIBLE))
                sp->c_suffix = sp->g_suffix = 0;

        /*
         * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
         * it only displayed the last change.  I'd disallow them, but they are
         * useful in combination with the [v]global commands.  In the current
         * model the problem is combining them with the 'c' flag -- the screen
         * would have to flip back and forth between the confirm screen and the
         * ex print screen, which would be pretty awful.  We do display all
         * changes, though, for what that's worth.
         *
         * !!!
         * Historic vi was fairly strict about the order of "options", the
         * count, and "flags".  I'm somewhat fuzzy on the difference between
         * options and flags, anyway, so this is a simpler approach, and we
         * just take it them in whatever order the user gives them.  (The ex
         * usage statement doesn't reflect this.)
         */
        lflag = nflag = pflag = rflag = 0;
        if (s == NULL)
                goto noargs;
        for (lno = OOBLNO; *s != '\0'; ++s)
                switch (*s) {
                case ' ':
                case '\t':
                        continue;
                case '+':
                        ++cmdp->flagoff;
                        break;
                case '-':
                        --cmdp->flagoff;
                        break;
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                        if (lno != OOBLNO)
                                goto usage;
                        errno = 0;
                        if ((ul = strtoul(s, &s, 10)) >= UINT_MAX)
                                errno = ERANGE;
                        if (*s == '\0')         /* Loop increment correction. */
                                --s;
                        if (errno == ERANGE) {
                                if (ul >= UINT_MAX)
                                        msgq(sp, M_ERR, "Count overflow");
                                else
                                        msgq(sp, M_SYSERR, NULL);
                                return (1);
                        }
                        lno = (recno_t)ul;
                        /*
                         * In historic vi, the count was inclusive from the
                         * second address.
                         */
                        cmdp->addr1.lno = cmdp->addr2.lno;
                        cmdp->addr2.lno += lno - 1;
                        if (!db_exist(sp, cmdp->addr2.lno) &&
                            db_last(sp, &cmdp->addr2.lno))
                                return (1);
                        break;
                case '#':
                        nflag = 1;
                        break;
                case 'c':
                        sp->c_suffix = !sp->c_suffix;

                        /* Ex text structure initialization. */
                        if (F_ISSET(sp, SC_EX)) {
                                memset(&tiq, 0, sizeof(TEXTH));
                                TAILQ_INIT(&tiq);
                        }
                        break;
                case 'g':
                        sp->g_suffix = !sp->g_suffix;
                        break;
                case 'l':
                        lflag = 1;
                        break;
                case 'p':
                        pflag = 1;
                        break;
                case 'r':
                        if (LF_ISSET(SUB_FIRST)) {
                                msgq(sp, M_ERR,
                    "Regular expression specified; r flag meaningless");
                                return (1);
                        }
                        if (!F_ISSET(sp, SC_RE_SEARCH)) {
                                ex_emsg(sp, NULL, EXM_NOPREVRE);
                                return (1);
                        }
                        rflag = 1;
                        re = &sp->re_c;
                        break;
                default:
                        goto usage;
                }

        if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
usage:          ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
                return (1);
        }

noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
                msgq(sp, M_ERR,
"The #, l and p flags may not be combined with the c flag in vi mode");
                return (1);
        }

        /*
         * bp:          if interactive, line cache
         * blen:        if interactive, line cache length
         * lb:          build buffer pointer.
         * lbclen:      current length of built buffer.
         * lblen;       length of build buffer.
         */
        bp = lb = NULL;
        blen = lbclen = lblen = 0;

        /* For each line... */
        for (matched = quit = 0, lno = cmdp->addr1.lno,
            elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

                /* Someone's unhappy, time to stop. */
                if (INTERRUPTED(sp))
                        break;

                /* Get the line. */
                if (db_get(sp, lno, DBG_FATAL, &s, &llen))
                        goto err;

                /*
                 * Make a local copy if doing confirmation -- when calling
                 * the confirm routine we're likely to lose the cached copy.
                 */
                if (sp->c_suffix) {
                        if (bp == NULL) {
                                GET_SPACE_RET(sp, bp, blen, llen);
                        } else
                                ADD_SPACE_RET(sp, bp, blen, llen);
                        memcpy(bp, s, llen);
                        s = bp;
                }

                /* Start searching from the beginning. */
                offset = 0;
                len = llen;

                /* Reset the build buffer offset. */
                lbclen = 0;

                /* Reset empty match test variable. */
                nempty = -1;

                /*
                 * We don't want to have to do a setline if the line didn't
                 * change -- keep track of whether or not this line changed.
                 * If doing confirmations, don't want to keep setting the
                 * line if change is refused -- keep track of substitutions.
                 */
                didsub = linechanged = 0;

                /* New line, do an EOL match. */
                do_eol_match = 1;

                /* It's not nul terminated, but we pretend it is. */
                eflags = REG_STARTEND;

                /* The search area is from s + offset to the EOL.  */
nextmatch:      match[0].rm_so = offset;
                match[0].rm_eo = llen;

                /* Get the next match. */
                eval = regexec(re, (char *)s, 10, match, eflags);

                /*
                 * There wasn't a match or if there was an error, deal with
                 * it.  If there was a previous match in this line, resolve
                 * the changes into the database.  Otherwise, just move on.
                 */
                if (eval == REG_NOMATCH)
                        goto endmatch;
                if (eval != 0) {
                        re_error(sp, eval, re);
                        goto err;
                }
                matched = 1;

                /* Only the first search can match an anchored expression. */
                eflags |= REG_NOTBOL;

                /*
                 * !!!
                 * It's possible to match 0-length strings -- for example, the
                 * command s;a*;X;, when matched against the string "aabb" will
                 * result in "XbXbX", i.e. the matches are "aa", the space
                 * between the b's and the space between the b's and the end of
                 * the string.  There is a similar space between the beginning
                 * of the string and the a's.  The rule that we use (because vi
                 * historically used it) is that any 0-length match, occurring
                 * immediately after a match, is ignored.  Otherwise, the above
                 * example would have resulted in "XXbXbX".  Another example is
                 * incorrectly using " *" to replace groups of spaces with one
                 * space.
                 *
                 * If the match is empty and at the same place as the end of the
                 * previous match, ignore the match and move forward.  If
                 * there's no more characters in the string, we were
                 * attempting to match after the last character, so quit.
                 */
                if (match[0].rm_so == nempty && match[0].rm_eo == nempty) {
                        nempty = -1;
                        if (len == 0)
                                goto endmatch;
                        BUILD(sp, s + offset, 1)
                        ++offset;
                        --len;
                        goto nextmatch;
                }

                /* Confirm change. */
                if (sp->c_suffix) {
                        /*
                         * Set the cursor position for confirmation.  Note,
                         * if we matched on a '$', the cursor may be past
                         * the end of line.
                         */
                        from.lno = to.lno = lno;
                        from.cno = match[0].rm_so;
                        to.cno = match[0].rm_eo;
                        /*
                         * Both ex and vi have to correct for a change before
                         * the first character in the line.
                         */
                        if (llen == 0)
                                from.cno = to.cno = 0;
                        if (F_ISSET(sp, SC_VI)) {
                                /*
                                 * Only vi has to correct for a change after
                                 * the last character in the line.
                                 *
                                 * XXX
                                 * It would be nice to change the vi code so
                                 * that we could display a cursor past EOL.
                                 */
                                if (to.cno >= llen)
                                        to.cno = llen - 1;
                                if (from.cno >= llen)
                                        from.cno = llen - 1;

                                sp->lno = from.lno;
                                sp->cno = from.cno;
                                if (vs_refresh(sp, 1))
                                        goto err;

                                vs_update(sp, "Confirm change? [n]", NULL);

                                if (v_event_get(sp, &ev, 0, 0))
                                        goto err;
                                switch (ev.e_event) {
                                case E_CHARACTER:
                                        break;
                                case E_EOF:
                                case E_ERR:
                                case E_INTERRUPT:
                                        goto lquit;
                                default:
                                        v_event_err(sp, &ev);
                                        goto lquit;
                                }
                        } else {
                                const int flags =
                                    O_ISSET(sp, O_NUMBER) ? E_C_HASH : 0;
                                if (ex_print(sp, cmdp, &from, &to, flags) ||
                                    ex_scprint(sp, &from, &to))
                                        goto lquit;
                                if (ex_txt(sp, &tiq, 0, TXT_CR))
                                        goto err;
                                ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
                        }

                        switch (ev.e_c) {
                        case CH_YES:
                                break;
                        default:
                        case CH_NO:
                                didsub = 0;
                                BUILD(sp, s + offset, match[0].rm_eo - offset);
                                goto skip;
                        case CH_QUIT:
                                /* Set the quit/interrupted flags. */
lquit:                          quit = 1;
                                F_SET(sp->gp, G_INTERRUPTED);

                                /*
                                 * Resolve any changes, then return to (and
                                 * exit from) the main loop.
                                 */
                                goto endmatch;
                        }
                }

                /*
                 * Set the cursor to the last position changed, converting
                 * from 1-based to 0-based.
                 */
                sp->lno = lno;
                sp->cno = match[0].rm_so;

                /* Copy the bytes before the match into the build buffer. */
                BUILD(sp, s + offset, match[0].rm_so - offset);

                /* Substitute the matching bytes. */
                didsub = 1;
                if (re_sub(sp, s, &lb, &lbclen, &lblen, match))
                        goto err;

                /* Set the change flag so we know this line was modified. */
                linechanged = 1;

                /* Move past the matched bytes. */
skip:           offset = match[0].rm_eo;
                len = llen - match[0].rm_eo;

                /* A match cannot be followed by an empty pattern. */
                nempty = match[0].rm_eo;

                /*
                 * If doing a global change with confirmation, we have to
                 * update the screen.  The basic idea is to store the line
                 * so the screen update routines can find it, and restart.
                 */
                if (didsub && sp->c_suffix && sp->g_suffix) {
                        /*
                         * The new search offset will be the end of the
                         * modified line.
                         */
                        saved_offset = lbclen;

                        /* Copy the rest of the line. */
                        if (len)
                                BUILD(sp, s + offset, len)

                        /* Set the new offset. */
                        offset = saved_offset;

                        /* Store inserted lines, adjusting the build buffer. */
                        last = 0;
                        if (sp->newl_cnt) {
                                for (cnt = 0;
                                    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
                                        if (db_insert(sp, lno,
                                            lb + last, sp->newl[cnt] - last))
                                                goto err;
                                        last = sp->newl[cnt] + 1;
                                        ++sp->rptlines[L_ADDED];
                                }
                                lbclen -= last;
                                offset -= last;
                                sp->newl_cnt = 0;
                        }

                        /* Store and retrieve the line. */
                        if (db_set(sp, lno, lb + last, lbclen))
                                goto err;
                        if (db_get(sp, lno, DBG_FATAL, &s, &llen))
                                goto err;
                        ADD_SPACE_RET(sp, bp, blen, llen)
                        memcpy(bp, s, llen);
                        s = bp;
                        len = llen - offset;

                        /* Restart the build. */
                        lbclen = 0;
                        BUILD(sp, s, offset);

                        /*
                         * If we haven't already done the after-the-string
                         * match, do one.  Set REG_NOTEOL so the '$' pattern
                         * only matches once.
                         */
                        if (!do_eol_match)
                                goto endmatch;
                        if (offset == len) {
                                do_eol_match = 0;
                                eflags |= REG_NOTEOL;
                        }
                        goto nextmatch;
                }

                /*
                 * If it's a global:
                 *
                 * If at the end of the string, do a test for the after
                 * the string match.  Set REG_NOTEOL so the '$' pattern
                 * only matches once.
                 */
                if (sp->g_suffix && do_eol_match) {
                        if (len == 0) {
                                do_eol_match = 0;
                                eflags |= REG_NOTEOL;
                        }
                        goto nextmatch;
                }

endmatch:       if (!linechanged)
                        continue;

                /* Copy any remaining bytes into the build buffer. */
                if (len)
                        BUILD(sp, s + offset, len)

                /* Store inserted lines, adjusting the build buffer. */
                last = 0;
                if (sp->newl_cnt) {
                        for (cnt = 0;
                            cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
                                if (db_insert(sp,
                                    lno, lb + last, sp->newl[cnt] - last))
                                        goto err;
                                last = sp->newl[cnt] + 1;
                                ++sp->rptlines[L_ADDED];
                        }
                        lbclen -= last;
                        sp->newl_cnt = 0;
                }

                /* Store the changed line. */
                if (db_set(sp, lno, lb + last, lbclen))
                        goto err;

                /* Update changed line counter. */
                if (sp->rptlchange != lno) {
                        sp->rptlchange = lno;
                        ++sp->rptlines[L_CHANGED];
                }

                /*
                 * !!!
                 * Display as necessary.  Historic practice is to only
                 * display the last line of a line split into multiple
                 * lines.
                 */
                if (lflag || nflag || pflag) {
                        from.lno = to.lno = lno;
                        from.cno = to.cno = 0;
                        if (lflag)
                                (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
                        if (nflag)
                                (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
                        if (pflag)
                                (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
                }
        }

        /*
         * !!!
         * Historically, vi attempted to leave the cursor at the same place if
         * the substitution was done at the current cursor position.  Otherwise
         * it moved it to the first non-blank of the last line changed.  There
         * were some problems: for example, :s/$/foo/ with the cursor on the
         * last character of the line left the cursor on the last character, or
         * the & command with multiple occurrences of the matching string in the
         * line usually left the cursor in a fairly random position.
         *
         * We try to do the same thing, with the exception that if the user is
         * doing substitution with confirmation, we move to the last line about
         * which the user was consulted, as opposed to the last line that they
         * actually changed.  This prevents a screen flash if the user doesn't
         * change many of the possible lines.
         */
        if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
                sp->cno = 0;
                (void)nonblank(sp, sp->lno, &sp->cno);
        }

        /*
         * If not in a global command, and nothing matched, say so.
         * Else, if none of the lines displayed, put something up.
         */
        rval = 0;
        if (!matched) {
                if (!F_ISSET(sp, SC_EX_GLOBAL)) {
                        msgq(sp, M_ERR, "No match found");
                        goto err;
                }
        } else if (!lflag && !nflag && !pflag)
                F_SET(cmdp, E_AUTOPRINT);

        if (0) {
err:            rval = 1;
        }

        if (bp != NULL)
                FREE_SPACE(sp, bp, blen);
        free(lb);
        return (rval);
}

/*
 * re_compile --
 *      Compile the RE.
 *
 *      ptrn/plen describe the pattern text and rep receives the compiled
 *      expression.  If ptrnp is non-NULL the pattern is first converted
 *      (re_tag_conv() when RE_C_TAG is set, re_conv() otherwise), the
 *      previous *ptrnp is freed, and a nul-terminated copy of the converted
 *      pattern is stored in *ptrnp; its length is stored through lenp when
 *      lenp is non-NULL.  RE_C_SEARCH and RE_C_SUBST in flags select which
 *      saved expression (sp->re_c, sp->subre_c) is released beforehand and
 *      which SC_RE_* flag is set on success.
 *
 *      Returns 0 on success and 1 on failure; regcomp() errors are reported
 *      through re_error() unless RE_C_SILENT is set.
 *
 * PUBLIC: int re_compile(SCR *,
 * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int);
 */
int
re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp,
    regex_t *rep, u_int flags)
{
        size_t len;
        int reflags, replaced, rval;
        char *p;

        /* Set RE flags. */
        reflags = 0;
        if (!LF_ISSET(RE_C_TAG)) {
                if (O_ISSET(sp, O_EXTENDED))
                        reflags |= REG_EXTENDED;
                if (O_ISSET(sp, O_IGNORECASE))
                        reflags |= REG_ICASE;
                if (O_ISSET(sp, O_ICLOWER)) {
                        /*
                         * Ignore case only when the pattern contains no
                         * upper-case character.  The cast is required:
                         * handing a negative char value to isupper() is
                         * undefined behavior.
                         */
                        for (p = ptrn, len = plen; len > 0; ++p, --len)
                                if (isupper((unsigned char)*p))
                                        break;
                        if (len == 0)
                                reflags |= REG_ICASE;
                }
        }

        /* If we're replacing a saved value, clear the old one. */
        if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
                regfree(&sp->re_c);
                F_CLR(sp, SC_RE_SEARCH);
        }
        if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
                regfree(&sp->subre_c);
                F_CLR(sp, SC_RE_SUBST);
        }

        /*
         * If we're saving the string, it's a pattern we haven't seen before,
         * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
         * later recompilation.   Free any previously saved value.
         */
        if (ptrnp != NULL) {
                if (LF_ISSET(RE_C_TAG)) {
                        if (re_tag_conv(sp, &ptrn, &plen, &replaced))
                                return (1);
                } else
                        if (re_conv(sp, &ptrn, &plen, &replaced))
                                return (1);

                /* Discard previous pattern. */
                free(*ptrnp);
                *ptrnp = NULL;
                if (lenp != NULL)
                        *lenp = plen;

                /*
                 * Copy the string into allocated memory.
                 *
                 * XXX
                 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
                 * for now.  There's just no other solution.
                 */
                MALLOC(sp, *ptrnp, plen + 1);
                if (*ptrnp != NULL) {
                        memcpy(*ptrnp, ptrn, plen);
                        (*ptrnp)[plen] = '\0';
                }

                /*
                 * Free up conversion-routine-allocated memory.  This is done
                 * before the allocation check so the conversion buffer is
                 * released on the failure path too.
                 */
                if (replaced)
                        FREE_SPACE(sp, ptrn, 0);

                if (*ptrnp == NULL)
                        return (1);

                /* Compile from the saved, nul-terminated copy. */
                ptrn = *ptrnp;
        }

        /*
         * XXX
         * Regcomp isn't 8-bit clean, so we just lost if the pattern
         * contained a nul.  Bummer!
         */
        if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
                if (!LF_ISSET(RE_C_SILENT))
                        re_error(sp, rval, rep);
                return (1);
        }

        /* Record which saved expression is now valid. */
        if (LF_ISSET(RE_C_SEARCH))
                F_SET(sp, SC_RE_SEARCH);
        if (LF_ISSET(RE_C_SUBST))
                F_SET(sp, SC_RE_SUBST);

        return (0);
}

/*
 * re_conv --
 *      Convert vi's regular expressions into something that the
 *      POSIX 1003.2 RE functions can handle.
 *
 * There are two conversions we make to make vi's RE's (specifically
 * the global, search, and substitute patterns) work with POSIX RE's.
 * We assume that \<ptrn\> does "word" searches, which is non-standard
 * but supported by most regexp libraries.
 *
 * 1: If O_MAGIC is not set, strip backslashes from the magic character
 *    set (.[*~) that have them, and add them to the ones that don't.
 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
 *    from the last substitute command's replacement string.  If O_MAGIC
 *    is set, it's the string "~".
 *
 * On entry *ptrnp / *plenp describe the pattern.  If no conversion is
 * needed they are left untouched and *replacedp is set to 0.  Otherwise
 * *replacedp is set to 1 and *ptrnp / *plenp are changed to describe the
 * converted pattern, held in a buffer obtained with GET_SPACE_RET().
 * Returns 0 when it runs to completion.
 *
 * !!!/XXX
 * This doesn't exactly match the historic behavior of vi because we do
 * the ~ substitution before calling the RE engine, so magic characters
 * in the replacement string will be expanded by the RE engine, and they
 * weren't historically.  It's a bug.
 */
static int
re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
{
        size_t blen, len, needlen;
        int magic;
        char *bp, *p, *t;

        /*
         * First pass through, we figure out how much space we'll need.
         * We do it in two passes, on the grounds that most of the time
         * the user is doing a search and won't have magic characters.
         * That way we can skip most of the memory allocation and copies.
         *
         * Every branch below must add to needlen exactly the number of
         * bytes the matching branch of the second pass stores, including
         * the branches that don't set "magic": once any part of the
         * pattern needs rewriting, the whole pattern is copied.
         */
        magic = 0;
        for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
                switch (*p) {
                case '\\':
                        if (len > 1) {
                                --len;
                                switch (*++p) {
                                case '~':
                                        if (!O_ISSET(sp, O_MAGIC)) {
                                                magic = 1;
                                                needlen += sp->repl_len;
                                        } else
                                                needlen += 1;
                                        break;
                                case '.':
                                case '[':
                                case '*':
                                        if (!O_ISSET(sp, O_MAGIC)) {
                                                magic = 1;
                                                needlen += 1;
                                        } else
                                                needlen += 2;
                                        break;
                                default:
                                        needlen += 2;
                                }
                        } else
                                needlen += 1;
                        break;
                case '~':
                        if (O_ISSET(sp, O_MAGIC)) {
                                magic = 1;
                                needlen += sp->repl_len;
                        } else
                                needlen += 1;
                        break;
                case '.':
                case '[':
                case '*':
                        if (!O_ISSET(sp, O_MAGIC)) {
                                magic = 1;
                                needlen += 2;
                        } else
                                needlen += 1;
                        break;
                default:
                        needlen += 1;
                        break;
                }

        /* Nothing to rewrite: the caller keeps using its own pattern. */
        if (!magic) {
                *replacedp = 0;
                return (0);
        }

        /* Get enough memory to hold the final pattern. */
        *replacedp = 1;
        GET_SPACE_RET(sp, bp, blen, needlen);

        /* Second pass: build the converted pattern in bp. */
        for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
                switch (*p) {
                case '\\':
                        if (len > 1) {
                                --len;
                                switch (*++p) {
                                case '~':
                                        if (O_ISSET(sp, O_MAGIC))
                                                *t++ = '~';
                                        else {
                                                memcpy(t,
                                                    sp->repl, sp->repl_len);
                                                t += sp->repl_len;
                                        }
                                        break;
                                case '.':
                                case '[':
                                case '*':
                                        if (O_ISSET(sp, O_MAGIC))
                                                *t++ = '\\';
                                        *t++ = *p;
                                        break;
                                default:
                                        *t++ = '\\';
                                        *t++ = *p;
                                }
                        } else
                                /* Trailing backslash: copy it unchanged. */
                                *t++ = '\\';
                        break;
                case '~':
                        if (O_ISSET(sp, O_MAGIC)) {
                                memcpy(t, sp->repl, sp->repl_len);
                                t += sp->repl_len;
                        } else
                                *t++ = '~';
                        break;
                case '.':
                case '[':
                case '*':
                        if (!O_ISSET(sp, O_MAGIC))
                                *t++ = '\\';
                        *t++ = *p;
                        break;
                default:
                        *t++ = *p;
                        break;
                }

        *ptrnp = bp;
        *plenp = t - bp;
        return (0);
}

/*
 * re_tag_conv --
 *      Convert a tags search path into something that the POSIX
 *      1003.2 RE functions can handle.
 *
 *      A leading and a trailing '/' or '?' delimiter are dropped, a
 *      leading '^' and a trailing '$' are kept as anchors, and every
 *      other occurrence of the characters ^.[]$* is escaped with a
 *      backslash.  A backslash in front of '/' or '?' is removed.
 *
 *      *replacedp is always set to 1; on return *ptrnp / *plenp describe
 *      the converted pattern, held in a buffer obtained with
 *      GET_SPACE_RET().  Returns 0 when it runs to completion.
 */
static int
re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
{
        size_t blen, len;
        int lastdollar;
        char *bp, *end, *p, *t;

        len = *plenp;

        /* Max memory usage is 2 times the length of the string. */
        *replacedp = 1;
        GET_SPACE_RET(sp, bp, blen, len * 2);

        p = *ptrnp;
        end = p + len;          /* One past the last byte of the input. */
        t = bp;

        /* If the last character is a '/' or '?', we just strip it. */
        if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
                --len;

        /* If the next-to-last or last character is a '$', it's magic. */
        if (len > 0 && p[len - 1] == '$') {
                --len;
                lastdollar = 1;
        } else
                lastdollar = 0;

        /* If the first character is a '/' or '?', we just strip it. */
        if (len > 0 && (p[0] == '/' || p[0] == '?')) {
                ++p;
                --len;
        }

        /*
         * If the first or second character is a '^', it's magic.  Only
         * look when something is left: with nothing left, p[0] is not
         * part of the remaining pattern and decrementing len would wrap.
         */
        if (len > 0 && p[0] == '^') {
                *t++ = *p++;
                --len;
        }

        /*
         * Escape every other magic character we can find, meanwhile stripping
         * the backslashes ctags inserts when escaping the search delimiter
         * characters.
         *
         * The byte after a backslash is examined only if it lies inside the
         * original input.  It may be a trailing delimiter that was stripped
         * above; in that case consuming it drops len to zero, and the
         * explicit check at the bottom of the loop stops before the loop's
         * own decrement can wrap.
         */
        for (; len > 0; --len) {
                if (p[0] == '\\' && p + 1 < end &&
                    (p[1] == '/' || p[1] == '?')) {
                        ++p;
                        --len;
                } else if (strchr("^.[]$*", p[0]))
                        *t++ = '\\';
                *t++ = *p++;
                if (len == 0)
                        break;
        }
        if (lastdollar)
                *t++ = '$';

        *ptrnp = bp;
        *plenp = t - bp;
        return (0);
}

/*
 * re_error --
 *      Report a regular expression error.
 *
 *      Formats the regerror() text for errcode/preg and posts it as an
 *      M_ERR message; if the buffer for the text cannot be allocated, an
 *      M_SYSERR message is posted instead.
 *
 * PUBLIC: void re_error(SCR *, int, regex_t *);
 */
void
re_error(SCR *sp, int errcode, regex_t *preg)
{
        size_t need;
        char *text;

        /* With a zero-length buffer regerror() only reports the size. */
        need = regerror(errcode, preg, "", 0);

        text = malloc(need);
        if (text == NULL) {
                msgq(sp, M_SYSERR, NULL);
                return;
        }

        (void)regerror(errcode, preg, text, need);
        msgq(sp, M_ERR, "RE error: %s", text);
        free(text);
}

/*
 * re_sub --
 *      Do the substitution for a regular expression.
 *
 *      Expands the replacement text sp->repl (sp->repl_len bytes) and
 *      appends the result to the line buffer *lbp at offset *lbclenp.
 *      ip is the text that the offsets in match[] refer to; match[0] is
 *      used for '&' and match[1..9] for \1..\9.
 *
 *      On the normal path the (possibly updated) buffer pointer, content
 *      length and allocated length are written back through lbp, lbclenp
 *      and lblenp, and 0 is returned.
 *
 *      NOTE(review): NEEDSP and NEEDNEWLINE are defined outside this
 *      function; presumably they grow lb / sp->newl and may leave the
 *      function early on allocation failure -- confirm against their
 *      definitions.
 */
static int
re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp,
    regmatch_t match[10])
{
        /* Pending case conversion selected by \l, \L, \u, \U. */
        enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
        size_t lbclen, lblen;           /* Local copies. */
        size_t mlen;                    /* Match length. */
        size_t rpl;                     /* Remaining replacement length. */
        char *rp;                       /* Replacement pointer. */
        int ch;
        int no;                         /* Match replacement offset. */
        char *p, *t;                    /* Buffer pointers. */
        char *lb;                       /* Local copies. */

        lb = *lbp;                      /* Get local copies. */
        lbclen = *lbclenp;
        lblen = *lblenp;

        /*
         * QUOTING NOTE:
         *
         * There are some special sequences that vi provides in the
         * replacement patterns.
         *       & string the RE matched (\& if nomagic set)
         *      \# n-th regular subexpression
         *      \E end \U, \L conversion
         *      \e end \U, \L conversion
         *      \l convert the next character to lower-case
         *      \L convert to lower-case, until \E, \e, or end of replacement
         *      \u convert the next character to upper-case
         *      \U convert to upper-case, until \E, \e, or end of replacement
         *
         * Otherwise, since this is the lowest level of replacement, discard
         * all escaping characters.  This (hopefully) matches historic practice.
         */

        /*
         * OUTCH --
         *      Append one character to the line buffer at p and bump lbclen.
         *
         * If nltrans is set and the character's key value is K_CR or K_NL,
         * the current offset (lbclen) is recorded in sp->newl[]; the
         * character itself is still stored, and no case conversion is
         * applied to it.  Otherwise any pending conversion in conv is
         * applied; the one-shot forms (C_ONELOWER, C_ONEUPPER) reset conv
         * to C_NOTSET after converting a single character.
         */
#define OUTCH(ch, nltrans) {                                            \
        CHAR_T __ch = (ch);                                             \
        u_int __value = KEY_VAL(sp, __ch);                              \
        if ((nltrans) && (__value == K_CR || __value == K_NL)) {        \
                NEEDNEWLINE(sp);                                        \
                sp->newl[sp->newl_cnt++] = lbclen;                      \
        } else if (conv != C_NOTSET) {                                  \
                switch (conv) {                                         \
                case C_ONELOWER:                                        \
                        conv = C_NOTSET;                                \
                        /* FALLTHROUGH */                               \
                case C_LOWER:                                           \
                        if (isupper(__ch))                              \
                                __ch = tolower(__ch);                   \
                        break;                                          \
                case C_ONEUPPER:                                        \
                        conv = C_NOTSET;                                \
                        /* FALLTHROUGH */                               \
                case C_UPPER:                                           \
                        if (islower(__ch))                              \
                                __ch = toupper(__ch);                   \
                        break;                                          \
                default:                                                \
                        abort();                                        \
                }                                                       \
        }                                                               \
        NEEDSP(sp, 1, p);                                               \
        *p++ = __ch;                                                    \
        ++lbclen;                                                       \
}
        conv = C_NOTSET;
        /*
         * Walk the replacement text.  Branches that finish with "continue"
         * have handled the sequence completely; branches that "break" fall
         * through to the OUTCH at the bottom of the loop, which stores ch
         * as an ordinary character (with newline translation enabled).
         */
        for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
                switch (ch = *rp++) {
                case '&':
                        /* Unescaped '&' is the match only when magic is set. */
                        if (O_ISSET(sp, O_MAGIC)) {
                                no = 0;
                                goto subzero;
                        }
                        break;
                case '\\':
                        /* A trailing backslash is stored as itself. */
                        if (rpl == 0)
                                break;
                        --rpl;
                        switch (ch = *rp) {
                        case '&':
                                ++rp;
                                /* "\&" is the match only when magic is off. */
                                if (!O_ISSET(sp, O_MAGIC)) {
                                        no = 0;
                                        goto subzero;
                                }
                                break;
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7': case '8': case '9':
                                no = *rp++ - '0';
                                /*
                                 * Also entered by goto from the '&' cases
                                 * with no == 0.  If the subexpression has
                                 * no match, break out so that ch itself
                                 * ('&' or the digit) is stored below.
                                 */
subzero:                        if (match[no].rm_so == -1 ||
                                    match[no].rm_eo == -1)
                                        break;
                                mlen = match[no].rm_eo - match[no].rm_so;
                                /* Copy the matched text, no newline translation. */
                                for (t = ip + match[no].rm_so; mlen--; ++t)
                                        OUTCH(*t, 0);
                                continue;
                        case 'e':
                        case 'E':
                                ++rp;
                                conv = C_NOTSET;
                                continue;
                        case 'l':
                                ++rp;
                                conv = C_ONELOWER;
                                continue;
                        case 'L':
                                ++rp;
                                conv = C_LOWER;
                                continue;
                        case 'u':
                                ++rp;
                                conv = C_ONEUPPER;
                                continue;
                        case 'U':
                                ++rp;
                                conv = C_UPPER;
                                continue;
                        default:
                                /* Any other escaped character: drop the backslash. */
                                ++rp;
                                break;
                        }
                }
                OUTCH(ch, 1);
        }

        *lbp = lb;                      /* Update caller's information. */
        *lbclenp = lbclen;
        *lblenp = lblen;
        return (0);
}