root/lib/libc/regex/engine.c
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
 * Copyright (c) 1992, 1993, 1994
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Henry Spencer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <stdbool.h>

/*
 * The matching engine and friends.  This file is #included by regexec.c
 * after suitable #defines of a variety of macros used herein, so that
 * different state representations can be used without duplicating masses
 * of code.
 */

#ifdef SNAMES
#define stepback sstepback
#define matcher smatcher
#define walk    swalk
#define dissect sdissect
#define backref sbackref
#define step    sstep
#define print   sprint
#define at      sat
#define match   smat
#endif
#ifdef LNAMES
#define stepback lstepback
#define matcher lmatcher
#define walk    lwalk
#define dissect ldissect
#define backref lbackref
#define step    lstep
#define print   lprint
#define at      lat
#define match   lmat
#endif
#ifdef MNAMES
#define stepback mstepback
#define matcher mmatcher
#define walk    mwalk
#define dissect mdissect
#define backref mbackref
#define step    mstep
#define print   mprint
#define at      mat
#define match   mmat
#endif

/* another structure passed up and down to avoid zillions of parameters */
struct match {
        struct re_guts *g;
        int eflags;
        regmatch_t *pmatch;     /* [nsub+1] (0 element unused) */
        const char *offp;       /* offsets work from here */
        const char *beginp;     /* start of string -- virtual NUL precedes */
        const char *endp;       /* end of string -- virtual NUL here */
        const char *coldp;      /* can be no match starting before here */
        const char **lastpos;   /* [nplus+1] */
        STATEVARS;
        states st;              /* current states */
        states fresh;           /* states for a fresh start */
        states tmp;             /* temporary */
        states empty;           /* empty set of states */
        mbstate_t mbs;          /* multibyte conversion state */
};

/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C" {
#endif

/* === engine.c === */
static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
#define MAX_RECURSION   100
#define BOL     (OUT-1)
#define EOL     (BOL-1)
#define BOLEOL  (BOL-2)
#define NOTHING (BOL-3)
#define BOW     (BOL-4)
#define EOW     (BOL-5)
#define BADCHAR (BOL-6)
#define NWBND   (BOL-7)
#define NONCHAR(c)      ((c) <= OUT)
/* sflags */
#define SBOS    0x0001
#define SEOS    0x0002

#ifdef REDEBUG
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
#endif
#ifdef REDEBUG
static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst);
#endif
#ifdef REDEBUG
static const char *pchar(int ch);
#endif

#ifdef __cplusplus
}
#endif
/* ========= end header generated by ./mkh ========= */

#ifdef REDEBUG
#define SP(t, s, c)     print(m, t, s, c, stdout)
#define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
#define NOTE(str)       { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
#else
#define SP(t, s, c)     /* nothing */
#define AT(t, p1, p2, s1, s2)   /* nothing */
#define NOTE(s) /* nothing */
#endif

/*
 * Given a multibyte string pointed to by start, step back nchar characters
 * from current position pointed to by cur.
 */
static const char *
stepback(const char *start, const char *cur, int nchar)
{
        const char *ret;
        int wc, mbc;
        mbstate_t mbs;
        size_t clen;

        if (MB_CUR_MAX == 1)
                return ((cur - nchar) > start ? cur - nchar : NULL);

        ret = cur;
        for (wc = nchar; wc > 0; wc--) {
                for (mbc = 1; mbc <= MB_CUR_MAX; mbc++) {
                        if ((ret - mbc) < start)
                                return (NULL);
                        memset(&mbs, 0, sizeof(mbs));
                        clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
                        if (clen != (size_t)-1 && clen != (size_t)-2)
                                break;
                }
                if (mbc > MB_CUR_MAX)
                        return (NULL);
                ret -= mbc;
        }

        return (ret);
}

/*
 - matcher - the actual matching engine
 == static int matcher(struct re_guts *g, const char *string, \
 ==     size_t nmatch, regmatch_t pmatch[], int eflags);
 */
static int                      /* 0 success, REG_NOMATCH failure */
matcher(struct re_guts *g,
        const char *string,
        size_t nmatch,
        regmatch_t pmatch[],
        int eflags)
{
        const char *endp;
        size_t i;
        struct match mv;
        struct match *m = &mv;
        const char *dp = NULL;
        const sopno gf = g->firststate+1;       /* +1 for OEND */
        const sopno gl = g->laststate;
        const char *start;
        const char *stop;
        /* Boyer-Moore algorithms variables */
        const char *pp;
        int cj, mj;
        const char *mustfirst;
        const char *mustlast;
        int *matchjump;
        int *charjump;

        /* simplify the situation where possible */
        if (g->cflags&REG_NOSUB)
                nmatch = 0;
        if (eflags&REG_STARTEND) {
                start = string + pmatch[0].rm_so;
                stop = string + pmatch[0].rm_eo;
        } else {
                start = string;
                stop = start + strlen(start);
        }
        if (stop < start)
                return(REG_INVARG);

        /* prescreening; this does wonders for this rather slow code */
        if (g->must != NULL) {
                if (g->charjump != NULL && g->matchjump != NULL) {
                        mustfirst = g->must;
                        mustlast = g->must + g->mlen - 1;
                        charjump = g->charjump;
                        matchjump = g->matchjump;
                        pp = mustlast;
                        for (dp = start+g->mlen-1; dp < stop;) {
                                /* Fast skip non-matches */
                                while (dp < stop && charjump[(int)*dp])
                                        dp += charjump[(int)*dp];

                                if (dp >= stop)
                                        break;

                                /* Greedy matcher */
                                /* We depend on not being used for
                                 * for strings of length 1
                                 */
                                while (*--dp == *--pp && pp != mustfirst);

                                if (*dp == *pp)
                                        break;

                                /* Jump to next possible match */
                                mj = matchjump[pp - mustfirst];
                                cj = charjump[(int)*dp];
                                dp += (cj < mj ? mj : cj);
                                pp = mustlast;
                        }
                        if (pp != mustfirst)
                                return(REG_NOMATCH);
                } else {
                        for (dp = start; dp < stop; dp++)
                                if (*dp == g->must[0] &&
                                    stop - dp >= g->mlen &&
                                    memcmp(dp, g->must, (size_t)g->mlen) == 0)
                                        break;
                        if (dp == stop)         /* we didn't find g->must */
                                return(REG_NOMATCH);
                }
        }

        /* match struct setup */
        m->g = g;
        m->eflags = eflags;
        m->pmatch = NULL;
        m->lastpos = NULL;
        m->offp = string;
        m->beginp = start;
        m->endp = stop;
        STATESETUP(m, 4);
        SETUP(m->st);
        SETUP(m->fresh);
        SETUP(m->tmp);
        SETUP(m->empty);
        CLEAR(m->empty);
        ZAPSTATE(&m->mbs);

        /* Adjust start according to moffset, to speed things up */
        if (dp != NULL && g->moffset > -1) {
                const char *nstart;

                nstart = stepback(start, dp, g->moffset);
                if (nstart != NULL)
                        start = nstart;
        }

        SP("mloop", m->st, *start);

        /* this loop does only one repetition except for backrefs */
        for (;;) {
                endp = walk(m, start, stop, gf, gl, true);
                if (endp == NULL) {             /* a miss */
                        if (m->pmatch != NULL)
                                free((char *)m->pmatch);
                        if (m->lastpos != NULL)
                                free((char *)m->lastpos);
                        STATETEARDOWN(m);
                        return(REG_NOMATCH);
                }
                if (nmatch == 0 && !g->backrefs)
                        break;          /* no further info needed */

                /* where? */
                assert(m->coldp != NULL);
                for (;;) {
                        NOTE("finding start");
                        endp = walk(m, m->coldp, stop, gf, gl, false);
                        if (endp != NULL)
                                break;
                        assert(m->coldp < m->endp);
                        m->coldp += XMBRTOWC(NULL, m->coldp,
                            m->endp - m->coldp, &m->mbs, 0);
                }
                if (nmatch == 1 && !g->backrefs)
                        break;          /* no further info needed */

                /* oh my, he wants the subexpressions... */
                if (m->pmatch == NULL)
                        m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
                                                        sizeof(regmatch_t));
                if (m->pmatch == NULL) {
                        STATETEARDOWN(m);
                        return(REG_ESPACE);
                }
                for (i = 1; i <= m->g->nsub; i++)
                        m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
                if (!g->backrefs && !(m->eflags&REG_BACKR)) {
                        NOTE("dissecting");
                        dp = dissect(m, m->coldp, endp, gf, gl);
                } else {
                        if (g->nplus > 0 && m->lastpos == NULL)
                                m->lastpos = malloc((g->nplus+1) *
                                                sizeof(const char *));
                        if (g->nplus > 0 && m->lastpos == NULL) {
                                free(m->pmatch);
                                STATETEARDOWN(m);
                                return(REG_ESPACE);
                        }
                        NOTE("backref dissect");
                        dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
                }
                if (dp != NULL)
                        break;

                /* uh-oh... we couldn't find a subexpression-level match */
                assert(g->backrefs);    /* must be back references doing it */
                assert(g->nplus == 0 || m->lastpos != NULL);
                for (;;) {
                        if (dp != NULL || endp <= m->coldp)
                                break;          /* defeat */
                        NOTE("backoff");
                        endp = walk(m, m->coldp, endp-1, gf, gl, false);
                        if (endp == NULL)
                                break;          /* defeat */
                        /* try it on a shorter possibility */
#ifndef NDEBUG
                        for (i = 1; i <= m->g->nsub; i++) {
                                assert(m->pmatch[i].rm_so == -1);
                                assert(m->pmatch[i].rm_eo == -1);
                        }
#endif
                        NOTE("backoff dissect");
                        dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
                }
                assert(dp == NULL || dp == endp);
                if (dp != NULL)         /* found a shorter one */
                        break;

                /* despite initial appearances, there is no match here */
                NOTE("false alarm");
                /* recycle starting later */
                start = m->coldp + XMBRTOWC(NULL, m->coldp,
                    stop - m->coldp, &m->mbs, 0);
                assert(start <= stop);
        }

        /* fill in the details if requested */
        if (nmatch > 0) {
                pmatch[0].rm_so = m->coldp - m->offp;
                pmatch[0].rm_eo = endp - m->offp;
        }
        if (nmatch > 1) {
                assert(m->pmatch != NULL);
                for (i = 1; i < nmatch; i++)
                        if (i <= m->g->nsub)
                                pmatch[i] = m->pmatch[i];
                        else {
                                pmatch[i].rm_so = -1;
                                pmatch[i].rm_eo = -1;
                        }
        }

        if (m->pmatch != NULL)
                free((char *)m->pmatch);
        if (m->lastpos != NULL)
                free((char *)m->lastpos);
        STATETEARDOWN(m);
        return(0);
}

/*
 - dissect - figure out what matched what, no back references
 == static const char *dissect(struct match *m, const char *start, \
 ==     const char *stop, sopno startst, sopno stopst);
 */
static const char *             /* == stop (success) always */
dissect(struct match *m,
        const char *start,
        const char *stop,
        sopno startst,
        sopno stopst)
{
        int i;
        sopno ss;               /* start sop of current subRE */
        sopno es;               /* end sop of current subRE */
        const char *sp;         /* start of string matched by it */
        const char *stp;        /* string matched by it cannot pass here */
        const char *rest;       /* start of rest of string */
        const char *tail;       /* string unmatched by rest of RE */
        sopno ssub;             /* start sop of subsubRE */
        sopno esub;             /* end sop of subsubRE */
        const char *ssp;        /* start of string matched by subsubRE */
        const char *sep;        /* end of string matched by subsubRE */
        const char *oldssp;     /* previous ssp */
        const char *dp __unused;

        AT("diss", start, stop, startst, stopst);
        sp = start;
        for (ss = startst; ss < stopst; ss = es) {
                /* identify end of subRE */
                es = ss;
                switch (OP(m->g->strip[es])) {
                case OPLUS_:
                case OQUEST_:
                        es += OPND(m->g->strip[es]);
                        break;
                case OCH_:
                        while (OP(m->g->strip[es]) != (sop)O_CH)
                                es += OPND(m->g->strip[es]);
                        break;
                }
                es++;

                /* figure out what it matched */
                switch (OP(m->g->strip[ss])) {
                case OEND:
                        assert(nope);
                        break;
                case OCHAR:
                        sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
                        break;
                case OBOL:
                case OEOL:
                case OBOW:
                case OEOW:
                case OBOS:
                case OEOS:
                case OWBND:
                case ONWBND:
                        break;
                case OANY:
                case OANYOF:
                        sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
                        break;
                case OBACK_:
                case O_BACK:
                        assert(nope);
                        break;
                /* cases where length of match is hard to find */
                case OQUEST_:
                        stp = stop;
                        for (;;) {
                                /* how long could this one be? */
                                rest = walk(m, sp, stp, ss, es, false);
                                assert(rest != NULL);   /* it did match */
                                /* could the rest match the rest? */
                                tail = walk(m, rest, stop, es, stopst, false);
                                if (tail == stop)
                                        break;          /* yes! */
                                /* no -- try a shorter match for this one */
                                stp = rest - 1;
                                assert(stp >= sp);      /* it did work */
                        }
                        ssub = ss + 1;
                        esub = es - 1;
                        /* did innards match? */
                        if (walk(m, sp, rest, ssub, esub, false) != NULL) {
                                dp = dissect(m, sp, rest, ssub, esub);
                                assert(dp == rest);
                        } else          /* no */
                                assert(sp == rest);
                        sp = rest;
                        break;
                case OPLUS_:
                        stp = stop;
                        for (;;) {
                                /* how long could this one be? */
                                rest = walk(m, sp, stp, ss, es, false);
                                assert(rest != NULL);   /* it did match */
                                /* could the rest match the rest? */
                                tail = walk(m, rest, stop, es, stopst, false);
                                if (tail == stop)
                                        break;          /* yes! */
                                /* no -- try a shorter match for this one */
                                stp = rest - 1;
                                assert(stp >= sp);      /* it did work */
                        }
                        ssub = ss + 1;
                        esub = es - 1;
                        ssp = sp;
                        oldssp = ssp;
                        for (;;) {      /* find last match of innards */
                                sep = walk(m, ssp, rest, ssub, esub, false);
                                if (sep == NULL || sep == ssp)
                                        break;  /* failed or matched null */
                                oldssp = ssp;   /* on to next try */
                                ssp = sep;
                        }
                        if (sep == NULL) {
                                /* last successful match */
                                sep = ssp;
                                ssp = oldssp;
                        }
                        assert(sep == rest);    /* must exhaust substring */
                        assert(walk(m, ssp, sep, ssub, esub, false) == rest);
                        dp = dissect(m, ssp, sep, ssub, esub);
                        assert(dp == sep);
                        sp = rest;
                        break;
                case OCH_:
                        stp = stop;
                        for (;;) {
                                /* how long could this one be? */
                                rest = walk(m, sp, stp, ss, es, false);
                                assert(rest != NULL);   /* it did match */
                                /* could the rest match the rest? */
                                tail = walk(m, rest, stop, es, stopst, false);
                                if (tail == stop)
                                        break;          /* yes! */
                                /* no -- try a shorter match for this one */
                                stp = rest - 1;
                                assert(stp >= sp);      /* it did work */
                        }
                        ssub = ss + 1;
                        esub = ss + OPND(m->g->strip[ss]) - 1;
                        assert(OP(m->g->strip[esub]) == OOR1);
                        for (;;) {      /* find first matching branch */
                                if (walk(m, sp, rest, ssub, esub, false) == rest)
                                        break;  /* it matched all of it */
                                /* that one missed, try next one */
                                assert(OP(m->g->strip[esub]) == OOR1);
                                esub++;
                                assert(OP(m->g->strip[esub]) == OOR2);
                                ssub = esub + 1;
                                esub += OPND(m->g->strip[esub]);
                                if (OP(m->g->strip[esub]) == (sop)OOR2)
                                        esub--;
                                else
                                        assert(OP(m->g->strip[esub]) == O_CH);
                        }
                        dp = dissect(m, sp, rest, ssub, esub);
                        assert(dp == rest);
                        sp = rest;
                        break;
                case O_PLUS:
                case O_QUEST:
                case OOR1:
                case OOR2:
                case O_CH:
                        assert(nope);
                        break;
                case OLPAREN:
                        i = OPND(m->g->strip[ss]);
                        assert(0 < i && i <= m->g->nsub);
                        m->pmatch[i].rm_so = sp - m->offp;
                        break;
                case ORPAREN:
                        i = OPND(m->g->strip[ss]);
                        assert(0 < i && i <= m->g->nsub);
                        m->pmatch[i].rm_eo = sp - m->offp;
                        break;
                default:                /* uh oh */
                        assert(nope);
                        break;
                }
        }

        assert(sp == stop);
        return(sp);
}

#define ISBOW(m, sp)                                    \
    (sp < m->endp && ISWORD(*sp) &&                     \
    ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||    \
    (sp > m->offp && !ISWORD(*(sp-1)))))
#define ISEOW(m, sp)                                    \
    (((sp == m->endp && !(m->eflags&REG_NOTEOL)) ||     \
    (sp < m->endp && *sp == '\n' &&                     \
    (m->g->cflags&REG_NEWLINE)) ||                      \
    (sp < m->endp && !ISWORD(*sp)) ) &&                 \
    (sp > m->beginp && ISWORD(*(sp-1))))                \

/*
 - backref - figure out what matched what, figuring in back references
 == static const char *backref(struct match *m, const char *start, \
 ==     const char *stop, sopno startst, sopno stopst, sopno lev);
 */
static const char *             /* == stop (success) or NULL (failure) */
backref(struct match *m,
        const char *start,
        const char *stop,
        sopno startst,
        sopno stopst,
        sopno lev,              /* PLUS nesting level */
        int rec)
{
        int i;
        sopno ss;               /* start sop of current subRE */
        const char *sp;         /* start of string matched by it */
        sopno ssub;             /* start sop of subsubRE */
        sopno esub;             /* end sop of subsubRE */
        const char *ssp;        /* start of string matched by subsubRE */
        const char *dp;
        size_t len;
        int hard;
        sop s;
        regoff_t offsave;
        cset *cs;
        wint_t wc;

        AT("back", start, stop, startst, stopst);
        sp = start;

        /* get as far as we can with easy stuff */
        hard = 0;
        for (ss = startst; !hard && ss < stopst; ss++)
                switch (OP(s = m->g->strip[ss])) {
                case OCHAR:
                        if (sp == stop)
                                return(NULL);
                        sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
                        if (wc != OPND(s))
                                return(NULL);
                        break;
                case OANY:
                        if (sp == stop)
                                return(NULL);
                        sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
                        if (wc == BADCHAR)
                                return (NULL);
                        break;
                case OANYOF:
                        if (sp == stop)
                                return (NULL);
                        cs = &m->g->sets[OPND(s)];
                        sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
                        if (wc == BADCHAR || !CHIN(cs, wc))
                                return(NULL);
                        break;
                case OBOS:
                        if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
                                { /* yes */ }
                        else
                                return(NULL);
                        break;
                case OEOS:
                        if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
                                { /* yes */ }
                        else
                                return(NULL);
                        break;
                case OBOL:
                        if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
                            (sp > m->offp && sp < m->endp &&
                            *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)))
                                { /* yes */ }
                        else
                                return(NULL);
                        break;
                case OEOL:
                        if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
                                        (sp < m->endp && *sp == '\n' &&
                                                (m->g->cflags&REG_NEWLINE)) )
                                { /* yes */ }
                        else
                                return(NULL);
                        break;
                case OWBND:
                        if (ISBOW(m, sp) || ISEOW(m, sp))
                                { /* yes */ }
                        else
                                return(NULL);
                        break;
                case ONWBND:
                        if (((sp == m->beginp) && !ISWORD(*sp)) ||
                            (sp == m->endp && !ISWORD(*(sp - 1))))
                                { /* yes, beginning/end of subject */ }
                        else if (ISWORD(*(sp - 1)) == ISWORD(*sp))
                                { /* yes, beginning/end of subject */ }
                        else
                                return(NULL);
                        break;
                case OBOW:
                        if (ISBOW(m, sp))
                                { /* yes */ }
                        else
                                return(NULL);
                        break;
                case OEOW:
                        if (ISEOW(m, sp))
                                { /* yes */ }
                        else
                                return(NULL);
                        break;
                case O_QUEST:
                        break;
                case OOR1:      /* matches null but needs to skip */
                        ss++;
                        s = m->g->strip[ss];
                        do {
                                assert(OP(s) == OOR2);
                                ss += OPND(s);
                        } while (OP(s = m->g->strip[ss]) != (sop)O_CH);
                        /* note that the ss++ gets us past the O_CH */
                        break;
                default:        /* have to make a choice */
                        hard = 1;
                        break;
                }
        if (!hard) {            /* that was it! */
                if (sp != stop)
                        return(NULL);
                return(sp);
        }
        ss--;                   /* adjust for the for's final increment */

        /* the hard stuff */
        AT("hard", sp, stop, ss, stopst);
        s = m->g->strip[ss];
        switch (OP(s)) {
        case OBACK_:            /* the vilest depths */
                i = OPND(s);
                assert(0 < i && i <= m->g->nsub);
                if (m->pmatch[i].rm_eo == -1)
                        return(NULL);
                assert(m->pmatch[i].rm_so != -1);
                len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
                if (len == 0 && rec++ > MAX_RECURSION)
                        return(NULL);
                assert(stop - m->beginp >= len);
                if (sp > stop - len)
                        return(NULL);   /* not enough left to match */
                ssp = m->offp + m->pmatch[i].rm_so;
                if (memcmp(sp, ssp, len) != 0)
                        return(NULL);
                while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
                        ss++;
                return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
        case OQUEST_:           /* to null or not */
                dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
                if (dp != NULL)
                        return(dp);     /* not */
                return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
        case OPLUS_:
                assert(m->lastpos != NULL);
                assert(lev+1 <= m->g->nplus);
                m->lastpos[lev+1] = sp;
                return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
        case O_PLUS:
                if (sp == m->lastpos[lev])      /* last pass matched null */
                        return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
                /* try another pass */
                m->lastpos[lev] = sp;
                dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
                if (dp == NULL)
                        return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
                else
                        return(dp);
        case OCH_:              /* find the right one, if any */
                ssub = ss + 1;
                esub = ss + OPND(s) - 1;
                assert(OP(m->g->strip[esub]) == OOR1);
                for (;;) {      /* find first matching branch */
                        dp = backref(m, sp, stop, ssub, esub, lev, rec);
                        if (dp != NULL)
                                return(dp);
                        /* that one missed, try next one */
                        if (OP(m->g->strip[esub]) == (sop)O_CH)
                                return(NULL);   /* there is none */
                        esub++;
                        assert(OP(m->g->strip[esub]) == (sop)OOR2);
                        ssub = esub + 1;
                        esub += OPND(m->g->strip[esub]);
                        if (OP(m->g->strip[esub]) == (sop)OOR2)
                                esub--;
                        else
                                assert(OP(m->g->strip[esub]) == O_CH);
                }
                /* NOTREACHED */
                break;
        case OLPAREN:           /* must undo assignment if rest fails */
                i = OPND(s);
                assert(0 < i && i <= m->g->nsub);
                offsave = m->pmatch[i].rm_so;
                m->pmatch[i].rm_so = sp - m->offp;
                dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
                if (dp != NULL)
                        return(dp);
                m->pmatch[i].rm_so = offsave;
                return(NULL);
        case ORPAREN:           /* must undo assignment if rest fails */
                i = OPND(s);
                assert(0 < i && i <= m->g->nsub);
                offsave = m->pmatch[i].rm_eo;
                m->pmatch[i].rm_eo = sp - m->offp;
                dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
                if (dp != NULL)
                        return(dp);
                m->pmatch[i].rm_eo = offsave;
                return(NULL);
        default:                /* uh oh */
                assert(nope);
                break;
        }

        /* "can't happen" */
        assert(nope);
        /* NOTREACHED */
        return "shut up gcc";
}

/*
 - walk - step through the string either quickly or slowly
 == static const char *walk(struct match *m, const char *start, \
 ==     const char *stop, sopno startst, sopno stopst, bool fast);
 */
static const char * /* where it ended, or NULL */
walk(struct match *m, const char *start, const char *stop, sopno startst,
        sopno stopst, bool fast)
{
        states st = m->st;
        states fresh = m->fresh;
        states empty = m->empty;
        states tmp = m->tmp;
        const char *p = start;
        wint_t c;
        wint_t lastc;           /* previous c */
        wint_t flagch;
        int i, sflags;
        const char *matchp;     /* last p at which a match ended */
        size_t clen;

        sflags = 0;
        AT("slow", start, stop, startst, stopst);
        CLEAR(st);
        SET1(st, startst);
        SP("sstart", st, *p);
        st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
        if (fast)
                ASSIGN(fresh, st);
        matchp = NULL;
        if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
                c = OUT;
        else {
                /*
                 * XXX Wrong if the previous character was multi-byte.
                 * Newline never is (in encodings supported by FreeBSD),
                 * so this only breaks the ISWORD tests below.
                 */
                c = (uch)*(start - 1);
        }
        for (;;) {
                /* next character */
                lastc = c;
                sflags = 0;
                if (p == m->endp) {
                        c = OUT;
                        clen = 0;
                } else
                        clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);

                if (fast && EQ(st, fresh))
                        matchp = p;

                /* is there an EOL and/or BOL between lastc and c? */
                flagch = '\0';
                i = 0;
                if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
                                (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
                        flagch = BOL;
                        i = m->g->nbol;
                }
                if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
                                (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
                        flagch = (flagch == BOL) ? BOLEOL : EOL;
                        i += m->g->neol;
                }
                if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
                        sflags |= SBOS;
                        /* Step one more for BOS. */
                        i++;
                }
                if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
                        sflags |= SEOS;
                        /* Step one more for EOS. */
                        i++;
                }
                if (i != 0) {
                        for (; i > 0; i--)
                                st = step(m->g, startst, stopst, st, flagch, st,
                                    sflags);
                        SP("sboleol", st, c);
                }

                /* how about a word boundary? */
                if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
                                        (c != OUT && ISWORD(c)) ) {
                        flagch = BOW;
                }
                if ( (lastc != OUT && ISWORD(lastc)) &&
                                (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
                        flagch = EOW;
                }
                if (flagch == BOW || flagch == EOW) {
                        st = step(m->g, startst, stopst, st, flagch, st, sflags);
                        SP("sboweow", st, c);
                }
                if (lastc != OUT && c != OUT &&
                    ISWORD(lastc) == ISWORD(c)) {
                        flagch = NWBND;
                } else if ((lastc == OUT && !ISWORD(c)) ||
                    (c == OUT && !ISWORD(lastc))) {
                        flagch = NWBND;
                }
                if (flagch == NWBND) {
                        st = step(m->g, startst, stopst, st, flagch, st, sflags);
                        SP("snwbnd", st, c);
                }

                /* are we done? */
                if (ISSET(st, stopst)) {
                        if (fast)
                                break;
                        else
                                matchp = p;
                }
                if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
                        break;          /* NOTE BREAK OUT */

                /* no, we must deal with this character */
                ASSIGN(tmp, st);
                if (fast)
                        ASSIGN(st, fresh);
                else
                        ASSIGN(st, empty);
                assert(c != OUT);
                st = step(m->g, startst, stopst, tmp, c, st, sflags);
                SP("saft", st, c);
                assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
                    st));
                p += clen;
        }

        if (fast) {
                assert(matchp != NULL);
                m->coldp = matchp;
                if (ISSET(st, stopst))
                        return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
                else
                        return (NULL);
        } else
                return (matchp);
}

/*
 - step - map set of states reachable before char to set reachable after
 == static states step(struct re_guts *g, sopno start, sopno stop, \
 ==     states bef, int ch, states aft);
 == #define     BOL     (OUT-1)
 == #define     EOL     (BOL-1)
 == #define     BOLEOL  (BOL-2)
 == #define     NOTHING (BOL-3)
 == #define     BOW     (BOL-4)
 == #define     EOW     (BOL-5)
 == #define     BADCHAR (BOL-6)
 == #define     NONCHAR(c)      ((c) <= OUT)
 */
static states
step(struct re_guts *g,
        sopno start,            /* start state within strip */
        sopno stop,             /* state after stop state within strip */
        states bef,             /* states reachable before */
        wint_t ch,              /* character or NONCHAR code */
        states aft,             /* states already known reachable after */
        int sflags)             /* state flags */
{
        cset *cs;
        sop s;
        sopno pc;
        onestate here;          /* note, macros know this name */
        sopno look;
        int i;

        for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
                s = g->strip[pc];
                switch (OP(s)) {
                case OEND:
                        assert(pc == stop-1);
                        break;
                case OCHAR:
                        /* only characters can match */
                        assert(!NONCHAR(ch) || ch != OPND(s));
                        if (ch == OPND(s))
                                FWD(aft, bef, 1);
                        break;
                case OBOS:
                        if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
                                FWD(aft, bef, 1);
                        break;
                case OEOS:
                        if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
                                FWD(aft, bef, 1);
                        break;
                case OBOL:
                        if (ch == BOL || ch == BOLEOL)
                                FWD(aft, bef, 1);
                        break;
                case OEOL:
                        if (ch == EOL || ch == BOLEOL)
                                FWD(aft, bef, 1);
                        break;
                case OBOW:
                        if (ch == BOW)
                                FWD(aft, bef, 1);
                        break;
                case OEOW:
                        if (ch == EOW)
                                FWD(aft, bef, 1);
                        break;
                case OWBND:
                        if (ch == BOW || ch == EOW)
                                FWD(aft, bef, 1);
                        break;
                case ONWBND:
                        if (ch == NWBND)
                                FWD(aft, aft, 1);
                        break;
                case OANY:
                        if (!NONCHAR(ch))
                                FWD(aft, bef, 1);
                        break;
                case OANYOF:
                        cs = &g->sets[OPND(s)];
                        if (!NONCHAR(ch) && CHIN(cs, ch))
                                FWD(aft, bef, 1);
                        break;
                case OBACK_:            /* ignored here */
                case O_BACK:
                        FWD(aft, aft, 1);
                        break;
                case OPLUS_:            /* forward, this is just an empty */
                        FWD(aft, aft, 1);
                        break;
                case O_PLUS:            /* both forward and back */
                        FWD(aft, aft, 1);
                        i = ISSETBACK(aft, OPND(s));
                        BACK(aft, aft, OPND(s));
                        if (!i && ISSETBACK(aft, OPND(s))) {
                                /* oho, must reconsider loop body */
                                pc -= OPND(s) + 1;
                                INIT(here, pc);
                        }
                        break;
                case OQUEST_:           /* two branches, both forward */
                        FWD(aft, aft, 1);
                        FWD(aft, aft, OPND(s));
                        break;
                case O_QUEST:           /* just an empty */
                        FWD(aft, aft, 1);
                        break;
                case OLPAREN:           /* not significant here */
                case ORPAREN:
                        FWD(aft, aft, 1);
                        break;
                case OCH_:              /* mark the first two branches */
                        FWD(aft, aft, 1);
                        assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
                        FWD(aft, aft, OPND(s));
                        break;
                case OOR1:              /* done a branch, find the O_CH */
                        if (ISSTATEIN(aft, here)) {
                                for (look = 1;
                                    OP(s = g->strip[pc+look]) != (sop)O_CH;
                                    look += OPND(s))
                                        assert(OP(s) == (sop)OOR2);
                                FWD(aft, aft, look + 1);
                        }
                        break;
                case OOR2:              /* propagate OCH_'s marking */
                        FWD(aft, aft, 1);
                        if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
                                assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
                                FWD(aft, aft, OPND(s));
                        }
                        break;
                case O_CH:              /* just empty */
                        FWD(aft, aft, 1);
                        break;
                default:                /* ooooops... */
                        assert(nope);
                        break;
                }
        }

        return(aft);
}

#ifdef REDEBUG
/*
 - print - print a set of states
 == #ifdef REDEBUG
 == static void print(struct match *m, const char *caption, states st, \
 ==     int ch, FILE *d);
 == #endif
 */
static void
print(struct match *m,
        const char *caption,
        states st,
        int ch,
        FILE *d)
{
        struct re_guts *g = m->g;
        sopno i;
        int first = 1;

        if (!(m->eflags&REG_TRACE))
                return;

        fprintf(d, "%s", caption);
        if (ch != '\0')
                fprintf(d, " %s", pchar(ch));
        for (i = 0; i < g->nstates; i++)
                if (ISSET(st, i)) {
                        fprintf(d, "%s%lu", (first) ? "\t" : ", ", i);
                        first = 0;
                }
        fprintf(d, "\n");
}

/*
 - at - print current situation
 == #ifdef REDEBUG
 == static void at(struct match *m, const char *title, const char *start, \
 ==                      const char *stop, sopno startst, sopno stopst);
 == #endif
 */
static void
at(     struct match *m,
        const char *title,
        const char *start,
        const char *stop,
        sopno startst,
        sopno stopst)
{
        if (!(m->eflags&REG_TRACE))
                return;

        printf("%s %s-", title, pchar(*start));
        printf("%s ", pchar(*stop));
        printf("%ld-%ld\n", (long)startst, (long)stopst);
}

#ifndef PCHARDONE
#define PCHARDONE       /* never again */
/*
 - pchar - make a character printable
 == #ifdef REDEBUG
 == static const char *pchar(int ch);
 == #endif
 *
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
 * duplicate here avoids having a debugging-capable regexec.o tied to
 * a matching debug.o, and this is convenient.  It all disappears in
 * the non-debug compilation anyway, so it doesn't matter much.
 */
static const char *             /* -> representation */
pchar(int ch)
{
        static char pbuf[10];

        if (isprint((uch)ch) || ch == ' ')
                sprintf(pbuf, "%c", ch);
        else
                sprintf(pbuf, "\\%o", ch);
        return(pbuf);
}
#endif
#endif

#undef  stepback
#undef  matcher
#undef  walk
#undef  dissect
#undef  backref
#undef  step
#undef  print
#undef  at
#undef  match