#define DEBUG
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "awk.h"
#include "awkgram.tab.h"
/* Initial capacity that allocsetvec() gives to setvec and tmpset. */
#define MAXLIN 22
/* Field accessors for parse-tree nodes. */
#define type(v) (v)->nobj
#define info(v) (v)->ntype
#define left(v) (v)->narg[0]
#define right(v) (v)->narg[1]
#define parent(v) (v)->nnext
/* Groups of case labels used when switching on a node's type(). */
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
#define ELEAF case EMPTYRE:
#define UNARY case STAR: case PLUS: case QUEST:
/* Scratch position sets shared by cfoll(), first() and cgoto(). */
int *setvec;
int *tmpset;
/* Current capacity (in ints) of setvec and tmpset; 0 until allocsetvec() runs. */
int maxsetvec = 0;
/* Lexer/parser state: current token, and the value of a CHAR token. */
int rtok;
int rlxval;
/* Text of the most recent CCL/NCCL token (set by relex()). */
static const uschar *rlxstr;
/* Current scan position in the regular expression text. */
static const uschar *prestr;
/* Start of the regular expression being parsed (set by reparse()). */
static const uschar *lastre;
/* Start of the most recent atom (set by primary()); used to expand {n,m}. */
static const uschar *lastatom;
/* Start of the token relex() is currently scanning. */
static const uschar *starttok;
/* Text being scanned; replace_repeat() swaps in a rewritten copy. */
static const uschar *basestr;
/* The text originally handed to mkdfa(); never freed here. */
static const uschar *firstbasestr;
/* Number of positions currently marked in setvec. */
static int setcnt;
/* Number of leaf positions numbered so far by penter(). */
static int poscnt;
/* Start and length of the last match, set by pmatch()/nematch()/fnematch(). */
const char *patbeg;
int patlen;
/* Capacity of the cache of compiled automata kept by makedfa(). */
#define NFA 128
fa *fatab[NFA];
/* Number of fatab slots in use. */
int nfatab = 0;
/* Helpers that hide the per-state transition table representation. */
static int entry_cmp(const void *l, const void *r);
static int get_gototab(fa*, int, int);
static int set_gototab(fa*, int, int, int);
static void clear_gototab(fa*, int);
/*
 * intalloc: return a zero-filled array of n ints.
 * On allocation failure the problem is reported through overflo(), which is
 * given f (the caller's name) for its message.
 */
static int *
intalloc(size_t n, const char *f)
{
	int *vec;

	vec = (int *) calloc(n, sizeof(int));
	if (vec != NULL)
		return vec;
	overflo(f);
	return vec;
}
/*
 * allocsetvec: give setvec and tmpset their initial MAXLIN-element size.
 * f names the caller for the overflo() message if either allocation fails.
 */
static void
allocsetvec(const char *f)
{
	int *sv, *ts;

	maxsetvec = MAXLIN;
	sv = (int *) reallocarray(setvec, maxsetvec, sizeof(*setvec));
	ts = (int *) reallocarray(tmpset, maxsetvec, sizeof(*tmpset));
	setvec = sv;
	tmpset = ts;
	if (sv == NULL || ts == NULL)
		overflo(f);
}
/*
 * resizesetvec: quadruple the capacity of setvec and tmpset.
 * maxsetvec is only updated once both reallocations have been checked;
 * f names the caller for the overflo() message.
 */
static void
resizesetvec(const char *f)
{
	int *sv, *ts;

	sv = (int *) reallocarray(setvec, maxsetvec, 4 * sizeof(*setvec));
	ts = (int *) reallocarray(tmpset, maxsetvec, 4 * sizeof(*tmpset));
	setvec = sv;
	tmpset = ts;
	if (sv == NULL || ts == NULL)
		overflo(f);
	maxsetvec = maxsetvec * 4;
}
/*
 * resize_state: make sure the per-state arrays of f (gototab, out, posns)
 * can be indexed by `state`.
 *
 * Nothing happens while state + 1 is below f->state_count.  Otherwise the
 * arrays grow to state + 11 entries and every new slot is initialised: an
 * NCHARS-entry transition table with nothing in use, out = 0, posns = NULL.
 * Any allocation failure is reported through overflo().
 */
static void
resize_state(fa *f, int state)
{
	gtt *tabs;
	uschar *outs;
	int **sets;
	int idx, wanted;

	state++;
	if (state < f->state_count)
		return;
	wanted = state + 10;

	tabs = (gtt *) reallocarray(f->gototab, wanted, sizeof(gtt));
	if (tabs == NULL)
		goto nomem;
	f->gototab = tabs;

	outs = (uschar *) reallocarray(f->out, wanted, sizeof(f->out[0]));
	if (outs == NULL)
		goto nomem;
	f->out = outs;

	sets = (int **) reallocarray(f->posns, wanted, sizeof(f->posns[0]));
	if (sets == NULL)
		goto nomem;
	f->posns = sets;

	/* Initialise only the slots that did not exist before. */
	idx = f->state_count;
	while (idx < wanted) {
		gtt *slot = &f->gototab[idx];

		slot->entries = (gtte *) calloc(NCHARS, sizeof(gtte));
		if (slot->entries == NULL)
			goto nomem;
		slot->allocated = NCHARS;
		slot->inuse = 0;
		f->out[idx] = 0;
		f->posns[idx] = NULL;
		idx++;
	}
	f->state_count = wanted;
	return;

nomem:
	overflo(__func__);
}
fa *makedfa(const char *s, bool anchor)
{
int i, use, nuse;
fa *pfa;
static int now = 1;
if (setvec == NULL) {
allocsetvec(__func__);
}
if (compile_time != RUNNING)
return mkdfa(s, anchor);
for (i = 0; i < nfatab; i++)
if (fatab[i]->anchor == anchor
&& strcmp((const char *) fatab[i]->restr, s) == 0) {
fatab[i]->use = now++;
return fatab[i];
}
pfa = mkdfa(s, anchor);
if (nfatab < NFA) {
fatab[nfatab] = pfa;
fatab[nfatab]->use = now++;
nfatab++;
return pfa;
}
use = fatab[0]->use;
nuse = 0;
for (i = 1; i < nfatab; i++)
if (fatab[i]->use < use) {
use = fatab[i]->use;
nuse = i;
}
freefa(fatab[nuse]);
fatab[nuse] = pfa;
pfa->use = now++;
return pfa;
}
/*
 * mkdfa: build a new automaton for regular expression s.
 *
 * The parsed expression is wrapped as (ALL* CAT re) CAT FINAL, its leaves are
 * numbered by penter(), and follow sets are computed by cfoll().  The parse
 * tree is then freed, states 0 and 1 receive their position arrays, and
 * makeinit() creates the initial state.  If the regular expression text was
 * rewritten during parsing (basestr no longer equals firstbasestr), the
 * rewritten copy is released before returning.
 */
fa *mkdfa(const char *s, bool anchor)
{
	Node *re, *tree;
	fa *f;

	firstbasestr = (const uschar *) s;
	basestr = firstbasestr;

	re = reparse(s);
	tree = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), re);
	tree = op2(CAT, tree, op2(FINAL, NIL, NIL));

	poscnt = 0;
	penter(tree);	/* numbers the leaves and counts them in poscnt */

	f = (fa *) calloc(1, sizeof(fa) + poscnt * sizeof(rrow));
	if (f == NULL)
		overflo(__func__);
	f->accept = poscnt - 1;	/* the FINAL leaf is numbered last */
	cfoll(f, tree);
	freetr(tree);

	resize_state(f, 1);
	f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
	f->posns[1] = intalloc(1, __func__);
	f->posns[1][0] = 0;	/* state 1 holds an empty position set */

	f->initstat = makeinit(f, anchor);
	f->anchor = anchor;
	f->restr = (uschar *) tostring(s);

	if (basestr != firstbasestr && basestr != NULL) {
		xfree(basestr);
	}
	return f;
}
/*
 * makeinit: set up state 2 of f from the follow set of position 0 and
 * return the state the automaton starts in.
 *
 * State 2 gets a copy of re[0].lfollow and is marked accepting when its
 * first position is f->accept.  The starting state is the one reached from
 * state 2 on HAT.  For an anchored automaton the last position is dropped
 * from state 2, the result is copied into state 0, and, when the starting
 * state is not state 2, its position count is reduced by one as well.
 */
int makeinit(fa *f, bool anchor)
{
	int n, len;

	f->curstat = 2;
	f->out[2] = 0;
	len = f->re[0].lfollow[0];

	xfree(f->posns[2]);
	f->posns[2] = intalloc(len + 1, __func__);
	for (n = 0; n <= len; n++)
		f->posns[2][n] = f->re[0].lfollow[n];
	if (f->posns[2][1] == f->accept)
		f->out[2] = 1;

	clear_gototab(f, 2);
	f->curstat = cgoto(f, 2, HAT);

	if (!anchor)
		return f->curstat;

	f->posns[2][0] = len - 1;
	for (n = 0; n < len; n++)
		f->posns[0][n] = f->posns[2][n];
	f->out[0] = f->out[2];
	if (f->curstat != 2)
		f->posns[f->curstat][0] -= 1;
	return f->curstat;
}
/*
 * penter: walk the parse tree below p, numbering each leaf with the next
 * value of poscnt (stored in info()) and pointing each child's parent()
 * link at its parent node.  ZERO nodes are not descended into.
 */
void penter(Node *p)
{
	Node *lhs, *rhs;

	switch (type(p)) {
	ELEAF
	LEAF
		info(p) = poscnt++;
		break;
	UNARY
		lhs = left(p);
		penter(lhs);
		parent(lhs) = p;
		break;
	case CAT:
	case OR:
		lhs = left(p);
		rhs = right(p);
		penter(lhs);
		penter(rhs);
		parent(lhs) = p;
		parent(rhs) = p;
		break;
	case ZERO:
		break;
	default:
		FATAL("can't happen: unknown type %d in penter", type(p));
		break;
	}
}
/*
 * freetr: release the parse tree rooted at p, children first.
 * Leaves have no children, unary and ZERO nodes have a left child only,
 * CAT and OR have both.
 */
void freetr(Node *p)
{
	switch (type(p)) {
	case CAT:
	case OR:
		freetr(left(p));
		freetr(right(p));
		break;
	UNARY
	case ZERO:
		freetr(left(p));
		break;
	ELEAF
	LEAF
		break;
	default:
		FATAL("can't happen: unknown type %d in freetr", type(p));
		return;
	}
	xfree(p);
}
/*
 * hexstr: evaluate up to max hexadecimal digits starting at *pp.
 * Returns the accumulated value (0 when no digit is present) and leaves *pp
 * pointing just past the digits that were consumed.
 */
static int
hexstr(const uschar **pp, int max)
{
	const uschar *cur = *pp;
	int value = 0;
	int taken = 0;

	while (taken < max && isxdigit(*cur)) {
		const int ch = *cur;

		if (isdigit(ch))
			value = 16 * value + ch - '0';
		else if ('a' <= ch && ch <= 'f')
			value = 16 * value + ch - 'a' + 10;
		else if ('A' <= ch && ch <= 'F')
			value = 16 * value + ch - 'A' + 10;
		taken++;
		cur++;
	}
	*pp = cur;
	return value;
}
#define isoctdigit(c) ((c) >= '0' && (c) <= '7')
/*
 * quoted: decode the escape sequence whose first character (the one after
 * the backslash) is at *pp, and advance *pp past it.
 *
 * Recognises \t \n \f \r \b \v \a \\, \x with up to 2 hex digits, \u with up
 * to 8 hex digits, and one to three octal digits.  Any other character is
 * returned unchanged.
 */
int quoted(const uschar **pp)
{
	const uschar *p = *pp;
	int c = *p++;

	switch (c) {
	case 't':
		c = '\t';
		break;
	case 'n':
		c = '\n';
		break;
	case 'f':
		c = '\f';
		break;
	case 'r':
		c = '\r';
		break;
	case 'b':
		c = '\b';
		break;
	case 'v':
		c = '\v';
		break;
	case 'a':
		c = '\a';
		break;
	case '\\':
		/* a quoted backslash stands for itself */
		break;
	case 'x':
		c = hexstr(&p, 2);
		break;
	case 'u':
		c = hexstr(&p, 8);
		break;
	default:
		if (isoctdigit(c)) {
			int digits;

			/* first digit already consumed; take at most two more */
			c -= '0';
			for (digits = 1; digits < 3 && isoctdigit(*p); digits++)
				c = 8 * c + *p++ - '0';
		}
		break;
	}
	*pp = p;
	return c;
}
/*
 * cclenter: expand the body of a character class into a 0-terminated array
 * of code points.
 *
 * argp is the class text (without the surrounding brackets).  Backslash
 * escapes are decoded with quoted(); "a-z" style ranges are expanded into
 * every code point from the low to the high end; a range whose low end is
 * greater than its high end removes the low end and contributes nothing.
 *
 * Returns a freshly allocated array that the caller owns.  Running out of
 * memory is fatal.
 *
 * The working buffer is static and reused across calls.  It is grown so that
 * there is always room for the element about to be stored AND the trailing
 * 0 terminator; growing only when the buffer was already full let the
 * terminator land one element past the end whenever the expansion produced
 * exactly bufsz entries.
 */
int *cclenter(const char *argp)
{
	int i, c, c2;
	int n;
	const uschar *p = (const uschar *) argp;
	int *bp, *retp;
	static int *buf = NULL;
	static int bufsz = 100;

	if (buf == NULL && (buf = (int *) calloc(bufsz, sizeof(int))) == NULL)
		FATAL("out of space for character class [%.10s...] 1", p);
	bp = buf;
	for (i = 0; *p != 0; ) {
		n = u8_rune(&c, (const char *) p);
		p += n;
		if (c == '\\') {
			c = quoted(&p);
		} else if (c == '-' && i > 0 && bp[-1] != 0) {
			if (*p != 0) {
				/* range: previous entry is the low end */
				c = bp[-1];
				n = u8_rune(&c2, (const char *) p);
				p += n;
				if (c2 == '\\')
					c2 = quoted(&p);
				if (c > c2) {
					/* empty range: drop the low end too */
					bp--;
					i--;
					continue;
				}
				while (c < c2) {
					/* keep one slot spare for the terminator */
					if (i + 1 >= bufsz) {
						buf = (int *) reallocarray(buf, bufsz, 2 * sizeof(int));
						if (buf == NULL)
							FATAL("out of space for character class [%.10s...] 2", p);
						bufsz *= 2;
						bp = buf + i;
					}
					*bp++ = ++c;
					i++;
				}
				continue;
			}
		}
		/* keep one slot spare for the terminator */
		if (i + 1 >= bufsz) {
			buf = (int *) reallocarray(buf, bufsz, 2 * sizeof(int));
			if (buf == NULL)
				FATAL("out of space for character class [%.10s...] 2", p);
			bufsz *= 2;
			bp = buf + i;
		}
		*bp++ = c;
		i++;
	}
	*bp = 0;

	/* intalloc() reports allocation failure instead of returning NULL */
	retp = intalloc(bp - buf + 1, __func__);
	memcpy(retp, buf, (bp - buf + 1) * sizeof(int));
	return retp;
}
/*
 * overflo: report that space ran out while building a regular expression.
 * s is included in the message (at most 30 characters of it); callers in this
 * file pass the name of the function whose allocation failed.
 */
void overflo(const char *s)
{
FATAL("regular expression too big: out of space in %.30s...", s);
}
/*
 * cfoll: record, for every leaf below v, its type, its value and its follow
 * set in f->re[].
 *
 * The follow set is computed by follow() into the shared setvec/setcnt and
 * then stored as an int array whose element 0 is the count, followed by the
 * member positions in descending order.  ZERO nodes are skipped.
 */
void cfoll(fa *f, Node *v)
{
	int k, n, pos;
	int *fol;

	switch (type(v)) {
	ELEAF
	LEAF
		pos = info(v);
		f->re[pos].ltype = type(v);
		f->re[pos].lval.np = right(v);

		/* setvec must be indexable by every position up to accept */
		while (f->accept >= maxsetvec)
			resizesetvec(__func__);
		for (k = 0; k <= f->accept; k++)
			setvec[k] = 0;
		setcnt = 0;
		follow(v);

		fol = intalloc(setcnt + 1, __func__);
		f->re[pos].lfollow = fol;
		fol[0] = setcnt;
		n = 1;
		for (k = f->accept; k >= 0; k--) {
			if (setvec[k] == 1)
				fol[n++] = k;
		}
		break;
	UNARY
		cfoll(f, left(v));
		break;
	case CAT:
	case OR:
		cfoll(f, left(v));
		cfoll(f, right(v));
		break;
	case ZERO:
		break;
	default:
		FATAL("can't happen: unknown type %d in cfoll", type(v));
	}
}
/*
 * first: add to setvec the positions that can begin a match of the subtree
 * p, counting newly added ones in setcnt.
 *
 * Returns 0 when p can match the empty string and 1 otherwise.  EMPTYRE
 * leaves clear their own slot and report 0; a CCL leaf whose class array
 * starts with 0 is marked but also reports 0.
 */
int first(Node *p)
{
	int lhs, rhs, pos;

	switch (type(p)) {
	ELEAF
	LEAF
		pos = info(p);
		while (setcnt >= maxsetvec || pos >= maxsetvec)
			resizesetvec(__func__);
		if (type(p) == EMPTYRE) {
			setvec[pos] = 0;
			return 0;
		}
		if (setvec[pos] != 1) {
			setvec[pos] = 1;
			setcnt++;
		}
		if (type(p) == CCL && (*(int *) right(p)) == 0)
			return 0;
		return 1;
	case PLUS:
		return first(left(p)) == 0 ? 0 : 1;
	case STAR:
	case QUEST:
		(void) first(left(p));
		return 0;
	case CAT:
		/* the right side only matters when the left can be empty */
		if (first(left(p)) != 0)
			return 1;
		return first(right(p)) == 0 ? 0 : 1;
	case OR:
		/* both sides always contribute; right is visited first */
		rhs = first(right(p));
		lhs = first(left(p));
		return (lhs == 0 || rhs == 0) ? 0 : 1;
	case ZERO:
		return 0;
	}
	FATAL("can't happen: unknown type %d in first", type(p));
	return -1;
}
/*
 * follow: add to setvec the positions that can follow node v, by climbing
 * the parent() links.
 *
 * Under STAR/PLUS the node can be followed by its own first positions.
 * Under CAT, a left operand is followed by the first positions of the right
 * operand, and the climb continues only when that right operand can be
 * empty.  FINAL, and parents of any other type, end the walk.
 */
void follow(Node *v)
{
	Node *up;

	if (type(v) == FINAL)
		return;
	up = parent(v);
	switch (type(up)) {
	case STAR:
	case PLUS:
		first(v);
		break;
	case OR:
	case QUEST:
		break;
	case CAT:
		if (v == left(up) && first(right(up)) != 0)
			return;
		break;
	default:
		return;
	}
	follow(up);
}
/*
 * member: report whether c occurs in the 0-terminated int array sarg.
 * Returns 1 when found, 0 otherwise (so 0 itself is never a member).
 */
int member(int c, int *sarg)
{
	const int *scan;

	for (scan = sarg; *scan != 0; scan++) {
		if (*scan == c)
			return 1;
	}
	return 0;
}
/*
 * resize_gototab: double the transition table of `state`, zero-filling the
 * newly added half.  Allocation failure is reported through overflo().
 */
static void resize_gototab(fa *f, int state)
{
	gtt *tab = &f->gototab[state];
	size_t old_n = tab->allocated;
	size_t new_n = tab->allocated * 2;
	gtte *grown;

	grown = (gtte *) realloc(tab->entries, new_n * sizeof(gtte));
	if (grown == NULL)
		overflo(__func__);
	memset(grown + old_n, 0, old_n * sizeof(gtte));
	tab->entries = grown;
	tab->allocated = new_n;
}
/*
 * get_gototab: look up the cached transition from `state` on character ch.
 * Returns the target state, or 0 when no transition has been recorded.
 * The in-use entries are kept sorted by character, so a binary search is used.
 */
static int get_gototab(fa *f, int state, int ch)
{
	gtt *tab = &f->gototab[state];
	gtte probe;
	gtte *hit;

	probe.ch = ch;
	probe.state = 0;
	hit = (gtte *) bsearch(&probe, tab->entries, tab->inuse,
	    sizeof(gtte), entry_cmp);
	if (hit != NULL)
		return hit->state;
	return 0;
}
/*
 * entry_cmp: qsort()/bsearch() comparator ordering transition-table entries
 * by character.
 *
 * Returns a negative, zero or positive value as l's character is less than,
 * equal to or greater than r's.  The keys are compared as unsigned values,
 * matching the unsigned comparison set_gototab() uses for its append fast
 * path.  Returning a plain difference is avoided because two keys that are
 * more than INT_MAX apart would make the difference wrap and report the
 * wrong order.
 */
static int entry_cmp(const void *l, const void *r)
{
	const gtte *a = (const gtte *) l;
	const gtte *b = (const gtte *) r;
	const unsigned int ach = a->ch;
	const unsigned int bch = b->ch;

	return (ach > bch) - (ach < bch);
}
/*
 * set_gototab: record that `state` goes to `val` on character ch.
 *
 * The in-use entries are kept sorted by character.  Three cheap cases are
 * tried first: an empty table, a character greater than the current last
 * entry (plain append), and a character that is already present (update in
 * place).  Otherwise the entry is appended and the table re-sorted.
 * Returns the stored state.
 */
static int set_gototab(fa *f, int state, int ch, int val)
{
	gtt *tab = &f->gototab[state];
	gtte probe;
	gtte *hit;

	if (tab->inuse == 0) {
		tab->entries[0].ch = ch;
		tab->entries[0].state = val;
		tab->inuse++;
		return val;
	}

	if ((unsigned)ch > tab->entries[tab->inuse - 1].ch) {
		/* sorts after everything present: append keeps the order */
		if (tab->inuse + 1 >= tab->allocated)
			resize_gototab(f, state);
		tab->entries[tab->inuse].ch = ch;
		tab->entries[tab->inuse].state = val;
		tab->inuse++;
		return val;
	}

	probe.ch = ch;
	probe.state = 0;
	hit = (gtte *) bsearch(&probe, tab->entries, tab->inuse,
	    sizeof(gtte), entry_cmp);
	if (hit != NULL) {
		hit->state = val;
		return hit->state;
	}

	/* new character in the middle of the range: append, then re-sort */
	if (tab->inuse + 1 >= tab->allocated)
		resize_gototab(f, state);
	tab->entries[tab->inuse].ch = ch;
	tab->entries[tab->inuse].state = val;
	++tab->inuse;
	qsort(tab->entries, tab->inuse, sizeof(gtte), entry_cmp);
	return val;
}
/*
 * clear_gototab: forget every cached transition of `state`; the table keeps
 * its allocated size but is zero-filled and marked as having nothing in use.
 */
static void clear_gototab(fa *f, int state)
{
	gtt *tab = &f->gototab[state];

	tab->inuse = 0;
	memset(tab->entries, 0, tab->allocated * sizeof(gtte));
}
/*
 * match: report whether f matches anywhere in the string p0.
 *
 * Runes are fed to the automaton one at a time, using the cached transition
 * when there is one and cgoto() otherwise.  Returns 1 as soon as an
 * accepting state is reached, 0 once the terminating NUL has been fed
 * without acceptance.
 */
int match(fa *f, const char *p0)
{
	const uschar *p = (const uschar *) p0;
	int s, ns, n, rune;

	s = f->initstat;
	assert (s < f->state_count);
	if (f->out[s])
		return 1;

	for (;;) {
		n = u8_rune(&rune, (const char *) p);
		ns = get_gototab(f, s, rune);
		s = (ns != 0) ? ns : cgoto(f, s, rune);
		if (f->out[s])
			return 1;
		if (*p == 0)
			return 0;
		p += n;
	}
}
/*
 * pmatch: find the leftmost match of f in p0, taking the longest match at
 * that starting point.
 *
 * On success returns 1 with patbeg pointing at the start of the match and
 * patlen holding its length in bytes (which may be 0).  On failure returns 0
 * with patlen left at -1.
 */
int pmatch(fa *f, const char *p0)
{
int s, ns;
int n;
int rune;
const uschar *p = (const uschar *) p0;
const uschar *q;
s = f->initstat;
assert(s < f->state_count);
patbeg = (const char *)p;
patlen = -1;
/* Outer loop: p is the candidate start of the match. */
do {
q = p;
/* Inner loop: feed runes from q, remembering the longest accepted length. */
do {
if (f->out[s])
patlen = q-p;
n = u8_rune(&rune, (const char *) q);
/* Use the cached transition if present, otherwise let cgoto() compute it. */
if ((ns = get_gototab(f, s, rune)) != 0)
s = ns;
else
s = cgoto(f, s, rune);
assert(s < f->state_count);
/* State 1 is the state with an empty position set: nothing further can match. */
if (s == 1) {
if (patlen >= 0) {
patbeg = (const char *) p;
return(1);
}
else
goto nextin;
}
if (*q == 0)
break;
q += n;
} while (1);
/* The terminating NUL was fed; it must not be counted in the match length. */
q++;
if (f->out[s])
patlen = q-p-1;
if (patlen >= 0) {
patbeg = (const char *) p;
return(1);
}
nextin:
/* Restart from state 2 at the next rune of the input. */
s = 2;
if (*p == 0)
break;
n = u8_rune(&rune, (const char *) p);
p += n;
} while (1);
return (0);
}
/*
 * nematch: like pmatch(), but only a non-empty match counts.
 *
 * On success returns 1 with patbeg at the start of the match and patlen > 0
 * holding its length in bytes.  Returns 0 when no non-empty match exists;
 * an empty input string never matches because the outer loop does not run.
 */
int nematch(fa *f, const char *p0)
{
int s, ns;
int n;
int rune;
const uschar *p = (const uschar *) p0;
const uschar *q;
s = f->initstat;
assert(s < f->state_count);
patbeg = (const char *)p;
patlen = -1;
/* Outer loop: p is the candidate start of the match. */
while (*p) {
q = p;
/* Inner loop: feed runes from q, remembering the longest accepted length. */
do {
if (f->out[s])
patlen = q-p;
n = u8_rune(&rune, (const char *) q);
/* Use the cached transition if present, otherwise let cgoto() compute it. */
if ((ns = get_gototab(f, s, rune)) != 0)
s = ns;
else
s = cgoto(f, s, rune);
/* State 1 has an empty position set: nothing further can match. */
if (s == 1) {
if (patlen > 0) {
patbeg = (const char *) p;
return(1);
} else
goto nnextin;
}
if (*q == 0)
break;
q += n;
} while (1);
/* The terminating NUL was fed; it must not be counted in the match length. */
q++;
if (f->out[s])
patlen = q-p-1;
if (patlen > 0 ) {
patbeg = (const char *) p;
return(1);
}
nnextin:
/* Restart from state 2 at the next position. */
s = 2;
/* NOTE(review): advances one byte, whereas pmatch() advances by the rune length; confirm this is intended for multibyte input. */
p++;
}
return (0);
}
/*
 * fnematch: search the stream f for a non-empty match of pfa, reading
 * characters into *pbuf as needed.
 *
 * Within the buffer, i is the start of the current match attempt, j is the
 * next character to feed to the automaton and k is the end of the data read
 * so far.  *pbuf and *pbufsize are updated when the buffer is grown via
 * adjbuf().
 *
 * Returns true with patbeg/patlen describing the match; characters read past
 * the end of the match are pushed back with ungetc() and the buffer is
 * NUL-terminated there.  Returns false when the input ends without a match.
 */
bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
{
char *i, *j, *k, *buf = *pbuf;
int bufsize = *pbufsize;
int c, n, ns, s;
s = pfa->initstat;
patlen = 0;
i = j = k = buf;
do {
/* Keep at least awk_mb_cur_max bytes buffered ahead of j, unless EOF intervenes. */
if (k - j < (int)awk_mb_cur_max) {
if (k + awk_mb_cur_max > buf + bufsize) {
char *obuf = buf;
adjbuf(&buf, &bufsize,
bufsize + awk_mb_cur_max,
quantum, 0, "fnematch");
*pbufsize = bufsize;
/* The buffer may have moved: rebase every pointer into it. */
if (obuf != buf) {
i = buf + (i - obuf);
j = buf + (j - obuf);
k = buf + (k - obuf);
*pbuf = buf;
if (patlen)
patbeg = buf + (patbeg - obuf);
}
}
/* Read up to awk_mb_cur_max bytes; EOF is stored as a NUL byte. */
for (n = awk_mb_cur_max ; n > 0; n--) {
*k++ = (c = getc(f)) != EOF ? c : 0;
if (c == EOF) {
if (ferror(f))
FATAL("fnematch: getc error");
break;
}
}
}
j += u8_rune(&c, j);
/* Use the cached transition if present, otherwise let cgoto() compute it. */
if ((ns = get_gototab(pfa, s, c)) != 0)
s = ns;
else
s = cgoto(pfa, s, c);
/* Accepting state: remember the match, excluding a trailing NUL. */
if (pfa->out[s]) {
patbeg = i;
patlen = j - i;
if (c == 0)
patlen--;
}
/* Still viable from origin i: feed the next character. */
if (c && s != 1)
continue;
/* Dead end or end of input: stop if a match was recorded. */
if (patlen)
break;
/* No match from origin i: advance i by one rune and restart in state 2. */
i += u8_rune(&c, i);
if (c == 0)
break;
j = i;
s = 2;
} while (1);
if (patlen) {
/* Push back everything read beyond the match, skipping NUL bytes. */
do
if (*--k && ungetc(*k, f) == EOF)
FATAL("unable to ungetc '%c'", *k);
while (k > patbeg + patlen);
*k = '\0';
return true;
}
else
return false;
}
/*
 * reparse: parse the regular expression text p into a tree.
 *
 * An empty expression yields a single EMPTYRE node.  Anything left over
 * after the top-level expression has been parsed is a syntax error.
 */
Node *reparse(const char *p)
{
	Node *tree;

	DPRINTF("reparse <%s>\n", p);
	prestr = (const uschar *) p;
	lastre = prestr;
	rtok = relex();
	if (rtok == '\0')
		return op2(EMPTYRE, NIL, NIL);

	tree = regexp();
	if (rtok == '\0')
		return tree;
	FATAL("syntax error in regular expression %s at %s", lastre, prestr);
	return tree;
}
/*
 * regexp: parse a full expression: a primary, then any concatenated
 * primaries, then any '|' alternatives.
 */
Node *regexp(void)
{
	Node *np;

	np = primary();
	np = concat(np);
	return alt(np);
}
Node *primary(void)
{
Node *np;
int savelastatom;
switch (rtok) {
case CHAR:
lastatom = starttok;
np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
case ALL:
rtok = relex();
return (unary(op2(ALL, NIL, NIL)));
case EMPTYRE:
rtok = relex();
return (unary(op2(EMPTYRE, NIL, NIL)));
case DOT:
lastatom = starttok;
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
lastatom = starttok;
rtok = relex();
return (unary(np));
case NCCL:
np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
lastatom = starttok;
rtok = relex();
return (unary(np));
case '^':
rtok = relex();
return (unary(op2(CHAR, NIL, itonp(HAT))));
case '$':
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
case '(':
lastatom = starttok;
savelastatom = starttok - basestr;
rtok = relex();
if (rtok == ')') {
rtok = relex();
return unary(op2(CCL, NIL, (Node *) cclenter("")));
}
np = regexp();
if (rtok == ')') {
lastatom = basestr + savelastatom;
rtok = relex();
return (unary(np));
}
else
FATAL("syntax error in regular expression %s at %s", lastre, prestr);
default:
FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
}
return 0;
}
/*
 * concat: extend np with every primary that follows by concatenation and
 * return the resulting tree.  Stops at the first token that cannot start a
 * concatenated primary.
 */
Node *concat(Node *np)
{
	for (;;) {
		switch (rtok) {
		case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
			np = op2(CAT, np, primary());
			break;
		case EMPTYRE:
			rtok = relex();
			np = op2(CAT, op2(CCL, NIL, (Node *) cclenter("")),
			    primary());
			break;
		default:
			return np;
		}
	}
}
/*
 * alt: extend np with every '|' alternative that follows, building a
 * left-leaning chain of OR nodes, and return the resulting tree.
 */
Node *alt(Node *np)
{
	while (rtok == OR) {
		rtok = relex();
		np = op2(OR, np, concat(primary()));
	}
	return np;
}
/*
 * unary: wrap np in one node per repetition operator that follows
 * (STAR, PLUS, QUEST or ZERO) and return the result.  The node type is the
 * operator token itself.
 */
Node *unary(Node *np)
{
	int op;

	for (;;) {
		switch (rtok) {
		case STAR:
		case PLUS:
		case QUEST:
		case ZERO:
			op = rtok;
			rtok = relex();
			np = op2(op, np, NIL);
			break;
		default:
			return np;
		}
	}
}
#ifndef HAS_ISBLANK
/*
 * xisblank: stand-in for isblank() when HAS_ISBLANK is not defined.
 * Returns 1 for a space or a tab, 0 for anything else.
 */
int (xisblank)(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
#endif
/*
 * Table of the named character classes accepted inside brackets as
 * [:name:].  Each entry gives the name, its length, and the <ctype.h>-style
 * predicate that relex() calls to enumerate the members.  The table ends with
 * an entry whose name is NULL.
 */
static const struct charclass {
const char *cc_name;
int cc_namelen;
int (*cc_func)(int);
} charclasses[] = {
{ "alnum", 5, isalnum },
{ "alpha", 5, isalpha },
/* Use the local xisblank() when HAS_ISBLANK is not defined. */
#ifndef HAS_ISBLANK
{ "blank", 5, xisblank },
#else
{ "blank", 5, isblank },
#endif
{ "cntrl", 5, iscntrl },
{ "digit", 5, isdigit },
{ "graph", 5, isgraph },
{ "lower", 5, islower },
{ "print", 5, isprint },
{ "punct", 5, ispunct },
{ "space", 5, isspace },
{ "upper", 5, isupper },
{ "xdigit", 6, isxdigit },
{ NULL, 0, NULL },
};
#define REPEAT_SIMPLE 0
#define REPEAT_PLUS_APPENDED 1
#define REPEAT_WITH_Q 2
#define REPEAT_ZERO 3
/*
 * replace_repeat: rewrite the regular expression text so that a {n,m}
 * repetition is expressed with repeated copies of the atom plus '+' / '?'.
 *
 * reptok/reptoklen    the repetition token text and its length
 * atom/atomlen        the atom the repetition applies to
 * firstnum/secondnum  the two counts (secondnum < 0 is used for "{n,}")
 * special_case        one of the REPEAT_* modes
 *
 * A new buffer is built from: the text before reptok, the generated copies,
 * and the text after the repetition token.  basestr is switched to the new
 * buffer (the previous one is freed unless it is the caller-owned
 * firstbasestr) and prestr is positioned where scanning should resume.
 * Returns 1, or 2 for REPEAT_ZERO.
 */
static int
replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
int atomlen, int firstnum, int secondnum, int special_case)
{
int i, j;
uschar *buf = NULL;
int ret = 1;
/* A leading '?' is emitted when the minimum count is zero. */
int init_q = (firstnum == 0);
int n_q_reps = secondnum-firstnum;
int prefix_length = reptok - basestr;
int suffix_length = strlen((const char *) reptok) - reptoklen;
/* Work out how long the rewritten text will be. */
int size = prefix_length + suffix_length;
if (firstnum > 1) {
size += atomlen*(firstnum-1);
}
if (special_case == REPEAT_PLUS_APPENDED) {
size++;
} else if (special_case == REPEAT_WITH_Q) {
size += init_q + (atomlen+1)* (n_q_reps-init_q);
} else if (special_case == REPEAT_ZERO) {
size += 2;
}
if ((buf = (uschar *) malloc(size + 1)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
/* The prefix already ends with one copy of the atom. */
memcpy(buf, basestr, prefix_length);
j = prefix_length;
if (special_case == REPEAT_ZERO) {
/* Back up over the atom and overwrite it with "()". */
j -= atomlen;
buf[j++] = '(';
buf[j++] = ')';
}
/* Mandatory copies beyond the one already present in the prefix. */
for (i = 1; i < firstnum; i++) {
memcpy(&buf[j], atom, atomlen);
j += atomlen;
}
if (special_case == REPEAT_PLUS_APPENDED) {
buf[j++] = '+';
} else if (special_case == REPEAT_WITH_Q) {
if (init_q)
buf[j++] = '?';
/* Optional copies, each made optional with '?'. */
for (i = init_q; i < n_q_reps; i++) {
memcpy(&buf[j], atom, atomlen);
j += atomlen;
buf[j++] = '?';
}
}
memcpy(&buf[j], reptok+reptoklen, suffix_length);
j += suffix_length;
buf[j] = '\0';
/* Free the previous rewritten text, but never the original input. */
if (firstbasestr != basestr) {
if (basestr)
xfree(basestr);
}
basestr = buf;
/* Resume scanning right after the prefix in the new buffer. */
prestr = buf + prefix_length;
if (special_case == REPEAT_ZERO) {
prestr -= atomlen;
ret++;
}
return ret;
}
/*
 * repeat: choose the rewrite mode for a {firstnum,secondnum} repetition and
 * hand the work to replace_repeat().
 *
 * Returns 0 when there is no atom to repeat; otherwise the value returned by
 * replace_repeat().  secondnum < 0 stands for an open upper bound and then
 * requires firstnum >= 2; any other combination with firstnum > secondnum is
 * an internal error.
 */
static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
    int atomlen, int firstnum, int secondnum)
{
	int mode;

	if (atom == NULL)
		return 0;

	if (secondnum < 0) {
		if (firstnum < 2) {
			FATAL("internal error");
			return 0;
		}
		mode = REPEAT_PLUS_APPENDED;
	} else if (firstnum == secondnum) {
		mode = (firstnum == 0) ? REPEAT_ZERO : REPEAT_SIMPLE;
	} else if (firstnum < secondnum) {
		mode = REPEAT_WITH_Q;
	} else {
		FATAL("internal error");
		return 0;
	}
	return replace_repeat(reptok, reptoklen, atom, atomlen,
	    firstnum, secondnum, mode);
}
/*
 * relex: lexical analyser for regular expressions.
 *
 * Scans the next token at prestr and returns its kind: OR, STAR, PLUS,
 * QUEST, DOT, ZERO, CHAR (value in rlxval), CCL/NCCL (class text in rlxstr),
 * one of the literal characters '^' '$' '(' ')', or '\0' at the end of the
 * text.  starttok records where the token began.
 *
 * A brace repetition such as {n,m} is either mapped directly to QUEST, STAR
 * or PLUS, or expanded by rewriting the expression text through repeat(),
 * after which scanning restarts at the rewritten position.
 */
int relex(void)
{
int c, n;
int cflag;
static uschar *buf = NULL;
static int bufsz = 100;
uschar *bp;
const struct charclass *cc;
int i;
int num, m;
bool commafound, digitfound;
const uschar *startreptok;
/* Count of currently open '(' so an unmatched ')' can be treated as a literal. */
static int parens = 0;
rescan:
starttok = prestr;
/* A multibyte rune is always an ordinary character. */
if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) {
prestr += n;
starttok = prestr;
return CHAR;
}
switch (c = *prestr++) {
case '|': return OR;
case '*': return STAR;
case '+': return PLUS;
case '?': return QUEST;
case '.': return DOT;
/* Do not move past the terminator, so repeated calls keep returning '\0'. */
case '\0': prestr--; return '\0';
case '^':
case '$':
return c;
case '(':
parens++;
return c;
case ')':
if (parens) {
parens--;
return c;
}
/* Unmatched ')': treat as a literal character. */
rlxval = c;
return CHAR;
case '\\':
rlxval = quoted(&prestr);
return CHAR;
default:
rlxval = c;
return CHAR;
case '[':
/* Bracket expression: collect its body into buf. */
if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
bp = buf;
/* A leading '^' negates the class. */
if (*prestr == '^') {
cflag = 1;
prestr++;
}
else
cflag = 0;
/* Ask adjbuf() for 5 bytes per remaining input byte, plus one. */
n = 5 * strlen((const char *) prestr)+1;
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
FATAL("out of space for reg expr %.10s...", lastre);
for (; ; ) {
/* Multibyte runes are copied through unchanged. */
if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) {
for (i = 0; i < n; i++)
*bp++ = *prestr++;
continue;
}
if ((c = *prestr++) == '\\') {
/* Keep the escape as-is; cclenter() decodes it later. */
*bp++ = '\\';
if ((c = *prestr++) == '\0')
FATAL("nonterminated character class %.20s...", lastre);
*bp++ = c;
} else if (c == '[' && *prestr == ':') {
/* Possible [:name:] class: look the name up in charclasses. */
for (cc = charclasses; cc->cc_name; cc++)
if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
break;
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
/* Expand the class into every single-byte member (1..UCHAR_MAX). */
for (i = 1; i <= UCHAR_MAX; i++) {
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "relex2"))
FATAL("out of space for reg expr %.10s...", lastre);
if (cc->cc_func(i)) {
/* A backslash member must itself be escaped. */
if (i == '\\') {
*bp++ = '\\';
n++;
}
*bp++ = i;
n++;
}
}
} else
*bp++ = c;
} else if (c == '[' && *prestr == '.') {
/* [.x.] collating symbol: when it is the whole bracket expression, return x as a CHAR. */
char collate_char;
prestr++;
collate_char = *prestr++;
if (*prestr == '.' && prestr[1] == ']') {
prestr += 2;
if (*prestr == ']') {
prestr++;
rlxval = collate_char;
return CHAR;
}
}
} else if (c == '[' && *prestr == '=') {
/* [=x=] equivalence class: handled the same way as a collating symbol. */
char equiv_char;
prestr++;
equiv_char = *prestr++;
if (*prestr == '=' && prestr[1] == ']') {
prestr += 2;
if (*prestr == ']') {
prestr++;
rlxval = equiv_char;
return CHAR;
}
}
} else if (c == '\0') {
FATAL("nonterminated character class %.20s", lastre);
} else if (bp == buf) {
/* The first character is taken literally, even if it is ']'. */
*bp++ = c;
} else if (c == ']') {
/* End of the bracket expression: hand the collected text to the parser. */
*bp++ = 0;
rlxstr = (uschar *) tostring((char *) buf);
if (cflag == 0)
return CCL;
else
return NCCL;
} else
*bp++ = c;
}
break;
case '{':
/* A '{' not followed by a digit is an ordinary character. */
if (isdigit(*(prestr))) {
num = 0;
n = -1; m = -1;
commafound = false;
digitfound = false;
startreptok = prestr-1;
} else {
rlxval = c;
return CHAR;
}
/* Parse {n}, {n,} or {n,m}: n is the minimum, m the maximum. */
for (; ; ) {
if ((c = *prestr++) == '}') {
if (commafound) {
if (digitfound) {
m = num;
if (m < n)
FATAL("illegal repetition expression: class %.20s",
lastre);
/* {0,1} is exactly '?'. */
if (n == 0 && m == 1) {
return QUEST;
}
} else {
/* {0,} is '*' and {1,} is '+'. */
if (n == 0)
return STAR;
else if (n == 1)
return PLUS;
}
} else {
if (digitfound) {
n = num;
m = num;
} else {
FATAL("illegal repetition expression: class %.20s",
lastre);
}
}
/* Expand the repetition by rewriting the expression text, then rescan. */
if (repeat(starttok, prestr-starttok, lastatom,
startreptok - lastatom, n, m) > 0) {
if (n == 0 && m == 0) {
return ZERO;
}
goto rescan;
}
/* repeat() did nothing (it returns 0 when there is no atom). */
return PLUS;
} else if (c == '\0') {
FATAL("nonterminated character class %.20s",
lastre);
} else if (isdigit(c)) {
num = 10 * num + c - '0';
if (num > _POSIX_RE_DUP_MAX)
FATAL("repetition count %.20s too large",
lastre);
digitfound = true;
} else if (c == ',') {
if (commafound)
FATAL("illegal repetition expression: class %.20s",
lastre);
/* The number read so far is the minimum; start over for the maximum. */
commafound = true;
n = num;
digitfound = false;
num = 0;
} else {
FATAL("illegal repetition expression: class %.20s",
lastre);
}
}
break;
}
}
/*
 * cgoto: compute the state reached from state s on character c, creating it
 * if it does not exist yet.
 *
 * The target position set is the union of the follow sets of every position
 * in s that accepts c.  If an existing state (1..curstat) has exactly that
 * set it is reused; otherwise a new state is appended.  Unless c is HAT, the
 * transition is recorded in the transition table of s.  Returns the target
 * state number.
 */
int cgoto(fa *f, int s, int c)
{
int *p, *q;
int i, j, k;
/* setvec must be indexable by every position up to accept. */
while (f->accept >= maxsetvec) {
resizesetvec(__func__);
}
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
resize_state(f, s);
p = f->posns[s];
/* p[0] is the number of positions in s; p[1..] are the positions. */
for (i = 1; i <= *p; i++) {
if ((k = f->re[p[i]].ltype) != FINAL) {
/* Does this position accept c? */
if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
|| (k == DOT && c != 0 && c != HAT)
|| (k == ALL && c != 0)
|| (k == EMPTYRE && c != 0)
|| (k == CCL && member(c, (int *) f->re[p[i]].lval.rp))
|| (k == NCCL && !member(c, (int *) f->re[p[i]].lval.rp) && c != 0 && c != HAT)) {
/* Yes: add its follow set to the target set. */
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
if (q[j] >= maxsetvec) {
resizesetvec(__func__);
}
if (setvec[q[j]] == 0) {
setcnt++;
setvec[q[j]] = 1;
}
}
}
}
}
/* Pack the target set into tmpset: count first, then positions in descending order. */
tmpset[0] = setcnt;
j = 1;
for (i = f->accept; i >= 0; i--)
if (setvec[i]) {
tmpset[j++] = i;
}
resize_state(f, f->curstat > s ? f->curstat : s);
/* Look for an existing state with the same position set. */
for (i = 1; i <= f->curstat; i++) {
p = f->posns[i];
if ((k = tmpset[0]) != p[0])
goto different;
for (j = 1; j <= k; j++)
if (tmpset[j] != p[j])
goto different;
/* Found: cache the transition (HAT transitions are not cached) and reuse it. */
if (c != HAT)
set_gototab(f, s, c, i);
return i;
different:;
}
/* No such state: append a new one. */
++(f->curstat);
resize_state(f, f->curstat);
clear_gototab(f, f->curstat);
xfree(f->posns[f->curstat]);
p = intalloc(setcnt + 1, __func__);
f->posns[f->curstat] = p;
if (c != HAT)
set_gototab(f, s, c, f->curstat);
for (i = 0; i <= setcnt; i++)
p[i] = tmpset[i];
/* The new state is accepting when it contains the accept position. */
if (setvec[f->accept])
f->out[f->curstat] = 1;
else
f->out[f->curstat] = 0;
return f->curstat;
}
/*
 * freefa: release an automaton and everything it owns.  A NULL f is ignored.
 *
 * Frees, in order: each state's transition entries, each state's position
 * array (states 0..curstat), each position's follow set and, for CCL/NCCL
 * positions, its class array, then the saved expression text, the per-state
 * arrays, and finally f itself.
 *
 * f->gototab is released exactly once, after its per-state entries.  It was
 * previously released a second time further down, which is redundant at best
 * and a double free if xfree() does not reset its argument.
 */
void freefa(fa *f)
{
	int i;

	if (f == NULL)
		return;

	for (i = 0; i < f->state_count; i++) {
		xfree(f->gototab[i].entries);
	}
	for (i = 0; i <= f->curstat; i++) {
		xfree(f->posns[i]);
	}
	for (i = 0; i <= f->accept; i++) {
		xfree(f->re[i].lfollow);
		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) {
			xfree(f->re[i].lval.np);
		}
	}
	xfree(f->restr);
	xfree(f->out);
	xfree(f->posns);
	xfree(f->gototab);
	xfree(f);
}