root/src/system/libroot/posix/musl/regex/fnmatch.c
/*
 * An implementation of what I call the "Sea of Stars" algorithm for
 * POSIX fnmatch(). The basic idea is that we factor the pattern into
 * a head component (which we match first and can reject without ever
 * measuring the length of the string), an optional tail component
 * (which only exists if the pattern contains at least one star), and
 * an optional "sea of stars", a set of star-separated components
 * between the head and tail. After the head and tail matches have
 * been removed from the input string, the components in the "sea of
 * stars" are matched sequentially by searching for their first
 * occurrence past the end of the previous match.
 *
 * - Rich Felker, April 2012
 */

#include <string.h>
#include <fnmatch.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>

#define END 0
#define UNMATCHABLE -2
#define BRACKET -3
#define QUESTION -4
#define STAR -5

static int str_next(const char *str, size_t n, size_t *step)
{
        if (!n) {
                *step = 0;
                return 0;
        }
        if (str[0] >= 128U) {
                wchar_t wc;
                int k = mbtowc(&wc, str, n);
                if (k<0) {
                        *step = 1;
                        return -1;
                }
                *step = k;
                return wc;
        }
        *step = 1;
        return str[0];
}

static int pat_next(const char *pat, size_t m, size_t *step, int flags)
{
        int esc = 0;
        if (!m || !*pat) {
                *step = 0;
                return END;
        }
        *step = 1;
        if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
                *step = 2;
                pat++;
                esc = 1;
                goto escaped;
        }
        if (pat[0]=='[') {
                size_t k = 1;
                if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
                if (k<m) if (pat[k] == ']') k++;
                for (; k<m && pat[k] && pat[k]!=']'; k++) {
                        if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
                                int z = pat[k+1];
                                k+=2;
                                if (k<m && pat[k]) k++;
                                while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
                                if (k==m || !pat[k]) break;
                        }
                }
                if (k==m || !pat[k]) {
                        *step = 1;
                        return '[';
                }
                *step = k+1;
                return BRACKET;
        }
        if (pat[0] == '*')
                return STAR;
        if (pat[0] == '?')
                return QUESTION;
escaped:
        if (pat[0] >= 128U) {
                wchar_t wc;
                int k = mbtowc(&wc, pat, m);
                if (k<0) {
                        *step = 0;
                        return UNMATCHABLE;
                }
                *step = k + esc;
                return wc;
        }
        return pat[0];
}

static int casefold(int k)
{
        int c = towupper(k);
        return c == k ? towlower(k) : c;
}

static int match_bracket(const char *p, int k, int kfold)
{
        wchar_t wc;
        int inv = 0;
        p++;
        if (*p=='^' || *p=='!') {
                inv = 1;
                p++;
        }
        if (*p==']') {
                if (k==']') return !inv;
                p++;
        } else if (*p=='-') {
                if (k=='-') return !inv;
                p++;
        }
        wc = p[-1];
        for (; *p != ']'; p++) {
                if (p[0]=='-' && p[1]!=']') {
                        wchar_t wc2;
                        int l = mbtowc(&wc2, p+1, 4);
                        if (l < 0) return 0;
                        if (wc <= wc2)
                                if ((unsigned)k-wc <= wc2-wc ||
                                    (unsigned)kfold-wc <= wc2-wc)
                                        return !inv;
                        p += l-1;
                        continue;
                }
                if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
                        const char *p0 = p+2;
                        int z = p[1];
                        p+=3;
                        while (p[-1]!=z || p[0]!=']') p++;
                        if (z == ':' && p-1-p0 < 16) {
                                char buf[16];
                                memcpy(buf, p0, p-1-p0);
                                buf[p-1-p0] = 0;
                                if (iswctype(k, wctype(buf)) ||
                                    iswctype(kfold, wctype(buf)))
                                        return !inv;
                        }
                        continue;
                }
                if (*p < 128U) {
                        wc = (unsigned char)*p;
                } else {
                        int l = mbtowc(&wc, p, 4);
                        if (l < 0) return 0;
                        p += l-1;
                }
                if (wc==k || wc==kfold) return !inv;
        }
        return inv;
}

static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
{
        const char *p, *ptail, *endpat;
        const char *s, *stail, *endstr;
        size_t pinc, sinc, tailcnt=0;
        int c, k, kfold;

        if (flags & FNM_PERIOD) {
                if (*str == '.' && *pat != '.')
                        return FNM_NOMATCH;
        }
        for (;;) {
                switch ((c = pat_next(pat, m, &pinc, flags))) {
                case UNMATCHABLE:
                        return FNM_NOMATCH;
                case STAR:
                        pat++;
                        m--;
                        break;
                default:
                        k = str_next(str, n, &sinc);
                        if (k <= 0)
                                return (c==END) ? 0 : FNM_NOMATCH;
                        str += sinc;
                        n -= sinc;
                        kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
                        if (c == BRACKET) {
                                if (!match_bracket(pat, k, kfold))
                                        return FNM_NOMATCH;
                        } else if (c != QUESTION && k != c && kfold != c) {
                                return FNM_NOMATCH;
                        }
                        pat+=pinc;
                        m-=pinc;
                        continue;
                }
                break;
        }

        /* Compute real pat length if it was initially unknown/-1 */
        m = strnlen(pat, m);
        endpat = pat + m;

        /* Find the last * in pat and count chars needed after it */
        for (p=ptail=pat; p<endpat; p+=pinc) {
                switch (pat_next(p, endpat-p, &pinc, flags)) {
                case UNMATCHABLE:
                        return FNM_NOMATCH;
                case STAR:
                        tailcnt=0;
                        ptail = p+1;
                        break;
                default:
                        tailcnt++;
                        break;
                }
        }

        /* Past this point we need not check for UNMATCHABLE in pat,
         * because all of pat has already been parsed once. */

        /* Compute real str length if it was initially unknown/-1 */
        n = strnlen(str, n);
        endstr = str + n;
        if (n < tailcnt) return FNM_NOMATCH;

        /* Find the final tailcnt chars of str, accounting for UTF-8.
         * On illegal sequences we may get it wrong, but in that case
         * we necessarily have a matching failure anyway. */
        for (s=endstr; s>str && tailcnt; tailcnt--) {
                if (s[-1] < 128U) s--;
                else while ((unsigned char)*--s-0x80U<0x40 && s>str);
        }
        if (tailcnt) return FNM_NOMATCH;
        stail = s;

        /* Check that the pat and str tails match */
        p = ptail;
        for (;;) {
                c = pat_next(p, endpat-p, &pinc, flags);
                p += pinc;
                if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
                        if (c != END) return FNM_NOMATCH;
                        break;
                }
                s += sinc;
                kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
                if (c == BRACKET) {
                        if (!match_bracket(p-pinc, k, kfold))
                                return FNM_NOMATCH;
                } else if (c != QUESTION && k != c && kfold != c) {
                        return FNM_NOMATCH;
                }
        }

        /* We're all done with the tails now, so throw them out */
        endstr = stail;
        endpat = ptail;

        /* Match pattern components until there are none left */
        while (pat<endpat) {
                p = pat;
                s = str;
                for (;;) {
                        c = pat_next(p, endpat-p, &pinc, flags);
                        p += pinc;
                        /* Encountering * completes/commits a component */
                        if (c == STAR) {
                                pat = p;
                                str = s;
                                break;
                        }
                        k = str_next(s, endstr-s, &sinc);
                        if (!k)
                                return FNM_NOMATCH;
                        kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
                        if (c == BRACKET) {
                                if (!match_bracket(p-pinc, k, kfold))
                                        break;
                        } else if (c != QUESTION && k != c && kfold != c) {
                                break;
                        }
                        s += sinc;
                }
                if (c == STAR) continue;
                /* If we failed, advance str, by 1 char if it's a valid
                 * char, or past all invalid bytes otherwise. */
                k = str_next(str, endstr-str, &sinc);
                if (k > 0) str += sinc;
                else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
        }

        return 0;
}

int fnmatch(const char *pat, const char *str, int flags)
{
        const char *s, *p;
        size_t inc;
        int c;
        if (flags & FNM_PATHNAME) for (;;) {
                for (s=str; *s && *s!='/'; s++);
                for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
                if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
                        return FNM_NOMATCH;
                if (fnmatch_internal(pat, p-pat, str, s-str, flags))
                        return FNM_NOMATCH;
                if (!c) return 0;
                str = s+1;
                pat = p+inc;
        } else if (flags & FNM_LEADING_DIR) {
                for (s=str; *s; s++) {
                        if (*s != '/') continue;
                        if (!fnmatch_internal(pat, -1, str, s-str, flags))
                                return 0;
                }
        }
        return fnmatch_internal(pat, -1, str, -1, flags);
}