root/src/kits/shared/RegExp.cpp
/*
 * Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2013, Rene Gollent, rene@gollent.com.
 * Distributed under the terms of the MIT License.
 */


#include <RegExp.h>

#include <new>

#include <regex.h>

#include <String.h>

#include <Referenceable.h>


// #pragma mark - RegExp::Data


struct RegExp::Data : public BReferenceable {
        Data(const char* pattern, PatternType patternType, bool caseSensitive)
                :
                BReferenceable()
        {
                // convert the shell pattern to a regular expression
                BString patternString;
                if (patternType == PATTERN_TYPE_WILDCARD) {
                        while (*pattern != '\0') {
                                char c = *pattern++;
                                switch (c) {
                                        case '?':
                                                patternString += '.';
                                                continue;
                                        case '*':
                                                patternString += ".*";
                                                continue;
                                        case '[':
                                        {
                                                // find the matching ']' first
                                                const char* end = pattern;
                                                while (*end != ']') {
                                                        if (*end++ == '\0') {
                                                                fError = REG_EBRACK;
                                                                return;
                                                        }
                                                }

                                                if (pattern == end) {
                                                        // Empty bracket expression. It will never match
                                                        // anything. Strictly speaking this is not
                                                        // considered an error, but we handle it like one.
                                                        fError = REG_EBRACK;
                                                        return;
                                                }

                                                patternString += '[';

                                                // We need to avoid "[." ... ".]", "[=" ... "=]", and
                                                // "[:" ... ":]" sequences, since those have special
                                                // meaning in regular expressions. If we encounter
                                                // a '[' followed by either of '.', '=', or ':', we
                                                // replace the '[' by "[.[.]".
                                                while (pattern < end) {
                                                        c = *pattern++;
                                                        if (c == '[' && pattern < end) {
                                                                switch (*pattern) {
                                                                        case '.':
                                                                        case '=':
                                                                        case ':':
                                                                                patternString += "[.[.]";
                                                                                continue;
                                                                }
                                                        }
                                                        patternString += c;
                                                }

                                                pattern++;
                                                patternString += ']';
                                                break;
                                        }

                                        case '\\':
                                        {
                                                // Quotes the next character. Works the same way for
                                                // regular expressions.
                                                if (*pattern == '\0') {
                                                        fError = REG_EESCAPE;
                                                        return;
                                                }

                                                patternString += '\\';
                                                patternString += *pattern++;
                                                break;
                                        }

                                        case '^':
                                        case '.':
                                        case '$':
                                        case '(':
                                        case ')':
                                        case '|':
                                        case '+':
                                        case '{':
                                                // need to be quoted
                                                patternString += '\\';
                                                // fall through
                                        default:
                                                patternString += c;
                                                break;
                                }
                        }

                        pattern = patternString.String();
                }

                int flags = REG_EXTENDED;
                if (!caseSensitive)
                        flags |= REG_ICASE;

                fError = regcomp(&fCompiledExpression, pattern, flags);
        }

        ~Data()
        {
                if (fError == 0)
                        regfree(&fCompiledExpression);
        }

        bool IsValid() const
        {
                return fError == 0;
        }

        const regex_t* CompiledExpression() const
        {
                return &fCompiledExpression;
        }

private:
        int             fError;
        regex_t fCompiledExpression;
};


// #pragma mark - RegExp::MatchResultData


struct RegExp::MatchResultData : public BReferenceable {
        MatchResultData(const regex_t* compiledExpression, const char* string)
                :
                BReferenceable(),
                fMatchCount(0),
                fMatches(NULL)
        {
                // fMatchCount is always set to the number of matching groups in the
                // expression (or 0 if an error occured). Some of the "matches" in
                // the array may still point to the (-1,-1) range if they don't
                // actually match anything.
                fMatchCount = compiledExpression->re_nsub + 1;
                fMatches = new regmatch_t[fMatchCount];
                if (regexec(compiledExpression, string, fMatchCount, fMatches, 0)
                                != 0) {
                        delete[] fMatches;
                        fMatches = NULL;
                        fMatchCount = 0;
                }
        }

        ~MatchResultData()
        {
                delete[] fMatches;
        }

        size_t MatchCount() const
        {
                return fMatchCount;
        }

        const regmatch_t* Matches() const
        {
                return fMatches;
        }

private:
        size_t          fMatchCount;
        regmatch_t*     fMatches;
};


// #pragma mark - RegExp


RegExp::RegExp()
        :
        fData(NULL)
{
}


RegExp::RegExp(const char* pattern, PatternType patternType,
        bool caseSensitive)
        :
        fData(NULL)
{
        SetPattern(pattern, patternType, caseSensitive);
}


RegExp::RegExp(const RegExp& other)
        :
        fData(other.fData)
{
        if (fData != NULL)
                fData->AcquireReference();
}


RegExp::~RegExp()
{
        if (fData != NULL)
                fData->ReleaseReference();
}


bool
RegExp::SetPattern(const char* pattern, PatternType patternType,
        bool caseSensitive)
{
        if (fData != NULL) {
                fData->ReleaseReference();
                fData = NULL;
        }

        Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive);
        if (newData == NULL)
                return false;

        BReference<Data> dataReference(newData, true);
        if (!newData->IsValid())
                return false;

        fData = dataReference.Detach();
        return true;
}


RegExp::MatchResult
RegExp::Match(const char* string) const
{
        if (!IsValid())
                return MatchResult();

        return MatchResult(
                new(std::nothrow) MatchResultData(fData->CompiledExpression(),
                        string));
}


RegExp&
RegExp::operator=(const RegExp& other)
{
        if (fData != NULL)
                fData->ReleaseReference();

        fData = other.fData;

        if (fData != NULL)
                fData->AcquireReference();

        return *this;
}


// #pragma mark - RegExp::MatchResult


RegExp::MatchResult::MatchResult()
        :
        fData(NULL)
{
}


RegExp::MatchResult::MatchResult(MatchResultData* data)
        :
        fData(data)
{
}


RegExp::MatchResult::MatchResult(const MatchResult& other)
        :
        fData(other.fData)
{
        if (fData != NULL)
                fData->AcquireReference();
}


RegExp::MatchResult::~MatchResult()
{
        if (fData != NULL)
                fData->ReleaseReference();
}


bool
RegExp::MatchResult::HasMatched() const
{
        return fData != NULL && fData->MatchCount() > 0;
}


size_t
RegExp::MatchResult::StartOffset() const
{
        return fData != NULL && fData->MatchCount() > 0
                ? fData->Matches()[0].rm_so : 0;
}


size_t
RegExp::MatchResult::EndOffset() const
{
        return fData != NULL && fData->MatchCount() > 0
                ? fData->Matches()[0].rm_eo : 0;
}


size_t
RegExp::MatchResult::GroupCount() const
{
        if (fData == NULL)
                return 0;

        size_t matchCount = fData->MatchCount();
        return matchCount > 0 ? matchCount - 1 : 0;
}


size_t
RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
{
        return fData != NULL && fData->MatchCount() > index + 1
                ? fData->Matches()[index + 1].rm_so : 0;
}


size_t
RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
{
        return fData != NULL && fData->MatchCount() > index + 1
                ? fData->Matches()[index + 1].rm_eo : 0;
}


RegExp::MatchResult&
RegExp::MatchResult::operator=(const MatchResult& other)
{
        if (fData != NULL)
                fData->ReleaseReference();

        fData = other.fData;

        if (fData != NULL)
                fData->AcquireReference();

        return *this;
}