#include "Parser.h"
#include "Pattern.h"
#include "PatternList.h"
#include "RPattern.h"
#include "RPatternList.h"
#include "Range.h"
#include "Rule.h"
#include "Err.h"
#include <String.h>
#include <new>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
using namespace BPrivate::Storage::Sniffer;
// Returns true if ch is an ASCII hexadecimal digit (0-9, a-f, A-F).
static bool
isHexChar(char ch)
{
	if (ch >= '0' && ch <= '9')
		return true;
	if (ch >= 'a' && ch <= 'f')
		return true;
	return ch >= 'A' && ch <= 'F';
}
// Returns true for the characters the scanner treats as whitespace:
// space, newline and tab.
static bool
isWhiteSpace(char ch)
{
	switch (ch) {
		case ' ':
		case '\n':
		case '\t':
			return true;
		default:
			return false;
	}
}
// Returns true if ch is an ASCII octal digit (0-7).
static bool
isOctalChar(char ch)
{
	const int digit = ch - '0';
	return digit >= 0 && digit <= 7;
}
// Returns true if ch is an ASCII decimal digit (0-9).
static bool
isDecimalChar(char ch)
{
	const int digit = ch - '0';
	return digit >= 0 && digit <= 9;
}
// Returns true if ch is one of the single-character punctuation tokens of
// the rule grammar: & ( ) : [ ] |
static bool
isPunctuation(char ch)
{
	// strchr() would also match the terminating NUL, so exclude it explicitly.
	if (ch == '\0')
		return false;
	return strchr("&():[]|", ch) != NULL;
}
// Maps the character following a backslash to the control character it
// denotes (a, b, f, n, r, t, v). Any other character is returned unchanged.
static char
escapeChar(char ch)
{
	// Parallel tables: kCodes[i] is the escape letter, kValues[i] its value.
	static const char kCodes[] = "abfnrtv";
	static const char kValues[] = "\a\b\f\n\r\t\v";

	for (int i = 0; kCodes[i] != '\0'; i++) {
		if (kCodes[i] == ch)
			return kValues[i];
	}
	return ch;
}
// Converts a single hexadecimal digit to its numeric value (0-15).
// Throws a heap-allocated Err (position -1) if hex is not a hex digit.
static char
hexToChar(char hex)
{
	if (hex >= '0' && hex <= '9')
		return hex - '0';
	if (hex >= 'a' && hex <= 'f')
		return hex - 'a' + 10;
	if (hex >= 'A' && hex <= 'F')
		return hex - 'A' + 10;

	std::string message("Sniffer parser error: invalid hex digit '");
	message += hex;
	message += "' passed to hexToChar()";
	throw new Err(message, -1);
}
// Combines two hexadecimal digits into one byte: hi supplies the upper
// four bits, low the lower four. Throws Err* if either digit is invalid.
static char
hexToChar(char hi, char low)
{
	const char upperNibble = hexToChar(hi);
	const char lowerNibble = hexToChar(low);
	return (upperNibble << 4) | lowerNibble;
}
// Converts a three digit octal literal (hi, mid, low) into the byte it
// denotes.
// Throws a heap-allocated Err (position -1) if any argument is not an octal
// digit, or if the value exceeds octal 377 (i.e. hi > '3').
static char
octalToChar(char hi, char mid, char low)
{
	if (!isOctalChar(hi) || !isOctalChar(mid) || !isOctalChar(low)) {
		// The message used to blame hexToChar(); name the function that
		// actually detected the problem.
		throw new Err(
			std::string("Sniffer parser error: invalid octal digit passed to octalToChar()"), -1);
	}

	// Three octal digits can encode up to 0777; only values that fit in
	// one byte (<= 0377) are allowed.
	if ((hi - '0') > 3) {
		throw new Err("Sniffer pattern error: invalid octal literal (octals must be between "
			"octal 0 and octal 377 inclusive)", -1);
	}

	return ((hi - '0') << 6) | ((mid - '0') << 3) | (low - '0');
}
// Converts a two digit octal literal by padding it with a leading zero
// and delegating to the three digit overload.
static char
octalToChar(char hi, char low)
{
	const char leadingZero = '0';
	return octalToChar(leadingZero, hi, low);
}
// Converts a single octal digit by padding it with two leading zeros
// and delegating to the three digit overload.
static char
octalToChar(char octal)
{
	const char leadingZero = '0';
	return octalToChar(leadingZero, leadingZero, octal);
}
// Convenience wrapper: parses the given sniffer rule string with a
// temporary Parser object and returns that parser's status code.
status_t
BPrivate::Storage::Sniffer::parse(const char* rule, Rule* result, BString* parseError)
{
	Parser ruleParser;
	const status_t status = ruleParser.Parse(rule, result, parseError);
	return status;
}
// Creates a token of the given type, remembering the position pos that the
// caller associates with it (the scanner passes the token's start offset).
Token::Token(TokenType type, const ssize_t pos)
:
fType(type),
fPos(pos)
{
}
// Nothing to release; the body is intentionally empty.
Token::~Token()
{
}
// Returns the token type given at construction.
TokenType
Token::Type() const
{
return fType;
}
// Base implementation: always throws a heap-allocated Err carrying this
// token's position. StringToken::String() returns the actual string.
// NOTE(review): presumably declared virtual in Parser.h -- confirm.
const std::string&
Token::String() const
{
throw new Err("Sniffer scanner error: Token::String() called on non-string token", fPos);
}
// Base implementation: always throws a heap-allocated Err carrying this
// token's position. IntToken::Int() returns the actual value.
// NOTE(review): presumably declared virtual in Parser.h -- confirm.
int32
Token::Int() const
{
throw new Err("Sniffer scanner error: Token::Int() called on non-integer token", fPos);
}
// Base implementation: always throws a heap-allocated Err carrying this
// token's position. IntToken::Float() and FloatToken::Float() return
// actual values.
// NOTE(review): presumably declared virtual in Parser.h -- confirm.
double
Token::Float() const
{
throw new Err("Sniffer scanner error: Token::Float() called on non-float token", fPos);
}
// Returns the position given at construction.
ssize_t
Token::Pos() const
{
return fPos;
}
// Two tokens are equal if they have the same type and, for string, integer
// and floating point tokens, the same value. Positions are not compared.
bool
Token::operator==(Token& ref) const
{
	if (Type() != ref.Type())
		return false;

	switch (Type()) {
		case CharacterString:
			return String() == ref.String();
		case Integer:
			return Int() == ref.Int();
		case FloatingPoint:
			return Float() == ref.Float();
		default:
			break;
	}
	// Value-less tokens (punctuation, flags) only need matching types.
	return true;
}
// Creates a CharacterString token holding a copy of str, with position pos.
StringToken::StringToken(const std::string& str, const ssize_t pos)
:
Token(CharacterString, pos),
fString(str)
{
}
// Nothing to release explicitly; the body is intentionally empty.
StringToken::~StringToken()
{
}
// Returns a reference to the string stored in this token.
const std::string&
StringToken::String() const
{
return fString;
}
// Creates an Integer token holding value, with position pos.
IntToken::IntToken(const int32 value, const ssize_t pos)
:
Token(Integer, pos),
fValue(value)
{
}
// Nothing to release; the body is intentionally empty.
IntToken::~IntToken()
{
}
// Returns the integer value stored in this token.
int32
IntToken::Int() const
{
return fValue;
}
// Returns the stored integer converted to double, so that an integer token
// can be read where a floating point value is wanted (ParsePriority() does
// this for priorities).
double
IntToken::Float() const
{
	return static_cast<double>(fValue);
}
// Creates a FloatingPoint token holding value, with position pos.
FloatToken::FloatToken(const double value, const ssize_t pos)
:
Token(FloatingPoint, pos),
fValue(value)
{
}
// Nothing to release; the body is intentionally empty.
FloatToken::~FloatToken()
{
}
// Returns the floating point value stored in this token.
double
FloatToken::Float() const
{
return fValue;
}
// Creates a token stream and immediately tokenizes string via SetTo().
// SetTo() reports scanning errors by throwing Err*, so this constructor can
// throw as well.
TokenStream::TokenStream(const BString& string)
:
fCStatus(B_NO_INIT),
fPos(-1),
fStrLen(-1)
{
SetTo(string);
}
// Creates an uninitialized token stream: status B_NO_INIT, position and
// string length both -1. Call SetTo() before reading tokens.
TokenStream::TokenStream()
:
fCStatus(B_NO_INIT),
fPos(-1),
fStrLen(-1)
{
}
// Releases all tokens held by the stream (see Unset()).
TokenStream::~TokenStream()
{
Unset();
}
// Tokenizes the given rule string, replacing any tokens previously held.
//
// The scanner is a hand-written state machine: each loop iteration reads one
// character from a CharStream and dispatches on the current state. Completed
// tokens are appended via AddToken()/AddString()/AddInt()/AddFloat().
//
// On success the status becomes B_OK, the read position is reset to the
// first token and B_OK is returned. Every scanning error is reported by
// throwing a heap-allocated Err (caught as Err* by Parser::Parse()); in that
// case the status stays B_NO_INIT, as set by the initial Unset().
//
// NOTE(review): the character 0x3 together with stream.IsEmpty() is treated
// as "end of input" throughout; presumably CharStream::Get() returns 0x3
// once the text is exhausted -- confirm against CharStream.
status_t
TokenStream::SetTo(const BString& string)
{
Unset();
fStrLen = string.Length();
CharStream stream(string);
// Scanner states. "Lonely" states have seen a prefix that is not yet a
// complete token (e.g. a sign or decimal point without digits).
typedef enum TokenStreamScannerState {
tsssStart,
tsssOneSingle,
tsssOneDouble,
tsssOneZero,
tsssZeroX,
tsssOneHex,
tsssTwoHex,
tsssIntOrFloat,
tsssFloat,
tsssLonelyDecimalPoint,
tsssLonelyMinusOrPlus,
tsssLonelyFloatExtension,
tsssLonelyFloatExtensionWithSign,
tsssExtendedFloat,
tsssUnquoted,
tsssEscape,
tsssEscapeX,
tsssEscapeOneOctal,
tsssEscapeTwoOctal,
tsssEscapeOneHex,
} TokenStreamScannerState;
TokenStreamScannerState state = tsssStart;
// State to return to once an escape sequence has been consumed.
TokenStreamScannerState escapedState = tsssStart;
// Text of the token currently being assembled.
std::string charStr = "";
// Previously seen characters, needed by the multi-character hex/octal
// states and to tell "-i" from "+i".
char lastChar = 0;
char lastLastChar = 0;
bool keepLooping = true;
// Position at which the current token started.
ssize_t startPos = 0;
while (keepLooping) {
ssize_t pos = stream.Pos();
char ch = stream.Get();
switch (state) {
// Between tokens: decide what kind of token starts here.
case tsssStart:
startPos = pos;
switch (ch) {
// End-of-input marker: only legal once the stream is exhausted.
case 0x3:
if (stream.IsEmpty())
keepLooping = false;
else
throw new Err(std::string("Sniffer pattern error: invalid character '") + ch + "'", pos);
break;
// Whitespace between tokens is skipped.
case '\t':
case '\n':
case ' ':
break;
case '"':
charStr = "";
state = tsssOneDouble;
break;
case '\'':
charStr = "";
state = tsssOneSingle;
break;
// A sign starts a number, or (for '-') possibly the "-i" flag.
case '+':
case '-':
charStr = ch;
lastChar = ch;
state = tsssLonelyMinusOrPlus;
break;
case '.':
charStr = ch;
state = tsssLonelyDecimalPoint;
break;
// A leading zero may start a number or a "0x" hex literal.
case '0':
charStr = ch;
state = tsssOneZero;
break;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
charStr = ch;
state = tsssIntOrFloat;
break;
// Single-character punctuation tokens.
case '&': AddToken(Ampersand, pos); break;
case '(': AddToken(LeftParen, pos); break;
case ')': AddToken(RightParen, pos); break;
case ':': AddToken(Colon, pos); break;
case '[': AddToken(LeftBracket, pos); break;
// A backslash starts an unquoted string with an escape sequence.
case '\\':
charStr = "";
state = tsssEscape;
escapedState = tsssUnquoted;
break;
case ']': AddToken(RightBracket, pos); break;
case '|': AddToken(Divider, pos); break;
default:
throw new Err(std::string("Sniffer pattern error: invalid character '") + ch + "'", pos);
}
break;
// Inside a single-quoted string.
case tsssOneSingle:
switch (ch) {
case '\\':
escapedState = state;
state = tsssEscape;
break;
case '\'':
AddString(charStr, startPos);
state = tsssStart;
break;
// 0x3 is ordinary string content unless the input has really ended.
case 0x3:
if (stream.IsEmpty())
throw new Err(std::string("Sniffer pattern error: unterminated single-quoted string"), pos);
else
charStr += ch;
break;
default:
charStr += ch;
break;
}
break;
// Inside a double-quoted string.
case tsssOneDouble:
switch (ch) {
case '\\':
escapedState = state;
state = tsssEscape;
break;
case '"':
AddString(charStr, startPos);
state = tsssStart;
break;
// 0x3 is ordinary string content unless the input has really ended.
case 0x3:
if (stream.IsEmpty())
throw new Err(std::string("Sniffer pattern error: unterminated double-quoted string"), pos);
else
charStr += ch;
break;
default:
charStr += ch;
break;
}
break;
// Seen a single leading '0'.
case tsssOneZero:
if (ch == 'x') {
// "0x": a hex literal follows; the "0" is not part of the string value.
charStr = "";
state = tsssZeroX;
} else if ('0' <= ch && ch <= '9') {
charStr += ch;
state = tsssIntOrFloat;
} else if (ch == '.') {
charStr += ch;
state = tsssFloat;
} else if (ch == 'e' || ch == 'E') {
charStr += ch;
state = tsssLonelyFloatExtension;
} else {
// Anything else ends the integer; push the character back for rescanning.
AddInt(charStr.c_str(), startPos);
stream.Unget();
state = tsssStart;
}
break;
// Seen "0x": at least one pair of hex digits must follow.
case tsssZeroX:
if (isHexChar(ch)) {
lastChar = ch;
state = tsssOneHex;
} else
throw new Err(std::string("Sniffer pattern error: incomplete hex code"), pos);
break;
// Seen the first digit of a hex pair: the second completes one byte.
case tsssOneHex:
if (isHexChar(ch)) {
try {
charStr += hexToChar(lastChar, ch);
} catch (Err* err) {
// hexToChar() knows no position; fill in the current one and rethrow.
if (err)
err->SetPos(pos);
throw err;
}
state = tsssTwoHex;
} else {
throw new Err(std::string("Sniffer pattern error: bad hex literal"),
pos);
}
break;
// Seen a complete hex pair: either another pair starts or the literal ends.
case tsssTwoHex:
if (isHexChar(ch)) {
lastChar = ch;
state = tsssOneHex;
} else {
// Hex literals are emitted as string tokens made of the decoded bytes.
AddString(charStr, startPos);
stream.Unget();
state = tsssStart;
}
break;
// Digits seen so far could be an integer or the start of a float.
case tsssIntOrFloat:
if (isDecimalChar(ch)) {
charStr += ch;
} else if (ch == '.') {
charStr += ch;
state = tsssFloat;
} else if (ch == 'e' || ch == 'E') {
charStr += ch;
state = tsssLonelyFloatExtension;
} else {
AddInt(charStr.c_str(), startPos);
stream.Unget();
state = tsssStart;
}
break;
// Seen a decimal point: reading the fractional part.
case tsssFloat:
if (isDecimalChar(ch)) {
charStr += ch;
} else if (ch == 'e' || ch == 'E') {
charStr += ch;
state = tsssLonelyFloatExtension;
} else {
AddFloat(charStr.c_str(), startPos);
stream.Unget();
state = tsssStart;
}
break;
// Seen a '.' with no digit after it yet: a digit is required.
case tsssLonelyDecimalPoint:
if (isDecimalChar(ch)) {
charStr += ch;
state = tsssFloat;
} else
throw new Err(std::string("Sniffer pattern error: incomplete floating point number"), pos);
break;
// Seen only a sign so far.
case tsssLonelyMinusOrPlus:
if (isDecimalChar(ch)) {
charStr += ch;
state = tsssIntOrFloat;
} else if (ch == '.') {
charStr += ch;
state = tsssLonelyDecimalPoint;
} else if (ch == 'i' && lastChar == '-') {
// "-i" is the case-insensitivity flag.
AddToken(CaseInsensitiveFlag, startPos);
state = tsssStart;
} else
throw new Err(std::string("Sniffer pattern error: incomplete signed number or invalid flag"), pos);
break;
// Seen the 'e'/'E' of an exponent: a sign or a digit must follow.
case tsssLonelyFloatExtension:
if (ch == '+' || ch == '-') {
charStr += ch;
state = tsssLonelyFloatExtensionWithSign;
} else if (isDecimalChar(ch)) {
charStr += ch;
state = tsssExtendedFloat;
} else
throw new Err(std::string("Sniffer pattern error: incomplete extended-notation floating point number"), pos);
break;
// Seen the exponent sign: at least one digit must follow.
case tsssLonelyFloatExtensionWithSign:
if (isDecimalChar(ch)) {
charStr += ch;
state = tsssExtendedFloat;
} else
throw new Err(std::string("Sniffer pattern error: incomplete extended-notation floating point number"), pos);
break;
// Reading exponent digits.
case tsssExtendedFloat:
if (isDecimalChar(ch)) {
charStr += ch;
state = tsssExtendedFloat;
} else {
AddFloat(charStr.c_str(), startPos);
stream.Unget();
state = tsssStart;
}
break;
// Inside an unquoted string (only entered via a leading backslash).
case tsssUnquoted:
if (ch == '\\') {
escapedState = state;
state = tsssEscape;
} else if (isWhiteSpace(ch) || isPunctuation(ch)) {
AddString(charStr, startPos);
stream.Unget();
state = tsssStart;
} else if (ch == 0x3 && stream.IsEmpty()) {
// Note: state is left at tsssUnquoted here, so the check after the loop
// reports "unterminated rule" even though the string token was added.
AddString(charStr, startPos);
keepLooping = false;
} else {
charStr += ch;
}
break;
// Just after a backslash.
case tsssEscape:
if (isOctalChar(ch)) {
lastChar = ch;
state = tsssEscapeOneOctal;
} else if (ch == 'x') {
state = tsssEscapeX;
} else {
if (ch == 0x3 && stream.IsEmpty())
throw new Err(std::string("Sniffer pattern error: incomplete escape sequence"), pos);
else {
// Simple escape (\n, \t, ...) or an escaped literal character.
charStr += escapeChar(ch);
state = escapedState;
}
}
break;
// Seen "\x": exactly two hex digits must follow.
case tsssEscapeX:
if (isHexChar(ch)) {
lastChar = ch;
state = tsssEscapeOneHex;
} else
throw new Err(std::string("Sniffer pattern error: incomplete escaped hex code"), pos);
break;
// Seen one octal digit of an escape; up to two more may follow.
case tsssEscapeOneOctal:
if (isOctalChar(ch)) {
lastLastChar = lastChar;
lastChar = ch;
state = tsssEscapeTwoOctal;
} else {
// One-digit octal escape; the current character belongs to what follows.
try {
charStr += octalToChar(lastChar);
} catch (Err* err) {
if (err)
err->SetPos(startPos);
throw err;
}
stream.Unget();
state = escapedState;
}
break;
// Seen two octal digits of an escape; one more may follow.
case tsssEscapeTwoOctal:
if (isOctalChar(ch)) {
// Three-digit octal escape; octalToChar() rejects values above 377.
try {
charStr += octalToChar(lastLastChar, lastChar, ch);
} catch (Err* err) {
if (err)
err->SetPos(startPos);
throw err;
}
state = escapedState;
} else {
// Two-digit octal escape; the current character belongs to what follows.
try {
charStr += octalToChar(lastLastChar, lastChar);
} catch (Err* err) {
if (err)
err->SetPos(startPos);
throw err;
}
stream.Unget();
state = escapedState;
}
break;
// Seen "\x" plus one hex digit: the second digit completes the byte.
case tsssEscapeOneHex:
if (isHexChar(ch)) {
try {
charStr += hexToChar(lastChar, ch);
} catch (Err* err) {
if (err)
err->SetPos(pos);
throw err;
}
state = escapedState;
} else
throw new Err(std::string("Sniffer pattern error: incomplete escaped hex code"), pos);
break;
}
}
// The scan only succeeds if it stopped between tokens.
if (state == tsssStart) {
fCStatus = B_OK;
fPos = 0;
} else {
throw new Err("Sniffer pattern error: unterminated rule", stream.Pos());
}
return fCStatus;
}
void
TokenStream::Unset()
{
std::vector<Token*>::iterator i;
for (i = fTokenList.begin(); i != fTokenList.end(); i++)
delete *i;
fTokenList.clear();
fCStatus = B_NO_INIT;
fStrLen = -1;
}
// Returns the current status: B_OK after a successful SetTo(), B_NO_INIT
// after construction without a string or after Unset().
status_t
TokenStream::InitCheck() const
{
return fCStatus;
}
// Returns the next token and advances the read position. The stream keeps
// ownership of the token.
// Throws Err* if the stream is not initialized or no tokens are left.
const Token*
TokenStream::Get()
{
	if (fCStatus != B_OK)
		throw new Err("Sniffer parser error: TokenStream::Get() called on uninitialized TokenStream object", -1);

	const ssize_t tokenCount = (ssize_t)fTokenList.size();
	if (fPos >= tokenCount)
		throw new Err("Sniffer pattern error: unterminated rule", EndPos());

	const Token* next = fTokenList[fPos];
	fPos++;
	return next;
}
// Moves the read position back by one token.
// Throws Err* if the stream is not initialized or already at its beginning.
void
TokenStream::Unget()
{
	if (fCStatus != B_OK)
		throw new Err("Sniffer parser error: TokenStream::Unget() called on uninitialized TokenStream object", -1);

	if (fPos <= 0)
		throw new Err("Sniffer parser error: TokenStream::Unget() called at beginning of token stream", -1);

	--fPos;
}
// Consumes the next token and verifies that it has the given type.
// Throws Err* (positioned at the offending token) on a type mismatch; Get()
// throws if there is no token left.
void
TokenStream::Read(TokenType type)
{
	const Token* next = Get();
	if (next->Type() == type)
		return;

	std::string message("Sniffer pattern error: expected ");
	message += tokenTypeToString(type);
	message += ", found ";
	message += tokenTypeToString(next->Type());
	throw new Err(message.c_str(), next->Pos());
}
// Consumes the next token only if it has the given type. Returns true if
// the token was consumed; otherwise the token is pushed back and false is
// returned. Get() throws if there is no token left.
bool
TokenStream::CondRead(TokenType type)
{
	const Token* next = Get();
	const bool matches = next->Type() == type;
	if (!matches)
		Unget();
	return matches;
}
// Returns the position of the token that Get() would return next, or the
// stored string length (-1 while uninitialized) if there is no such token.
ssize_t
TokenStream::Pos() const
{
	// fPos is -1 on a stream that was never set; without the lower bound
	// check that index would be used to read fTokenList[-1].
	if (fPos < 0 || fPos >= (ssize_t)fTokenList.size())
		return fStrLen;
	return fTokenList[fPos]->Pos();
}
// Returns the length of the string passed to SetTo(), i.e. the position
// just past its end; -1 while the stream is unset.
ssize_t
TokenStream::EndPos() const
{
return fStrLen;
}
// Returns true if no further token can be read: either the stream is not
// initialized or the read position is past the last token.
bool
TokenStream::IsEmpty() const
{
	if (fCStatus != B_OK)
		return true;
	return fPos >= (ssize_t)fTokenList.size();
}
// Appends a value-less token of the given type and position to the list.
void
TokenStream::AddToken(TokenType type, ssize_t pos)
{
	fTokenList.push_back(new Token(type, pos));
}
// Appends a string token holding a copy of str to the list.
void
TokenStream::AddString(const std::string& str, ssize_t pos)
{
	fTokenList.push_back(new StringToken(str, pos));
}
void
TokenStream::AddInt(const char* str, ssize_t pos)
{
int32 value = atol(str);
Token* token = new IntToken(value, pos);
fTokenList.push_back(token);
}
// Converts the text str with atof() and appends the result as a floating
// point token.
void
TokenStream::AddFloat(const char* str, ssize_t pos)
{
	const double parsed = atof(str);
	fTokenList.push_back(new FloatToken(parsed, pos));
}
// Returns a human readable name for the given token type, as used in error
// messages. Unrecognized values yield "UNKNOWN TOKEN TYPE".
const char*
BPrivate::Storage::Sniffer::tokenTypeToString(TokenType type)
{
	switch (type) {
		case LeftParen:				return "LeftParen";
		case RightParen:			return "RightParen";
		case LeftBracket:			return "LeftBracket";
		case RightBracket:			return "RightBracket";
		case Colon:					return "Colon";
		case Divider:				return "Divider";
		case Ampersand:				return "Ampersand";
		case CaseInsensitiveFlag:	return "CaseInsensitiveFlag";
		case CharacterString:		return "CharacterString";
		case Integer:				return "Integer";
		case FloatingPoint:			return "FloatingPoint";
		default:					return "UNKNOWN TOKEN TYPE";
	}
}
// Pre-allocates the Err object that ThrowOutOfMemError() hands out, so that
// reporting an out-of-memory condition needs no allocation of its own. The
// nothrow allocation may leave fOutOfMemErr NULL; the code using it checks
// for that.
Parser::Parser()
:
fOutOfMemErr(new(std::nothrow) Err("Sniffer parser error: out of memory", -1))
{
}
// Frees the pre-allocated out-of-memory error, unless it has already been
// thrown (ThrowOutOfMemError() sets the pointer to NULL in that case).
Parser::~Parser()
{
delete fOutOfMemErr;
}
// Parses the sniffer rule string rule into result.
//
// Returns B_OK on success. Returns B_BAD_VALUE if rule or result is NULL,
// and B_BAD_MIME_SNIFFER_RULE if the rule could not be parsed. Whenever an
// Err is caught and parseError is not NULL, it is set to a descriptive
// message built by ErrorMessage(). Note that a NULL result with a non-NULL
// rule returns B_BAD_VALUE without touching parseError.
status_t
Parser::Parse(const char* rule, Rule* result, BString* parseError)
{
	try {
		if (rule == NULL)
			throw new Err("Sniffer pattern error: NULL pattern", -1);
		if (result == NULL)
			return B_BAD_VALUE;

		const status_t streamStatus = stream.SetTo(rule);
		if (streamStatus != B_OK)
			throw new Err("Sniffer parser error: Unable to intialize token stream", -1);

		ParseRule(result);
	} catch (Err* err) {
		// All scanner and parser errors arrive here as heap-allocated Err
		// objects which this handler owns.
		if (parseError != NULL) {
			const std::string message = ErrorMessage(err, rule);
			parseError->SetTo(message.c_str());
		}
		delete err;

		if (rule == NULL)
			return (status_t)B_BAD_VALUE;
		return (status_t)B_BAD_MIME_SNIFFER_RULE;
	}
	return B_OK;
}
// Builds a two line error description: the rule text on the first line and,
// on the second, a '^' placed under the error position followed by the
// error message. A NULL err, a NULL message or a negative position fall
// back to a generic message and position 0; a NULL rule prints as empty.
std::string
Parser::ErrorMessage(Err* err, const char* rule)
{
	const char* msg = "Sniffer parser error: Unexpected error with no supplied error message";
	ssize_t pos = 0;
	if (err != NULL) {
		if (err->Msg())
			msg = err->Msg();
		if (err->Pos() >= 0)
			pos = err->Pos();
	}

	std::string str;
	if (rule != NULL)
		str = rule;
	str += "\n";
	// Indent the marker so that it lines up with the error position.
	str.append((std::string::size_type)pos, ' ');
	str += "^ ";
	str += msg;
	return str;
}
// Parses a complete rule (a priority followed by a list of expressions)
// from the token stream and stores it in result.
// Throws Err* on any error, including a NULL result.
void
Parser::ParseRule(Rule* result)
{
	if (result == NULL)
		throw new Err("Sniffer parser error: NULL Rule object passed to Parser::ParseRule()", -1);

	// The priority must be read first; it precedes the expressions.
	const double priority = ParsePriority();
	std::vector<DisjList*>* conjunction = ParseConjList();
	result->SetTo(priority, conjunction);
}
double
Parser::ParsePriority()
{
const Token* t = stream.Get();
if (t->Type() == FloatingPoint || t->Type() == Integer) {
double result = t->Float();
if (0.0 <= result && result <= 1.0) {
return result;
} else {
throw new Err("Sniffer pattern error: invalid priority", t->Pos());
}
} else {
throw new Err("Sniffer pattern error: match level expected", t->Pos());
}
}
std::vector<DisjList*>*
Parser::ParseConjList()
{
std::vector<DisjList*>* list = new(std::nothrow) std::vector<DisjList*>;
if (!list)
ThrowOutOfMemError(stream.Pos());
try {
int count = 0;
while (true) {
DisjList* expr = ParseDisjList();
if (!expr) {
break;
} else {
list->push_back(expr);
count++;
}
}
if (count == 0)
throw new Err("Sniffer pattern error: missing expression", -1);
} catch (...) {
delete list;
throw;
}
return list;
}
// Parses the next expression of the rule, which is one of
//   '(' [flag] pattern ...          -> pattern list with range (0, 0)
//   '(' [flag] '[' range ']' ...    -> list of ranged patterns
//   '[' range ']' '(' ...           -> pattern list with that range
// Returns NULL if the token stream is exhausted; throws Err* on errors.
DisjList*
Parser::ParseDisjList()
{
	if (stream.IsEmpty())
		return NULL;

	const Token* first = stream.Get();
	switch (first->Type()) {
		case LeftParen:
		{
			// Look ahead (past an optional case-insensitivity flag) to find
			// out which kind of list follows, then rewind to the '('.
			const Token* second = stream.Get();
			const Token* lookahead = second;
			if (second->Type() == CaseInsensitiveFlag) {
				lookahead = stream.Get();
				stream.Unget();
			}
			stream.Unget();
			stream.Unget();

			if (lookahead->Type() == LeftBracket)
				return ParseRPatternList();
			return ParsePatternList(Range(0, 0));
		}

		case LeftBracket:
			stream.Unget();
			return ParsePatternList(ParseRange());

		default:
			throw new Err("Sniffer pattern error: missing pattern", first->Pos());
	}
}
// Parses a range of the form '[' start ']' or '[' start ':' end ']' and
// returns it; with the first form the end equals the start.
// Throws Err* on unexpected tokens, or the Range's own error if the
// resulting range fails its InitCheck().
Range
Parser::ParseRange()
{
	stream.Read(LeftBracket);

	const Token* startToken = stream.Get();
	if (startToken->Type() != Integer)
		throw new Err("Sniffer pattern error: pattern offset expected", startToken->Pos());
	int32 start = startToken->Int();
	int32 end = start;

	const Token* next = stream.Get();
	if (next->Type() == Colon) {
		const Token* endToken = stream.Get();
		if (endToken->Type() != Integer)
			ThrowUnexpectedTokenError(Integer, endToken);
		end = endToken->Int();
		stream.Read(RightBracket);
	} else if (next->Type() != RightBracket) {
		// After the start offset only ':' or ']' may follow. The message
		// used to claim that an Integer was expected here.
		ThrowUnexpectedTokenError(Colon, RightBracket, next);
	}

	Range range(start, end);
	if (range.InitCheck() != B_OK)
		throw range.GetErr();
	return range;
}
// Parses '(' [flag] pattern { '|' [flag] pattern } ')' into a new
// PatternList that applies the given range to all of its patterns. A
// case-insensitivity flag in front of any pattern marks the whole list.
// The caller owns the returned list. Throws Err* on errors; the partially
// built list is deleted in that case.
DisjList*
Parser::ParsePatternList(Range range)
{
	PatternList* patterns = new(std::nothrow) PatternList(range);
	if (!patterns)
		ThrowOutOfMemError(stream.Pos());

	try {
		stream.Read(LeftParen);
		do {
			if (stream.CondRead(CaseInsensitiveFlag))
				patterns->SetCaseInsensitive(true);
			patterns->Add(ParsePattern());
		} while (stream.CondRead(Divider));

		const Token* closing = stream.Get();
		if (closing->Type() != RightParen)
			throw new Err("Sniffer pattern error: expecting '|', ')', or possibly '&'", closing->Pos());
	} catch (...) {
		delete patterns;
		throw;
	}
	return patterns;
}
// Parses '(' [flag] rpattern { '|' [flag] rpattern } ')' into a new
// RPatternList, where every pattern carries its own range. A
// case-insensitivity flag in front of any pattern marks the whole list.
// The caller owns the returned list. Throws Err* on errors; the partially
// built list is deleted in that case.
DisjList*
Parser::ParseRPatternList()
{
	RPatternList* patterns = new(std::nothrow) RPatternList();
	if (!patterns)
		ThrowOutOfMemError(stream.Pos());

	try {
		stream.Read(LeftParen);
		do {
			if (stream.CondRead(CaseInsensitiveFlag))
				patterns->SetCaseInsensitive(true);
			patterns->Add(ParseRPattern());
		} while (stream.CondRead(Divider));

		const Token* closing = stream.Get();
		if (closing->Type() != RightParen)
			throw new Err("Sniffer pattern error: expecting '|', ')', or possibly '&'", closing->Pos());
	} catch (...) {
		delete patterns;
		throw;
	}
	return patterns;
}
// Parses a range followed by a pattern and returns them combined in a newly
// allocated RPattern owned by the caller.
// Throws Err* if either part fails to parse, if the RPattern reports an
// initialization error, or if memory runs out.
RPattern*
Parser::ParseRPattern()
{
	Range range = ParseRange();
	Pattern* pattern = ParsePattern();

	RPattern* result = new(std::nothrow) RPattern(range, pattern);
	if (!result) {
		// No RPattern was constructed, so pattern is still ours to free;
		// previously it was leaked on this path.
		delete pattern;
		ThrowOutOfMemError(stream.Pos());
		return NULL;
	}

	if (result->InitCheck() != B_OK) {
		// NOTE(review): presumably GetErr() returns an Err that stays valid
		// after result is deleted, and result disposes of pattern -- this
		// mirrors the original code; confirm in RPattern.
		Err* err = result->GetErr();
		delete result;
		throw err;
	}
	return result;
}
// Parses a pattern: a string, optionally followed by '&' and a mask string.
// Returns a newly allocated Pattern owned by the caller.
// Throws Err* if a string token is missing, if the Pattern reports an
// initialization error, or if memory runs out.
Pattern*
Parser::ParsePattern()
{
	const Token* strToken = stream.Get();
	if (strToken->Type() != CharacterString)
		throw new Err("Sniffer pattern error: missing pattern", strToken->Pos());
	std::string str = strToken->String();

	if (!stream.CondRead(Ampersand)) {
		// Plain pattern without a mask.
		Pattern* plain = new(std::nothrow) Pattern(str);
		if (!plain) {
			ThrowOutOfMemError(stream.Pos());
			return NULL;
		}
		if (plain->InitCheck() != B_OK) {
			Err* err = plain->GetErr();
			delete plain;
			throw err;
		}
		return plain;
	}

	// Pattern with a mask: a second string must follow the '&'.
	const Token* maskToken = stream.Get();
	if (maskToken->Type() != CharacterString) {
		ThrowUnexpectedTokenError(CharacterString, maskToken);
		return NULL;
	}

	Pattern* masked = new(std::nothrow) Pattern(str, maskToken->String());
	if (!masked)
		ThrowOutOfMemError(maskToken->Pos());
	if (masked->InitCheck() != B_OK) {
		Err* err = masked->GetErr();
		delete masked;
		// Point the error at the mask token.
		if (err)
			err->SetPos(maskToken->Pos());
		throw err;
	}
	return masked;
}
void
Parser::ThrowEndOfStreamError()
{
throw new Err("Sniffer pattern error: unterminated rule", stream.EndPos());
}
// Throws the pre-allocated out-of-memory Err, positioned at pos. Ownership
// of the object passes to whoever catches it, so fOutOfMemErr is cleared;
// a later call therefore throws a NULL Err pointer.
inline void
Parser::ThrowOutOfMemError(ssize_t pos)
{
	Err* pending = fOutOfMemErr;
	fOutOfMemErr = NULL;
	if (pending != NULL)
		pending->SetPos(pos);
	throw pending;
}
// Throws an Err stating that a token of type expected was wanted but found
// was encountered. A NULL found is reported as "NULL token" at the end of
// the rule text.
void
Parser::ThrowUnexpectedTokenError(TokenType expected, const Token* found)
{
	std::string message("Sniffer pattern error: expected ");
	message += tokenTypeToString(expected);
	message += ", found ";
	message += found ? tokenTypeToString(found->Type()) : "NULL token";

	const ssize_t pos = found ? found->Pos() : stream.EndPos();
	throw new Err(message.c_str(), pos);
}
// Throws an Err stating that a token of type expected1 or expected2 was
// wanted but found was encountered. A NULL found is reported as
// "NULL token" at the end of the rule text.
void
Parser::ThrowUnexpectedTokenError(TokenType expected1, TokenType expected2, const Token* found)
{
	std::string message("Sniffer pattern error: expected ");
	message += tokenTypeToString(expected1);
	message += " or ";
	message += tokenTypeToString(expected2);
	message += ", found ";
	message += found ? tokenTypeToString(found->Type()) : "NULL token";

	const ssize_t pos = found ? found->Pos() : stream.EndPos();
	throw new Err(message.c_str(), pos);
}