wesnoth/src/formula_tokenizer.cpp
2012-01-07 02:35:17 +00:00

288 lines
7.1 KiB
C++

/* $Id$ */
/*
Copyright (C) 2007 - 2012 by David White <dave.net>
Part of the Silver Tree Project
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by or later.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY.
See the COPYING file for more details.
*/
#include <sstream>
#include "foreach.hpp"
#include "formula_tokenizer.hpp"
namespace formula_tokenizer
{
namespace {
void raise_exception(iterator& i1, iterator i2, std::string str) {
std::ostringstream expr;
while( (i1 != i2) && (*i1 != '\n') ) {
if( (*i1 != '\t') )
expr << *i1;
++i1;
}
if( str.empty() )
throw token_error("Unrecognized token", expr.str() );
else
throw token_error(str, expr.str() );
}
}
token get_token(iterator& i1, iterator i2) {
iterator it = i1;
if( *i1 >= 'A' ) {
//current character is >= 'A', limit search to the upper-half of the ASCII table
// check if we parse now TOKEN_IDENTIFIER or TOKEN_OPERATOR/KEYWORD based on string
if( *i1 <= 'Z' || ( *i1 >= 'a' && *it <= 'z' ) || *i1 == '_' ) {
while( i1 != i2 && ( ( *i1 >= 'a' && *i1 <= 'z' ) || *i1 == '_' || ( *i1 >= 'A' && *i1 <= 'Z' ) ) )
++i1;
int diff = i1 - it;
TOKEN_TYPE t = TOKEN_IDENTIFIER;
//check if this string matches any keyword or an operator
//possible opearators and keywords:
// d, or, def, and, not, fai, where, faiend, functions
if( diff == 1 ) {
if( *it == 'd' )
t = TOKEN_OPERATOR;
} else if( diff == 2 ) {
if( *it == 'o' && *(it+1) == 'r' )
t = TOKEN_OPERATOR;
} else if( diff == 3 ) {
if( *it == 'd' ) { //def
if( *(it+1) == 'e' && *(it+2) == 'f' )
t = TOKEN_KEYWORD;
} else if( *it == 'a' ) { //and
if( *(it+1) == 'n' && *(it+2) == 'd' )
t = TOKEN_OPERATOR;
} else if( *it == 'n' ) { //not
if( *(it+1) == 'o' && *(it+2) == 't' )
t = TOKEN_OPERATOR;
} else if( *it == 'f' ) { //fai
if( *(it+1) == 'a' && *(it+2) == 'i' )
t = TOKEN_KEYWORD;
}
} else if( diff == 5 ) {
std::string s(it, i1);
if( s == "where" )
t = TOKEN_OPERATOR;
} else if( diff == 6 ) {
std::string s(it, i1);
if( s == "faiend" )
t = TOKEN_KEYWORD;
} else if( diff == 9 ) {
std::string s(it, i1);
if( s == "functions" )
t = TOKEN_KEYWORD;
}
return token( it, i1, t);
} else {
//at this point only 3 chars left to check:
if( *i1 == '[' )
return token( it, ++i1, TOKEN_LSQUARE );
if( *i1 == ']' )
return token( it, ++i1, TOKEN_RSQUARE );
if( *i1 == '^' )
return token( it, ++i1, TOKEN_OPERATOR );
}
} else {
//limit search to the lower-half of the ASCII table
//start by checking for whitespaces/end of line char
if( *i1 <= ' ' ) {
if( *i1 == '\n' ) {
return token( it, ++i1, TOKEN_EOL);
} else {
while( i1 != i2 && *i1 <= ' ' && *i1 != '\n' )
++i1;
return token( it, i1, TOKEN_WHITESPACE );
}
//try to further limit number of characters that we need to check:
} else if ( *i1 >= '0' ){
//current character is between '0' and '@'
if( *i1 <= '9' ) {
//we parse integer or decimal number
++i1;
bool dot = false;
while( i1 != i2 ) {
if( *i1 >= '0' && *i1 <= '9' ) {
//do nothing
} else {
//look for '.' in case of decimal numer
if( *i1 == '.' ) {
//allow only one dot in such expression
if( !dot )
dot = true;
else
raise_exception(it, i2, "Multiple dots near decimal expression");
} else
break;
}
++i1;
}
if( dot )
return token( it, i1, TOKEN_DECIMAL );
else
return token( it, i1, TOKEN_INTEGER );
} else {
//current character is between ':' and '@'
//possible tokens at this point that we are intersted with:
// ; < = > <= >=
if( *i1 == ';' ) {
return token( it, ++i1, TOKEN_SEMICOLON);
} else if( *i1 == '=' ) {
return token( it, ++i1, TOKEN_OPERATOR);
} else if( *i1 == '<' ) {
++i1;
if( i1 != i2 ) {
if( *i1 == '=' )
return token( it, ++i1, TOKEN_OPERATOR);
else
return token( it, i1, TOKEN_OPERATOR);
} else
return token( it, i1, TOKEN_OPERATOR);
} else if( *i1 == '>' ) {
++i1;
if( i1 != i2 ) {
if( *i1 == '=' )
return token( it, ++i1, TOKEN_OPERATOR);
else
return token( it, i1, TOKEN_OPERATOR);
} else
return token( it, i1, TOKEN_OPERATOR);
}
}
//current character is between '!' and '/'
} else if ( *i1 == ',' ) {
return token( it, ++i1, TOKEN_COMMA);
} else if ( *i1 == '.' ) {
++i1;
if( i1 != i2 ) {
if( *i1 == '+' || *i1 == '-' || *i1 == '*' || *i1 == '/')
return token( it, ++i1, TOKEN_OPERATOR );
else
return token( it, i1, TOKEN_OPERATOR );
} else {
return token( it, i1, TOKEN_OPERATOR);
}
} else if ( *i1 == '(' ) {
return token( it, ++i1, TOKEN_LPARENS);
} else if ( *i1 == ')' ) {
return token( it, ++i1, TOKEN_RPARENS);
} else if ( *i1 == '\'' ) {
++i1;
while( i1 != i2 && *i1 != '\'' )
++i1;
if( i1 != i2 ) {
return token( it, ++i1, TOKEN_STRING_LITERAL );
} else {
raise_exception(it, i2, "Missing closing ' for formula string");
}
} else if ( *i1 == '#' ) {
++i1;
while( i1 != i2 && *i1 != '#' )
++i1;
if( i1 != i2 ) {
return token( it, ++i1, TOKEN_COMMENT );
} else {
raise_exception(it, i2, "Missing closing # for formula comment");
}
} else if ( *i1 == '+' ) {
return token( it, ++i1, TOKEN_OPERATOR);
} else if ( *i1 == '-' ) {
++i1;
if( i1 != i2 ) {
if( *i1 == '>' )
return token( it, ++i1, TOKEN_POINTER );
else
return token( it, i1, TOKEN_OPERATOR );
} else {
return token( it, i1, TOKEN_OPERATOR);
}
} else if ( *i1 == '*' ) {
return token( it, ++i1, TOKEN_OPERATOR);
} else if ( *i1 == '/' ) {
return token( it, ++i1, TOKEN_OPERATOR);
} else if ( *i1 == '%' ) {
return token( it, ++i1, TOKEN_OPERATOR);
} else if ( *i1 == '!' ) {
++i1;
if( *i1 == '=' )
return token( it, ++i1, TOKEN_OPERATOR);
else
raise_exception(it, i2, std::string() );
}
}
raise_exception(it, i2, std::string() );
return token();
}
}
#ifdef UNIT_TEST_TOKENIZER
int main()
{
using namespace formula_tokenizer;
std::string test = "(abc + 4 * (5+3))^2";
std::string::const_iterator i1 = test.begin();
std::string::const_iterator i2 = test.end();
TOKEN_TYPE types[] = {TOKEN_LPARENS, TOKEN_IDENTIFIER,
TOKEN_WHITESPACE, TOKEN_OPERATOR,
TOKEN_WHITESPACE, TOKEN_INTEGER,
TOKEN_WHITESPACE, TOKEN_OPERATOR,
TOKEN_WHITESPACE, TOKEN_LPARENS,
TOKEN_INTEGER, TOKEN_OPERATOR,
TOKEN_INTEGER, TOKEN_RPARENS,
TOKEN_RPARENS, TOKEN_KEYWORD,
TOKEN_OPERATOR, TOKEN_INTEGER};
std::string tokens[] = {"(", "abc", " ", "+", " ", "4", " ",
"*", " ", "(", "5", "+", "3", ")", ")", "functions"};
for(int n = 0; n != sizeof(types)/sizeof(*types); ++n) {
token t = get_token(i1,i2);
assert(std::string(t.begin,t.end) == tokens[n]);
assert(t.type == types[n]);
}
return 0;
}
#endif