mirror of
https://github.com/wesnoth/wesnoth
synced 2025-05-11 02:46:32 +00:00
288 lines
7.1 KiB
C++
288 lines
7.1 KiB
C++
/* $Id$ */
|
|
/*
|
|
Copyright (C) 2007 - 2012 by David White <dave.net>
|
|
Part of the Silver Tree Project
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by or later.
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY.
|
|
|
|
See the COPYING file for more details.
|
|
*/
|
|
|
|
#include <sstream>
|
|
|
|
#include "foreach.hpp"
|
|
#include "formula_tokenizer.hpp"
|
|
|
|
namespace formula_tokenizer
|
|
{
|
|
|
|
namespace {
|
|
|
|
/**
 * Throws a token_error describing a tokenizer failure.
 *
 * Collects the characters from @a i1 up to (not including) the next '\n'
 * or @a i2, whichever comes first, skipping tab characters, and passes the
 * collected text to token_error as the offending expression.
 *
 * @param i1   Start of the text to report; advanced in place while the
 *             text is collected.
 * @param i2   End of the input range.
 * @param str  Error description; when empty, "Unrecognized token" is used.
 *
 * @throws token_error always; this function never returns normally.
 */
void raise_exception(iterator& i1, iterator i2, std::string str) {
	std::ostringstream line;

	for( ; i1 != i2 && *i1 != '\n'; ++i1 ) {
		// Tabs are left out of the reported expression.
		if( *i1 == '\t' )
			continue;

		line << *i1;
	}

	if( !str.empty() )
		throw token_error(str, line.str() );

	throw token_error("Unrecognized token", line.str() );
}
|
|
|
|
}
|
|
|
|
token get_token(iterator& i1, iterator i2) {
|
|
|
|
iterator it = i1;
|
|
if( *i1 >= 'A' ) {
|
|
//current character is >= 'A', limit search to the upper-half of the ASCII table
|
|
|
|
// check if we parse now TOKEN_IDENTIFIER or TOKEN_OPERATOR/KEYWORD based on string
|
|
if( *i1 <= 'Z' || ( *i1 >= 'a' && *it <= 'z' ) || *i1 == '_' ) {
|
|
|
|
while( i1 != i2 && ( ( *i1 >= 'a' && *i1 <= 'z' ) || *i1 == '_' || ( *i1 >= 'A' && *i1 <= 'Z' ) ) )
|
|
++i1;
|
|
|
|
int diff = i1 - it;
|
|
TOKEN_TYPE t = TOKEN_IDENTIFIER;
|
|
|
|
//check if this string matches any keyword or an operator
|
|
//possible opearators and keywords:
|
|
// d, or, def, and, not, fai, where, faiend, functions
|
|
if( diff == 1 ) {
|
|
if( *it == 'd' )
|
|
t = TOKEN_OPERATOR;
|
|
} else if( diff == 2 ) {
|
|
if( *it == 'o' && *(it+1) == 'r' )
|
|
t = TOKEN_OPERATOR;
|
|
} else if( diff == 3 ) {
|
|
if( *it == 'd' ) { //def
|
|
if( *(it+1) == 'e' && *(it+2) == 'f' )
|
|
t = TOKEN_KEYWORD;
|
|
} else if( *it == 'a' ) { //and
|
|
if( *(it+1) == 'n' && *(it+2) == 'd' )
|
|
t = TOKEN_OPERATOR;
|
|
} else if( *it == 'n' ) { //not
|
|
if( *(it+1) == 'o' && *(it+2) == 't' )
|
|
t = TOKEN_OPERATOR;
|
|
} else if( *it == 'f' ) { //fai
|
|
if( *(it+1) == 'a' && *(it+2) == 'i' )
|
|
t = TOKEN_KEYWORD;
|
|
}
|
|
} else if( diff == 5 ) {
|
|
std::string s(it, i1);
|
|
if( s == "where" )
|
|
t = TOKEN_OPERATOR;
|
|
} else if( diff == 6 ) {
|
|
std::string s(it, i1);
|
|
if( s == "faiend" )
|
|
t = TOKEN_KEYWORD;
|
|
} else if( diff == 9 ) {
|
|
std::string s(it, i1);
|
|
if( s == "functions" )
|
|
t = TOKEN_KEYWORD;
|
|
}
|
|
|
|
return token( it, i1, t);
|
|
} else {
|
|
//at this point only 3 chars left to check:
|
|
if( *i1 == '[' )
|
|
return token( it, ++i1, TOKEN_LSQUARE );
|
|
|
|
if( *i1 == ']' )
|
|
return token( it, ++i1, TOKEN_RSQUARE );
|
|
|
|
if( *i1 == '^' )
|
|
return token( it, ++i1, TOKEN_OPERATOR );
|
|
|
|
}
|
|
} else {
|
|
//limit search to the lower-half of the ASCII table
|
|
//start by checking for whitespaces/end of line char
|
|
if( *i1 <= ' ' ) {
|
|
if( *i1 == '\n' ) {
|
|
return token( it, ++i1, TOKEN_EOL);
|
|
} else {
|
|
|
|
while( i1 != i2 && *i1 <= ' ' && *i1 != '\n' )
|
|
++i1;
|
|
|
|
return token( it, i1, TOKEN_WHITESPACE );
|
|
}
|
|
//try to further limit number of characters that we need to check:
|
|
} else if ( *i1 >= '0' ){
|
|
//current character is between '0' and '@'
|
|
if( *i1 <= '9' ) {
|
|
//we parse integer or decimal number
|
|
++i1;
|
|
bool dot = false;
|
|
|
|
while( i1 != i2 ) {
|
|
if( *i1 >= '0' && *i1 <= '9' ) {
|
|
//do nothing
|
|
} else {
|
|
//look for '.' in case of decimal numer
|
|
if( *i1 == '.' ) {
|
|
//allow only one dot in such expression
|
|
if( !dot )
|
|
dot = true;
|
|
else
|
|
raise_exception(it, i2, "Multiple dots near decimal expression");
|
|
} else
|
|
break;
|
|
}
|
|
++i1;
|
|
}
|
|
|
|
if( dot )
|
|
return token( it, i1, TOKEN_DECIMAL );
|
|
else
|
|
return token( it, i1, TOKEN_INTEGER );
|
|
|
|
} else {
|
|
//current character is between ':' and '@'
|
|
//possible tokens at this point that we are intersted with:
|
|
// ; < = > <= >=
|
|
|
|
if( *i1 == ';' ) {
|
|
return token( it, ++i1, TOKEN_SEMICOLON);
|
|
} else if( *i1 == '=' ) {
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
} else if( *i1 == '<' ) {
|
|
++i1;
|
|
if( i1 != i2 ) {
|
|
if( *i1 == '=' )
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
else
|
|
return token( it, i1, TOKEN_OPERATOR);
|
|
} else
|
|
return token( it, i1, TOKEN_OPERATOR);
|
|
} else if( *i1 == '>' ) {
|
|
++i1;
|
|
if( i1 != i2 ) {
|
|
if( *i1 == '=' )
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
else
|
|
return token( it, i1, TOKEN_OPERATOR);
|
|
} else
|
|
return token( it, i1, TOKEN_OPERATOR);
|
|
}
|
|
}
|
|
//current character is between '!' and '/'
|
|
} else if ( *i1 == ',' ) {
|
|
return token( it, ++i1, TOKEN_COMMA);
|
|
|
|
} else if ( *i1 == '.' ) {
|
|
++i1;
|
|
|
|
if( i1 != i2 ) {
|
|
if( *i1 == '+' || *i1 == '-' || *i1 == '*' || *i1 == '/')
|
|
return token( it, ++i1, TOKEN_OPERATOR );
|
|
else
|
|
return token( it, i1, TOKEN_OPERATOR );
|
|
} else {
|
|
return token( it, i1, TOKEN_OPERATOR);
|
|
}
|
|
|
|
} else if ( *i1 == '(' ) {
|
|
return token( it, ++i1, TOKEN_LPARENS);
|
|
|
|
} else if ( *i1 == ')' ) {
|
|
return token( it, ++i1, TOKEN_RPARENS);
|
|
|
|
} else if ( *i1 == '\'' ) {
|
|
++i1;
|
|
while( i1 != i2 && *i1 != '\'' )
|
|
++i1;
|
|
|
|
if( i1 != i2 ) {
|
|
return token( it, ++i1, TOKEN_STRING_LITERAL );
|
|
} else {
|
|
raise_exception(it, i2, "Missing closing ' for formula string");
|
|
}
|
|
|
|
} else if ( *i1 == '#' ) {
|
|
++i1;
|
|
while( i1 != i2 && *i1 != '#' )
|
|
++i1;
|
|
|
|
if( i1 != i2 ) {
|
|
return token( it, ++i1, TOKEN_COMMENT );
|
|
} else {
|
|
raise_exception(it, i2, "Missing closing # for formula comment");
|
|
}
|
|
|
|
} else if ( *i1 == '+' ) {
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
|
|
} else if ( *i1 == '-' ) {
|
|
++i1;
|
|
|
|
if( i1 != i2 ) {
|
|
if( *i1 == '>' )
|
|
return token( it, ++i1, TOKEN_POINTER );
|
|
else
|
|
return token( it, i1, TOKEN_OPERATOR );
|
|
} else {
|
|
return token( it, i1, TOKEN_OPERATOR);
|
|
}
|
|
|
|
} else if ( *i1 == '*' ) {
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
|
|
} else if ( *i1 == '/' ) {
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
|
|
} else if ( *i1 == '%' ) {
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
|
|
} else if ( *i1 == '!' ) {
|
|
++i1;
|
|
if( *i1 == '=' )
|
|
return token( it, ++i1, TOKEN_OPERATOR);
|
|
else
|
|
raise_exception(it, i2, std::string() );
|
|
}
|
|
}
|
|
raise_exception(it, i2, std::string() );
|
|
return token();
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef UNIT_TEST_TOKENIZER

#include <cassert>
#include <cstddef>
#include <string>

/**
 * Stand-alone self-test for get_token(), built only when
 * UNIT_TEST_TOKENIZER is defined.
 *
 * Tokenizes a fixed expression and checks the text and type of every token
 * against the expected tables below. The input contains the "functions"
 * keyword so that the keyword path is exercised as well; the two tables
 * must list exactly one entry per token of the input.
 */
int main()
{
	using namespace formula_tokenizer;

	const std::string test = "(abc + 4 * (5+3))functions^2";
	std::string::const_iterator i1 = test.begin();
	std::string::const_iterator i2 = test.end();

	TOKEN_TYPE types[] = {TOKEN_LPARENS, TOKEN_IDENTIFIER,
	                      TOKEN_WHITESPACE, TOKEN_OPERATOR,
	                      TOKEN_WHITESPACE, TOKEN_INTEGER,
	                      TOKEN_WHITESPACE, TOKEN_OPERATOR,
	                      TOKEN_WHITESPACE, TOKEN_LPARENS,
	                      TOKEN_INTEGER, TOKEN_OPERATOR,
	                      TOKEN_INTEGER, TOKEN_RPARENS,
	                      TOKEN_RPARENS, TOKEN_KEYWORD,
	                      TOKEN_OPERATOR, TOKEN_INTEGER};
	std::string tokens[] = {"(", "abc", " ", "+", " ", "4", " ",
	                        "*", " ", "(", "5", "+", "3", ")", ")",
	                        "functions", "^", "2"};

	const std::size_t count = sizeof(types)/sizeof(*types);

	// Both tables are indexed in lock-step, so they must be the same length.
	assert(sizeof(tokens)/sizeof(*tokens) == count);

	for(std::size_t n = 0; n != count; ++n) {
		// Guards against running get_token() on an exhausted range.
		assert(i1 != i2);

		token t = get_token(i1,i2);
		assert(std::string(t.begin,t.end) == tokens[n]);
		assert(t.type == types[n]);
	}

	// Every character of the input must have been consumed.
	assert(i1 == i2);

	return 0;
}

#endif
|