formula_tokenizer.cpp

Go to the documentation of this file.
00001 /* $Id: formula_tokenizer.cpp 25713 2008-04-09 18:36:16Z dragonking $ */
00002 /*
00003    Copyright (C) 2007 - 2008 by David White <dave@whitevine.net>
00004    Part of the Silver Tree Project
00005 
00006    This program is free software; you can redistribute it and/or modify
00007    it under the terms of the GNU General Public License version 2 or later.
00008    This program is distributed in the hope that it will be useful,
00009    but WITHOUT ANY WARRANTY.
00010 
00011    See the COPYING file for more details.
00012 */
00013 
#include <cassert>
#include <iostream>
#include <string>

#include <boost/regex.hpp>

#include "foreach.hpp"
#include "formula_tokenizer.hpp"
00020 
00021 namespace formula_tokenizer
00022 {
00023 
00024 namespace {
00025 
00026 using boost::regex;
00027 
00028 struct token_type {
00029     regex re;
00030     TOKEN_TYPE type;
00031 };
00032 
00033 //create the array with list of possible tokens
00034 token_type token_types[] = { { regex("^(not\\b|and\\b|or\\b|where\\b|d(?=[^a-zA-Z])|\\*|\\+|-(?=[^>])|\\^|%|/|<=|>=|<|>|!=|=|\\.)"), TOKEN_OPERATOR },
00035                 { regex("^functions\\b"),  TOKEN_KEYWORD },
00036                 { regex("^def\\b"),        TOKEN_KEYWORD },
00037                 { regex("^'[^']*'"),       TOKEN_STRING_LITERAL },
00038                 { regex("^[a-zA-Z_]+"),    TOKEN_IDENTIFIER },
00039                 { regex("^\\d+"),          TOKEN_INTEGER },
00040                 { regex("^\\("),           TOKEN_LPARENS },
00041                 { regex("^\\)"),           TOKEN_RPARENS },
00042                 { regex("^\\["),           TOKEN_LSQUARE },
00043                 { regex("^\\]"),           TOKEN_RSQUARE },
00044                 { regex("^\\{"),           TOKEN_LBRACKET },
00045                 { regex("^\\}"),           TOKEN_RBRACKET },
00046                 { regex("^#.*?#"),     TOKEN_COMMENT },
00047                 { regex("^,"),             TOKEN_COMMA },
00048                 { regex("^;"),             TOKEN_SEMICOLON },
00049                 { regex("^\\s+"),          TOKEN_WHITESPACE },
00050                 { regex("^->"),          TOKEN_POINTER }
00051 };
00052 
00053 }
00054 
00055 token get_token(iterator& i1, iterator i2) {
00056     foreach(const token_type& t, token_types) {
00057         boost::smatch match;
00058         if(boost::regex_search(i1, i2, match, t.re, boost::match_single_line)) {
00059             token res;
00060             res.type = t.type;
00061             res.begin = i1;
00062             i1 = res.end = i1 + match.length();
00063             
00064             return res;
00065         }
00066     }
00067 
00068     std::cerr << "Unrecognized token: '" << std::string(i1,i2) << "'\n";
00069     throw token_error();
00070 }
00071 
00072 }
00073 
#ifdef UNIT_TEST_TOKENIZER

// Stand-alone smoke test for the tokenizer: tokenizes a fixed expression and
// checks both the text and the type of every produced token.
// Build with -DUNIT_TEST_TOKENIZER to enable.
int main()
{
	using namespace formula_tokenizer;
	std::string test = "(abc + 4 * (5+3))^2";
	std::string::const_iterator i1 = test.begin();
	std::string::const_iterator i2 = test.end();
	// Expected token stream for `test` (17 tokens). The previous table
	// wrongly listed 18 types — including a TOKEN_KEYWORD ("functions")
	// that never occurs in the input — while `tokens` held only 16
	// strings, so the loop read past the end of the array.
	TOKEN_TYPE types[] = {TOKEN_LPARENS, TOKEN_IDENTIFIER,
	                      TOKEN_WHITESPACE, TOKEN_OPERATOR,
	                      TOKEN_WHITESPACE, TOKEN_INTEGER,
	                      TOKEN_WHITESPACE, TOKEN_OPERATOR,
	                      TOKEN_WHITESPACE, TOKEN_LPARENS,
	                      TOKEN_INTEGER, TOKEN_OPERATOR,
	                      TOKEN_INTEGER, TOKEN_RPARENS,
	                      TOKEN_RPARENS, TOKEN_OPERATOR,
	                      TOKEN_INTEGER};
	std::string tokens[] = {"(", "abc", " ", "+", " ", "4", " ",
	                        "*", " ", "(", "5", "+", "3", ")", ")",
	                        "^", "2"};
	// Guard against the two arrays drifting out of sync again.
	assert(sizeof(types)/sizeof(*types) == sizeof(tokens)/sizeof(*tokens));
	for(size_t n = 0; n != sizeof(types)/sizeof(*types); ++n) {
		token t = get_token(i1,i2);
		assert(std::string(t.begin,t.end) == tokens[n]);
		assert(t.type == types[n]);
	}
	assert(i1 == i2); // the whole input must have been consumed
}

#endif

Generated by doxygen 1.5.5 on 23 May 2008 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs