parser.cpp

Go to the documentation of this file.
00001 /* $Id: parser.cpp 24730 2008-03-17 05:11:14Z dave $ */
00002 /*
00003    Copyright (C) 2003 by David White <dave@whitevine.net>
00004    Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
00005    Copyright (C) 2005 - 2008 by Philippe Plantier <ayin@anathas.org>
00006    Part of the Battle for Wesnoth Project http://www.wesnoth.org/
00007 
00008    This program is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU General Public License version 2
00010    or at your option any later version.
00011    This program is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY.
00013 
00014    See the COPYING file for more details.
00015 */
00016 
00017 //! @file serialization/parser.cpp 
00018 //! Read/Write & analyse WML- and config-files.
00019 
00020 #include "serialization/parser.hpp"
00021 
00022 #include "config.hpp"
00023 #include "filesystem.hpp"
00024 #include "gettext.hpp"
00025 #include "language.hpp"
00026 #include "loadscreen.hpp"
00027 #include "log.hpp"
00028 #include "util.hpp"
00029 #include "wesconfig.h"
00030 #include "serialization/binary_wml.hpp"
00031 #include "serialization/preprocessor.hpp"
00032 #include "serialization/string_utils.hpp"
00033 #include "serialization/tokenizer.hpp"
00034 
00035 #include <cassert>
00036 #include <sstream>
00037 #include <stack>
00038 
00039 #include <boost/iostreams/filtering_stream.hpp>
00040 #include <boost/iostreams/filter/gzip.hpp>
00041 
// Logging shorthands: each expands to LOG_STREAM with `config` as the second
// argument and err / warn / info respectively as the first.
00042 #define ERR_CF LOG_STREAM(err, config)
00043 #define WRN_CF LOG_STREAM(warn, config)
00044 #define LOG_CF LOG_STREAM(info, config)
00045 
// Deepest nesting level write_internal() accepts; beyond it a config::error is thrown.
00046 static const size_t max_recursion_levels = 1000;
00047 
00048 namespace {
00049 
00050 class parser
00051 {
00052     parser();
00053     parser(const parser&);
00054     parser& operator=(const parser&);
00055 public:
00056     parser(config& cfg, std::istream& in);
00057     ~parser();
00058     void operator() (std::string* error_log=NULL);
00059 
00060 private:
00061     void parse_element();
00062     void parse_variable();
00063     void parse_directive();
00064     std::string lineno_string(utils::string_map &map, std::string const &lineno,
00065                       std::string const &error_string);
00066     void error(const std::string& message);
00067 
00068     config& cfg_;
00069     tokenizer *tok_;
00070 
00071     struct element {
00072         element(config *cfg, std::string 
00073             const &name, std::string const &start_line) :
00074                 cfg(cfg), 
00075                 name(name),
00076                 last_element_map(),
00077                 start_line(start_line) 
00078             {}
00079 
00080         config* cfg;
00081         std::string name;
00082 
00083         std::map<std::string, config*> last_element_map;
00084         std::string start_line;
00085     };
00086 
00087     std::stack<element> elements;
00088 };
00089 
00090 parser::parser(config &cfg, std::istream &in) :
00091         cfg_(cfg), 
00092         tok_(new tokenizer(in)),
00093         elements()
00094 {
00095 }
00096 
00097 
00098 parser::~parser()
00099 {
00100     if(tok_) {
00101         delete tok_;
00102     }
00103 }
00104 
00105 void parser::operator()(std::string* error_log)
00106 {
00107     cfg_.clear();
00108     elements.push(element(&cfg_, "", ""));
00109 
00110     do {
00111         try {
00112             tok_->next_token();
00113 
00114             switch(tok_->current_token().type) {
00115             case token::LF:
00116                 continue;
00117             case '[':
00118                 parse_element();
00119                 break;
00120             case token::STRING:
00121                 parse_variable();
00122                 break;
00123             default:
00124                 error(_("Unexpected characters at line start"));
00125                 break;
00126             case token::END:
00127                 break;
00128             }
00129         } catch(config::error& e) {
00130             if(error_log == NULL)
00131                 throw;
00132 
00133             // On error, dump tokens to the next LF
00134             while(tok_->current_token().type != token::LF &&
00135                     tok_->current_token().type != token::END) {
00136                 tok_->next_token();
00137             }
00138 
00139             *error_log += e.message + '\n';
00140         }
00141         increment_parser_progress();
00142     } while (tok_->current_token().type != token::END);
00143 
00144     // The main element should be there. If it is not, this is a parser error.
00145     assert(!elements.empty());
00146 
00147     if(elements.size() != 1) {
00148         utils::string_map i18n_symbols;
00149         i18n_symbols["tag"] = elements.top().name;
00150         error(lineno_string(i18n_symbols, elements.top().start_line,
00151                 N_("Missing closing tag for tag $tag at $pos")));
00152     }
00153 }
00154 
00155 void parser::parse_element()
00156 {
00157     tok_->next_token();
00158     std::string elname;
00159     config* current_element = NULL;
00160     std::map<std::string, config*>::const_iterator last_element_itor;
00161 
00162     switch(tok_->current_token().type) {
00163     case token::STRING: // [element]
00164         elname = tok_->current_token().value;
00165         if (tok_->next_token().type != ']')
00166             error(_("Unterminated [element] tag"));
00167 
00168         // Add the element
00169         current_element = &(elements.top().cfg->add_child(elname));
00170         elements.top().last_element_map[elname] = current_element;
00171         elements.push(element(current_element, elname, tok_->get_line()));
00172         break;
00173 
00174     case '+': // [+element]
00175         if (tok_->next_token().type != token::STRING)
00176             error(_("Invalid tag name"));
00177         elname = tok_->current_token().value;
00178         if (tok_->next_token().type != ']')
00179             error(_("Unterminated [+element] tag"));
00180 
00181         // Find the last child of the current element whose name is
00182         // element
00183         last_element_itor = elements.top().last_element_map.find(elname);
00184         if(last_element_itor == elements.top().last_element_map.end()) {
00185             current_element = &elements.top().cfg->add_child(elname);
00186         } else {
00187             current_element = last_element_itor->second;
00188         }
00189         elements.top().last_element_map[elname] = current_element;
00190         elements.push(element(current_element, elname, tok_->get_line()));
00191         break;
00192 
00193     case '/': // [/element]
00194         if(tok_->next_token().type != token::STRING)
00195             error(_("Invalid closing tag name"));
00196         elname = tok_->current_token().value;
00197         if(tok_->next_token().type != ']')
00198             error(_("Unterminated closing tag"));
00199         if(elements.size() <= 1)
00200             error(_("Unexpected closing tag"));
00201         if(elname != elements.top().name) {
00202             utils::string_map i18n_symbols;
00203             i18n_symbols["tag"] = elements.top().name;
00204             i18n_symbols["tag2"] = elname;
00205             error(lineno_string(i18n_symbols, elements.top().start_line,
00206                     N_("Found invalid closing tag $tag2 for tag $tag (opened at $pos)")));
00207         }
00208 
00209         elements.pop();
00210         break;
00211     default:
00212         error(_("Invalid tag name"));
00213     }
00214 }
00215 
00216 void parser::parse_variable()
00217 {
00218     config& cfg = *elements.top().cfg;
00219     std::vector<std::string> variables;
00220     variables.push_back("");
00221 
00222     while (tok_->current_token().type != '=') {
00223         switch(tok_->current_token().type) {
00224         case token::STRING:
00225             if(!variables.back().empty())
00226                 variables.back() += ' ';
00227             variables.back() += tok_->current_token().value;
00228             break;
00229         case ',':
00230             if(variables.back().empty()) {
00231                 error(_("Empty variable name"));
00232             } else {
00233                 variables.push_back("");
00234             }
00235             break;
00236         default:
00237             error(_("Unexpected characters after variable name (expected , or =)"));
00238             break;
00239         }
00240         tok_->next_token();
00241     }
00242     if(variables.back().empty())
00243         error(_("Empty variable name"));
00244 
00245     {
00246         for(std::vector<std::string>::iterator curvar = variables.begin(); curvar != variables.end(); ++curvar) {
00247             cfg[*curvar] = "";
00248         }
00249     }
00250 
00251     std::vector<std::string>::const_iterator curvar = variables.begin();
00252 
00253     bool ignore_next_newlines = false;
00254     while(1) {
00255         tok_->next_token();
00256         assert(curvar != variables.end());
00257 
00258         switch (tok_->current_token().type) {
00259         case ',':
00260             if ((curvar+1) != variables.end()) {
00261                 curvar++;
00262                 cfg[*curvar] = "";
00263                 continue;
00264             } else {
00265                 cfg[*curvar] += ",";
00266             }
00267             break;
00268         case '_':
00269             tok_->next_token();
00270             switch (tok_->current_token().type) {
00271             case token::UNTERMINATED_QSTRING:
00272                 error(_("Unterminated quoted string"));
00273                 break;
00274             case token::QSTRING:
00275                 cfg[*curvar] += t_string(tok_->current_token().value, tok_->textdomain());
00276                 break;
00277             default:
00278                 cfg[*curvar] += "_";
00279                 cfg[*curvar] += tok_->current_token().value;
00280                 break;
00281             case token::END:
00282             case token::LF:
00283                 return;
00284             }
00285             break;
00286         case '+':
00287             // Ignore this
00288             break;
00289         default:
00290             cfg[*curvar] += tok_->current_token().leading_spaces + tok_->current_token().value;
00291             break;
00292         case token::QSTRING:
00293             cfg[*curvar] += tok_->current_token().value;
00294             break;
00295         case token::UNTERMINATED_QSTRING:
00296             error(_("Unterminated quoted string"));
00297             break;
00298         case token::LF:
00299             if(!ignore_next_newlines)
00300                 return;
00301             break;
00302         case token::END:
00303             return;
00304         }
00305 
00306         if (tok_->current_token().type == '+') {
00307             ignore_next_newlines = true;
00308         } else if (tok_->current_token().type != token::LF) {
00309             ignore_next_newlines = false;
00310         }
00311     }
00312 }
00313 
00314 std::string parser::lineno_string(utils::string_map &i18n_symbols, std::string const &lineno,
00315                       std::string const &error_string)
00316 {
00317     std::vector< std::string > pos = utils::quoted_split(lineno, ' ');
00318     std::vector< std::string >::const_iterator i = pos.begin(), end = pos.end();
00319     std::string included_from = _(" included from ");
00320     std::string res;
00321     while (i != end) {
00322         std::string const &line = *(i++);
00323         std::string const &file = i != end ? *(i++) : "<unknown>";
00324         if (!res.empty())
00325             res += included_from;
00326         res += file + ':' + line;
00327     }
00328     if (res.empty()) res = "???";
00329     i18n_symbols["pos"] = res;
00330     return vgettext(error_string.c_str(), i18n_symbols);
00331 }
00332 
00333 void parser::error(const std::string& error_type)
00334 {
00335     utils::string_map i18n_symbols;
00336     i18n_symbols["error"] = error_type;
00337 
00338     throw config::error(
00339         lineno_string(i18n_symbols, tok_->get_line(),
00340                       N_("$error at $pos")));
00341 }
00342 
00343 } // end anon namespace
00344 
00345 void read(config &cfg, std::istream &in, std::string* error_log)
00346 {
00347     parser(cfg, in)(error_log);
00348 }
00349 
00350 void read(config &cfg, std::string &in, std::string* error_log)
00351 {
00352     std::stringstream ss(in);
00353     parser(cfg, ss)(error_log);
00354 }
00355 
00356 void read_gz(config &cfg, std::istream &file, std::string* error_log)
00357 {
00358     boost::iostreams::filtering_stream<boost::iostreams::input> filter;
00359     filter.push(boost::iostreams::gzip_decompressor());
00360     filter.push(file);
00361 
00362     parser(cfg, filter)(error_log);
00363 }
00364 
// Fixed text fragments emitted by the writer functions in this file.
// The pointers themselves are const so none of them can be reseated.

// Separator between an attribute key and its value.
static const char* const AttributeEquals = "=";

// Opening and closing delimiters of a value part; translatable parts
// get the "_ " marker in front of the opening quote.
static const char* const TranslatableAttributePrefix = "_ \"";
static const char* const AttributePrefix = "\"";
static const char* const AttributePostfix = "\"";

// Written after a value part: continuation when more parts follow,
// plain newline after the last one.
static const char* const AttributeContPostfix = " + \n";
static const char* const AttributeEndPostfix = "\n";

// Wrapped around a textdomain name when the active textdomain changes.
static const char* const TextdomainPrefix = "#textdomain ";
static const char* const TextdomainPostfix = "\n";

// Delimiters of opening and closing tags.
static const char* const ElementPrefix = "[";
static const char* const ElementPostfix = "]\n";
static const char* const EndElementPrefix = "[/";
static const char* const EndElementPostfix = "]\n";
00381 
//! Return a copy of @a value in which every double quote is doubled,
//! so the result can be written between double quotes.
//! All other characters are copied unchanged.
static std::string escaped_string(const std::string& value) {
    std::string res;
    // At least value.size() characters are needed; reserving up front
    // avoids regrowth in the common case of no embedded quotes.
    res.reserve(value.size());
    for(std::string::const_iterator i = value.begin(); i != value.end(); ++i) {
        // double interior quotes
        if(*i == '\"') res += '\"';
        res += *i;
    }
    return res;
}
00391 
00392 void write_key_val(std::ostream &out, const std::string &key, const t_string &value, unsigned int level, std::string& textdomain)
00393 {
00394     bool first = true;
00395     if (value.empty()) {
00396         return;
00397     }
00398 
00399     for(t_string::walker w(value); !w.eos(); w.next()) {
00400         std::string part(w.begin(), w.end());
00401 
00402         if(w.translatable()) {
00403             if(w.textdomain() != textdomain) {
00404                 out << TextdomainPrefix
00405                     << w.textdomain()
00406                     << TextdomainPostfix;
00407                 textdomain = w.textdomain();
00408             }
00409 
00410             if(first) {
00411                 out << std::string(level, '\t')
00412                     << key
00413                     << AttributeEquals;
00414             }
00415 
00416             out << TranslatableAttributePrefix
00417                 << escaped_string(part)
00418                 << AttributePostfix;
00419 
00420         } else {
00421             if(first) {
00422                 out << std::string(level, '\t')
00423                     << key
00424                     << AttributeEquals;
00425             }
00426 
00427             out << AttributePrefix
00428                 << escaped_string(part)
00429                 << AttributePostfix;
00430         }
00431 
00432         if(w.last()) {
00433             out << AttributeEndPostfix;
00434         } else {
00435             out << AttributeContPostfix;
00436             out << std::string(level+1, '\t');
00437         }
00438 
00439         first = false;
00440     }
00441 }
00442 
00443 void write_open_child(std::ostream &out, const std::string &child, unsigned int level)
00444 {
00445     out << std::string(level, '\t')
00446         << ElementPrefix << child << ElementPostfix;
00447 }
00448 
00449 void write_close_child(std::ostream &out, const std::string &child, unsigned int level)
00450 {
00451     out << std::string(level, '\t')
00452         << EndElementPrefix << child << EndElementPostfix;
00453 }
00454 
00455 static void write_internal(config const &cfg, std::ostream &out, std::string& textdomain, size_t tab = 0)
00456 {
00457     if (tab > max_recursion_levels)
00458         throw config::error("Too many recursion levels in config write");
00459 
00460     for(string_map::const_iterator i = cfg.values.begin(), i_end = cfg.values.end(); i != i_end; ++i) {
00461         write_key_val(out, i->first, i->second, tab, textdomain);
00462     }
00463 
00464     for(config::all_children_iterator j = cfg.ordered_begin(), j_end = cfg.ordered_end(); j != j_end; ++j) {
00465         const std::pair<const std::string*,const config*>& item = *j;
00466         const std::string& name = *item.first;
00467         const config& cfg = *item.second;
00468 
00469         write_open_child(out, name, tab);
00470         write_internal(cfg, out, textdomain, tab + 1);
00471         write_close_child(out, name, tab);
00472     }
00473 }
00474 
00475 void write(std::ostream &out, config const &cfg, unsigned int level)
00476 {
00477     std::string textdomain = PACKAGE;
00478     write_internal(cfg, out, textdomain, level);
00479 }
00480 

Generated by doxygen 1.5.5 on 23 May 2008 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs