binary_wml.cpp

Go to the documentation of this file.
00001 /* $Id: binary_wml.cpp 23842 2008-02-16 08:47:16Z mordante $ */
00002 /*
00003    Copyright (C) 2003 by David White <dave@whitevine.net>
00004    Copyright (C) 2005 - 2008 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
00005    Part of the Battle for Wesnoth Project http://www.wesnoth.org/
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License version 2
00009    or at your option any later version.
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY.
00012 
00013    See the COPYING file for more details.
00014 */
00015 
00016 //! @file serialization/binary_wml.cpp 
00017 //! Data compression, designed for network traffic.
00018 
00019 #include "global.hpp"
00020 
00021 #include "config.hpp"
00022 #include "loadscreen.hpp"
00023 #include "log.hpp"
00024 #include "serialization/binary_wml.hpp"
00025 
00026 #include <algorithm>
00027 #include <iostream>
00028 #include <sstream>
00029 
00030 #define ERR_CF LOG_STREAM(err, config)
00031 
00032 // Data compression. Compression is designed for network traffic.
00033 // Assumptions the compression is based on:
00034 // - most space is taken up by element names and attribute names
00035 // - there are relatively few element names and attribute names 
00036 //   that are repeated many times
00037 //
00038 // How it works: there are some control characters:
00039 // 'compress_open_element': signals that the next word found is an element.
00040 // Any words found that are not after this are assumed to be attributes.
00041 // 'compress_close_element': signals to close the current element.
00042 // 'compress_schema_item': signals that following is a nul-delimited string, 
00043 //                         which should be added as a word in the schema.
00044 // 'compress_literal_word': signals that following is a word stored as a nul-delimited string
00045 //    (an attribute name, unless it was preceded by 'compress_open_element').
00046 //
00047 // All other characters are mapped to words. When an item is inserted into the schema,
00048 // it is mapped to the first available character. Any attribute found is always followed
00049 // by a nul-delimited string which is the value for the attribute.
00050 //
00051 // The schema objects are designed to be persisted. That is, in a network game, 
00052 // both peers can store their schema objects, and so rather than sending 
00053 // schema data each time, the peers use and build their schemas as the 
00054 // game progresses, adding a new word to the schema anytime it is required.
00055 
00056 static const unsigned int
00057     compress_open_element = 0, compress_close_element = 1,
00058     compress_schema_item  = 2, compress_literal_word = 3,
00059     compress_first_word   = 4, compress_end_words = 256;
00060 static const size_t compress_max_words = compress_end_words - compress_first_word;
00061 static const size_t max_schema_item_length = 20;
00062 static const int max_recursion_levels = 1000;
00063 
00064 static void compress_output_literal_word(std::ostream &out, std::string const &word)
00065 {
00066     out.write(word.c_str(), word.length() + 1);
00067 }
00068 
00069 static compression_schema::word_char_map::const_iterator
00070 add_word_to_schema(std::string const &word, compression_schema &schema)
00071 {
00072     if (word.size() > max_schema_item_length)
00073         throw config::error("Schema item is too long");
00074 
00075     unsigned int c = compress_first_word + schema.word_to_char.size();
00076 
00077     schema.char_to_word.insert(std::make_pair(c, word));
00078     return schema.word_to_char.insert(std::make_pair(word, c)).first;
00079 }
00080 
00081 static compression_schema::word_char_map::const_iterator
00082 get_word_in_schema(std::string const &word, compression_schema &schema, std::ostream &out)
00083 {
00084     if (word.size() > max_schema_item_length)
00085         return schema.word_to_char.end();
00086 
00087     // See if this word is already in the schema
00088     const compression_schema::word_char_map::const_iterator w = schema.word_to_char.find(word);
00089     if (w != schema.word_to_char.end()) {
00090         // It is in the schema. Return it.
00091         return w;
00092     } else if (schema.word_to_char.size() < compress_max_words) {
00093         // We can add the word to the schema
00094 
00095         // We insert the code to add a schema item, followed by the zero-delimited word.
00096         out.put(compress_schema_item);
00097         compress_output_literal_word(out, word);
00098 
00099         return add_word_to_schema(word, schema);
00100     } else {
00101         // It is not there, and there is no room to add it
00102         return schema.word_to_char.end();
00103     }
00104 }
00105 
00106 static void compress_emit_word(std::ostream &out, std::string const &word, compression_schema &schema)
00107 {
00108     // Get the word in the schema
00109     const compression_schema::word_char_map::const_iterator w = get_word_in_schema(word, schema, out);
00110     if (w != schema.word_to_char.end()) {
00111         // The word is in the schema, all we have to do is output the compression code for it.
00112         out.put(w->second);
00113     } else {
00114         // The word is not in the schema. Output it as a literal word.
00115         out.put(compress_literal_word);
00116         compress_output_literal_word(out, word);
00117     }
00118 }
00119 
00120 static std::string compress_read_literal_word(std::istream &in)
00121 {
00122     std::string buffer;
00123     std::getline(in, buffer, '\0');
00124     if (!in.good())
00125         throw config::error("Unexpected end of data in compressed config read");
00126     return buffer;
00127 }
00128 
00129 static void write_compressed_internal(std::ostream &out, config const &cfg, compression_schema &schema, int level)
00130 {
00131     if (level > max_recursion_levels)
00132         throw config::error("Too many recursion levels in compressed config write");
00133 
00134     for (string_map::const_iterator i = cfg.values.begin(), i_end = cfg.values.end(); i != i_end; ++i) {
00135         if (i->second.empty() == false) {
00136             // Output the name, using compression
00137             compress_emit_word(out, i->first, schema);
00138 
00139             // Output the value, with no compression
00140             compress_output_literal_word(out, i->second.to_serialized());
00141         }
00142     }
00143 
00144     for (config::all_children_iterator j = cfg.ordered_begin(), j_end = cfg.ordered_end(); j != j_end; ++j) {
00145         std::pair< std::string const *, config const * > const &item = *j;
00146         std::string const &name = *item.first;
00147         config const &cfg2 = *item.second;
00148 
00149         out.put(compress_open_element);
00150         compress_emit_word(out, name, schema);
00151         write_compressed_internal(out, cfg2, schema, level + 1);
00152         out.put(compress_close_element);
00153     }
00154 }
00155 
00156 void write_compressed(std::ostream &out, config const &cfg, compression_schema &schema)
00157 {
00158     write_compressed_internal(out, cfg, schema, 0);
00159 }
00160 
00161 static void read_compressed_internal(config &cfg, std::istream &in, compression_schema &schema, int level)
00162 {
00163     increment_binary_wml_progress();
00164     if (level >= max_recursion_levels)
00165         throw config::error("Too many recursion levels in compressed config read");
00166 
00167     bool in_open_element = false;
00168     for(;;) {
00169         unsigned char const c = in.get();
00170         if (!in.good())
00171             return;
00172         switch (c) {
00173         case compress_open_element:
00174             in_open_element = true;
00175             break;
00176         case compress_close_element:
00177             return;
00178         case compress_schema_item:
00179             add_word_to_schema(compress_read_literal_word(in), schema);
00180             break;
00181 
00182         default: {
00183             std::string word;
00184             if (c == compress_literal_word) {
00185                 word = compress_read_literal_word(in);
00186             } else {
00187                 unsigned int code = c;
00188 
00189                 const compression_schema::char_word_map::const_iterator itor
00190                     = schema.char_to_word.find(code);
00191                 if (itor == schema.char_to_word.end()) {
00192                     ERR_CF << "illegal word code: " << code << "\n";
00193                     throw config::error("Illegal character in compression input");
00194                 }
00195 
00196                 word = itor->second;
00197             }
00198 
00199             if (in_open_element) {
00200                 in_open_element = false;
00201                 config &cfg2 = cfg.add_child(word);
00202                 read_compressed_internal(cfg2, in, schema, level + 1);
00203             } else {
00204                 // We have a name/value pair, the value is always a literal string
00205                 std::string value = compress_read_literal_word(in);
00206                 t_string t_value = t_string::from_serialized(value);
00207                 cfg.values.insert(std::make_pair(word, t_value));
00208             }
00209         }
00210 
00211         } // end switch
00212     }
00213 }
00214 
00215 void read_compressed(config &cfg, std::istream &in, compression_schema &schema)
00216 {
00217     cfg.clear();
00218     read_compressed_internal(cfg, in, schema, 0);
00219 }
00220 
00221 void write_compressed(std::ostream &out, config const &cfg) {
00222     compression_schema schema;
00223     write_compressed(out, cfg, schema);
00224 }
00225 
00226 void read_compressed(config &cfg, std::istream &in) {
00227     compression_schema schema;
00228     read_compressed(cfg, in, schema);
00229 }
00230 

Generated by doxygen 1.5.5 on 23 May 2008 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs