string_utils.cpp

Go to the documentation of this file.
00001 /* $Id: string_utils.cpp 26501 2008-05-09 22:44:58Z mordante $ */
00002 /*
00003    Copyright (C) 2003 by David White <dave@whitevine.net>
00004    Copyright (C) 2005 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
00005    Copyright (C) 2005 - 2008 by Philippe Plantier <ayin@anathas.org>
00006    Part of the Battle for Wesnoth Project http://www.wesnoth.org/
00007 
00008    This program is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU General Public License version 2
00010    or at your option any later version.
00011    This program is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY.
00013 
00014    See the COPYING file for more details.
00015 */
00016 
00017 //! @file serialization/string_utils.cpp
00018 //! Various string-routines.
00019 
00020 #include "global.hpp"
00021 
00022 #include <cctype>
00023 #include <sstream>
00024 #include "gettext.hpp"
00025 #include "util.hpp"
00026 #include "log.hpp"
00027 #include "serialization/string_utils.hpp"
00028 #include "wctype.h"
00029 
00030 #define ERR_GENERAL LOG_STREAM(err, general)
00031 
00032 variable_set::~variable_set()
00033 {
00034 }
00035 
//! Binary predicate: true only when both characters are '.'.
static bool two_dots(char a, char b)
{
    if(a != '.') {
        return false;
    }
    return b == '.';
}
00037 
00038 static std::string do_interpolation(const std::string &str, const variable_set& set)
00039 {
00040     std::string res = str;
00041     // This needs to be able to store negative numbers to check for the while's condition
00042     // (which is only false when the previous '$' was at index 0)
00043     int rfind_dollars_sign_from = res.size();
00044     while(rfind_dollars_sign_from >= 0) {
00045         // Going in a backwards order allows nested variable-retrieval, e.g. in arrays.
00046         // For example, "I am $creatures[$i].user_description!"
00047         const std::string::size_type var_begin_loc = res.rfind('$', rfind_dollars_sign_from);
00048 
00049         // If there are no '$' left then we're done.
00050         if(var_begin_loc == std::string::npos) {
00051             break;
00052         }
00053 
00054         // For the next iteration of the loop, search for more '$'
00055         // (not from the same place because sometimes the '$' is not replaced)
00056         rfind_dollars_sign_from = int(var_begin_loc) - 1;
00057 
00058 
00059         const std::string::iterator var_begin = res.begin() + var_begin_loc;
00060 
00061         // The '$' is not part of the variable name.
00062         const std::string::iterator var_name_begin = var_begin + 1;
00063 
00064         // Find the maximum extent of the variable name (it may be shortened later).
00065         std::string::iterator var_end = var_name_begin;
00066         for(int bracket_nesting_level = 0; var_end != res.end(); ++var_end) {
00067             const char c = *var_end;
00068             if(c == '[') {
00069                 ++bracket_nesting_level;
00070             }
00071             else if(c == ']') {
00072                 if(--bracket_nesting_level < 0) {
00073                     break;
00074                 }
00075             }
00076             else if(!isdigit(c) && !isalpha(c) && c != '.' && c != '_') {
00077                 break;
00078             }
00079         }
00080 
00081         // Two dots in a row cannot be part of a valid variable name.
00082         // That matters for random=, e.g. $x..$y
00083         var_end = std::adjacent_find(var_name_begin, var_end, two_dots);
00084 
00085         // If the last character is '.', then it can't be a sub-variable.
00086         // It's probably meant to be a period instead. Don't include it.
00087         // Would need to do it repetitively if there are multiple '.'s at the end,
00088         // but don't actually need to do so because the previous check for adjacent '.'s would catch that.
00089         // For example, "My score is $score." or "My score is $score..."
00090         if(*(var_end-1) == '.'
00091         // However, "$array[$i]" by itself does not name a variable,
00092         // so if "$array[$i]." is encountered, then best to include the '.',
00093         // so that it more closely follows the syntax of a variable (if only to get rid of all of it).
00094         // (If it's the script writer's error, they'll have to fix it in either case.)
00095         // For example in "$array[$i].$field_name", if field_name does not exist as a variable,
00096         // then the result of the expansion should be "", not "." (which it would be if this exception did not exist).
00097         && *(var_end-2) != ']') {
00098             --var_end;
00099         }
00100 
00101         const std::string var_name(var_name_begin, var_end);
00102 
00103         if(*var_end == '|') {
00104             // It's been used to end this variable name; now it has no more effect.
00105             // This can allow use of things like "$$composite_var_name|.x"
00106             // (Yes, that's a WML 'pointer' of sorts. They are sometimes useful.)
00107             // If there should still be a '|' there afterwards to affect other variable names (unlikely),
00108             // just put another '|' there, one matching each '$', e.g. "$$var_containing_var_name||blah"
00109             var_end++;
00110         }
00111 
00112 
00113         if (var_name == "") {
00114             // Allow for a way to have $s in a string.
00115             // $| will be replaced by $.
00116             res.replace(var_begin, var_end, "$");
00117         }
00118         else {
00119             // The variable is replaced with its value.
00120             res.replace(var_begin, var_end,
00121                 set.get_variable_const(var_name));
00122         }
00123     }
00124 
00125     return res;
00126 }
00127 
00128 namespace utils {
00129 
//! Returns true if @a c is a carriage return or a line feed.
bool isnewline(const char c)
{
    switch(c) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
00134 
00135 // Make sure that we can use Mac, DOS, or Unix style text files on any system
00136 // and they will work, by making sure the definition of whitespace is consistent
00137 bool portable_isspace(const char c)
00138 {
00139     // returns true only on ASCII spaces
00140     if (static_cast<unsigned char>(c) >= 128)
00141         return false;
00142     return isnewline(c) || isspace(c);
00143 }
00144 
00145 // Make sure we regard '\r' and '\n' as a space, since Mac, Unix, and DOS
00146 // all consider these differently.
00147 bool notspace(const char c)
00148 {
00149     return !portable_isspace(c);
00150 }
00151 
00152 //! Remove whitespace from the front and back of the string 'str'.
00153 std::string &strip(std::string &str)
00154 {
00155     // If all the string contains is whitespace,
00156     // then the whitespace may have meaning, so don't strip it
00157     std::string::iterator it = std::find_if(str.begin(), str.end(), notspace);
00158     if (it == str.end())
00159         return str;
00160 
00161     str.erase(str.begin(), it);
00162     str.erase(std::find_if(str.rbegin(), str.rend(), notspace).base(), str.end());
00163 
00164     return str;
00165 }
00166 
//! Removes character 'c' from the first and last position of the string 'str'.
//! At most one character is removed from each end; an empty string is left as is.
//! @param str  String to modify in place.
//! @param c    Character to strip.
//! @return     A reference to 'str'.
std::string& strip_char(std::string &str, const char c) {
    // Each check is guarded separately: removing the first character
    // may leave the string empty (e.g. str == "x").
    if (!str.empty() && *str.begin() == c)
        str.erase(str.begin());
    if (!str.empty() && *(str.end() - 1) == c)
        str.erase(str.end() - 1);
    return str;
}
00175 
00176 std::vector< std::string > split(std::string const &val, char c, int flags)
00177 {
00178     std::vector< std::string > res;
00179 
00180     std::string::const_iterator i1 = val.begin();
00181     std::string::const_iterator i2 = val.begin();
00182 
00183     while (i2 != val.end()) {
00184         if (*i2 == c) {
00185             std::string new_val(i1, i2);
00186             if (flags & STRIP_SPACES)
00187                 strip(new_val);
00188             if (!(flags & REMOVE_EMPTY) || !new_val.empty())
00189                 res.push_back(new_val);
00190             ++i2;
00191             if (flags & STRIP_SPACES) {
00192                 while (i2 != val.end() && *i2 == ' ')
00193                     ++i2;
00194             }
00195 
00196             i1 = i2;
00197         } else {
00198             ++i2;
00199         }
00200     }
00201 
00202     std::string new_val(i1, i2);
00203     if (flags & STRIP_SPACES)
00204         strip(new_val);
00205     if (!(flags & REMOVE_EMPTY) || !new_val.empty())
00206         res.push_back(new_val);
00207 
00208     return res;
00209 }
00210 
00211 //! Splits a string based either on a separator where text within paranthesis
00212 //! is protected from splitting (Note that one can use the same character for
00213 //! both the left and right paranthesis. In this mode it usually makes only
00214 //! sense to have one character for the left and right paranthesis.)
00215 //! or if the separator == 0 it splits a string into an odd number of parts:
00216 //! - The part before the first '(',
00217 //! - the part between the first '('
00218 //! - and the matching right ')', etc ...
00219 //! and the remainder of the string.
00220 //! Note that this will find the first matching char in the left string
00221 //! and match against the corresponding char in the right string.
00222 //! In this mode, a correctly processed string should return with
00223 //! an odd number of elements to the vector and
00224 //! an empty elements are never removed as they are placeholders.
00225 //! hence REMOVE EMPTY only works for the separator split.
00226 //!
00227 //! parenthetical_split("a(b)c{d}e(f{g})h",0,"({",")}") should return
00228 //! a vector of <"a","b","c","d","e","f{g}","h">
00229 std::vector< std::string > paranthetical_split(std::string const &val, const char separator, std::string const &left, std::string const &right,int flags)
00230 {
00231     std::vector< std::string > res;
00232     std::vector<char> part;
00233     bool in_paranthesis = false;
00234 
00235     std::string::const_iterator i1 = val.begin();
00236     std::string::const_iterator i2 = val.begin();
00237 
00238     std::string lp=left;
00239     std::string rp=right;
00240 
00241     if(left.size()!=right.size()){
00242         ERR_GENERAL << "Left and Right Parenthesis lists not same length\n";
00243         return res;
00244     }
00245 
00246     while (i2 != val.end()) {
00247         if(!in_paranthesis && separator && *i2 == separator){
00248             std::string new_val(i1, i2);
00249             if (flags & STRIP_SPACES)
00250                 strip(new_val);
00251             if (!(flags & REMOVE_EMPTY) || !new_val.empty())
00252                 res.push_back(new_val);
00253             ++i2;
00254             if (flags & STRIP_SPACES) {
00255                 while (i2 != val.end() && *i2 == ' ')
00256                     ++i2;
00257             }
00258             i1=i2;
00259             continue;
00260         }
00261         if(part.size() && *i2 == part.back()){
00262             part.pop_back();
00263             if(!separator && part.size() == 0){
00264                 std::string new_val(i1, i2);
00265                 if (flags & STRIP_SPACES)
00266                     strip(new_val);
00267                 res.push_back(new_val);
00268                 ++i2;
00269                 i1=i2;
00270             }else{
00271                 if (part.size() == 0)
00272                     in_paranthesis = false;
00273                 ++i2;
00274             }
00275             continue;
00276         }
00277         bool found=false;
00278         for(size_t i=0; i < lp.size(); i++){
00279             if (*i2 == lp[i]){
00280                 if (!separator && part.size()==0){
00281                     std::string new_val(i1, i2);
00282                     if (flags & STRIP_SPACES)
00283                         strip(new_val);
00284                     res.push_back(new_val);
00285                     ++i2;
00286                     i1=i2;
00287                 }else{
00288                     ++i2;
00289                 }
00290                 part.push_back(rp[i]);
00291                 found=true;
00292                 break;
00293             }
00294         }
00295         if(!found){
00296             ++i2;
00297         } else
00298             in_paranthesis = true;
00299     }
00300 
00301     std::string new_val(i1, i2);
00302     if (flags & STRIP_SPACES)
00303         strip(new_val);
00304     if (!(flags & REMOVE_EMPTY) || !new_val.empty())
00305         res.push_back(new_val);
00306 
00307     if(part.size()){
00308             ERR_GENERAL << "Mismatched paranthesis:\n"<<val<<"\n";;
00309     }
00310 
00311     return res;
00312 }
00313 
00314 class string_map_variable_set : public variable_set
00315 {
00316 public:
00317     string_map_variable_set(const string_map& map) : map_(map) {};
00318 
00319     virtual const t_string& get_variable_const (const std::string& key) const
00320     {
00321         static const t_string empty_string = "";
00322 
00323         const string_map::const_iterator itor = map_.find(key);
00324         if(itor == map_.end()) {
00325             return empty_string;
00326         } else {
00327             return itor->second;
00328         }
00329     };
00330 private:
00331     const string_map& map_;
00332 
00333 };
00334 
00335 std::string interpolate_variables_into_string(const std::string &str, const string_map * const symbols)
00336 {
00337     string_map_variable_set set(*symbols);
00338     return do_interpolation(str, set);
00339 }
00340 
00341 std::string interpolate_variables_into_string(const std::string &str, const variable_set& variables)
00342 {
00343     return do_interpolation(str, variables);
00344 }
00345 
00346 // Modify a number by string representing integer difference, or optionally %
00347 int apply_modifier( const int number, const std::string &amount, const int minimum ) {
00348     // wassert( amount.empty() == false );
00349     int value = atoi(amount.c_str());
00350     if(amount[amount.size()-1] == '%') {
00351         value = div100rounded(number * value);
00352     }
00353     value += number;
00354     if (( minimum > 0 ) && ( value < minimum ))
00355         value = minimum;
00356     return value;
00357 }
00358 
//! Inserts a backslash in front of every character of 'str' that appears in 'special_chars'.
//! @return A reference to the modified 'str'.
std::string &escape(std::string &str, const std::string& special_chars)
{
    std::string::size_type pos = str.find_first_of(special_chars);
    while (pos != std::string::npos) {
        str.insert(pos, 1, '\\');
        // Continue after the inserted backslash and the character it protects.
        pos = str.find_first_of(special_chars, pos + 2);
    }
    return str;
}
00372 
00373 //! Prepend all special characters with a backslash.
00374 // Special characters are:
00375 // #@{}+-,\*=
00376 std::string& escape(std::string& str)
00377 {
00378     static const std::string special_chars("#@{}+-,\\*=");
00379     return escape(str, special_chars);
00380 }
00381 
//! Removes the escaping backslashes from 'str' in place.
//! The character following a removed backslash is kept as is,
//! so an escaped backslash collapses to a single backslash.
//! @return A reference to the modified 'str'.
std::string &unescape(std::string &str)
{
    std::string::size_type pos = str.find('\\');
    while (pos != std::string::npos) {
        str.erase(pos, 1);
        // 'pos' now indexes the escaped character; resume the search behind it.
        pos = str.find('\\', pos + 1);
    }
    return str;
}
00395 
00396 bool string_bool(const std::string& str, bool def) {
00397     if (str.empty()) return def;
00398     if (str == "yes" || str == "on" || str == "true"
00399     || lexical_cast_default<int>(str, 0)) {
00400         return true;
00401     }
00402     if (str == "no" || str == "off" || str == "false"
00403     || !lexical_cast_default<int>(str, 1)) {
00404         return false;
00405     }
00406     return def;
00407 }
00408 
//! True for the non-alphanumeric characters allowed in a username.
static bool is_username_char(char c) {
    return ((c == '_') || (c == '-'));
}

//! True for alphanumeric characters.
//! isalnum() is only defined for values representable as unsigned char,
//! so the character is converted first.
static bool is_username_alnum(char c) {
    return isalnum(static_cast<unsigned char>(c)) != 0;
}

//! Check if the username is valid.
//! A valid username is non-empty, consists only of alpha-numeric characters,
//! underscores and hyphens, and contains at least one alpha-numeric character.
bool isvalid_username(const std::string& username) {
    const size_t alnum = std::count_if(username.begin(), username.end(), is_username_alnum);
    const size_t valid_char =
            std::count_if(username.begin(), username.end(), is_username_char);
    if ((alnum + valid_char != username.size())
            || valid_char == username.size() || username.empty() )
    {
        return false;
    }
    return true;
}
00426 
00427 //! Try to complete the last word of 'text' with the 'wordlist'.
00428 //! @param[in]  'text'     Text where we try to complete the last word of.
00429 //! @param[out] 'text'     Text with completed last word.
00430 //! @param[in]  'wordlist' A vector of strings to complete against.
00431 //! @param[out] 'wordlist' A vector of strings that matched 'text'.
00432 //! @return 'true' iff text is just one word (no spaces)
00433 bool word_completion(std::string& text, std::vector<std::string>& wordlist) {
00434     std::vector<std::string> matches;
00435     const size_t last_space = text.rfind(" ");
00436     // If last character is a space return.
00437     if (last_space == text.size() -1) {
00438         wordlist = matches;
00439         return false;
00440     }
00441 
00442     bool text_start;
00443     std::string semiword;
00444     if (last_space == std::string::npos) {
00445         text_start = true;
00446         semiword = text;
00447     } else {
00448         text_start = false;
00449         semiword.assign(text, last_space + 1, text.size());
00450     }
00451 
00452     std::string best_match = semiword;
00453     for (std::vector<std::string>::const_iterator word = wordlist.begin();
00454             word != wordlist.end(); ++word)
00455     {
00456         if (word->size() < semiword.size()
00457         || !std::equal(semiword.begin(), semiword.end(), word->begin(),
00458                 chars_equal_insensitive))
00459         {
00460             continue;
00461         }
00462         if (matches.empty()) {
00463             best_match = *word;
00464         } else {
00465             int j = 0;
00466             while (toupper(best_match[j]) == toupper((*word)[j])) j++;
00467             if (best_match.begin() + j < best_match.end()) {
00468                 best_match.erase(best_match.begin() + j, best_match.end());
00469             }
00470         }
00471         matches.push_back(*word);
00472     }
00473     if(!matches.empty()) {
00474         text.replace(last_space + 1, best_match.size(), best_match);
00475     }
00476     wordlist = matches;
00477     return text_start;
00478 }
00479 
//! True for the characters that may delimit a word inside a message.
static bool is_word_boundary(char c) {
    return (c == ' ' || c == ',' || c == ':' || c == '\'' || c == '"' || c == '-');
}

//! Check if a string contains a word.
//! An occurrence of 'word' counts when it is delimited on both sides by the
//! start/end of 'message' or by a word-boundary character. Every occurrence
//! is examined, so "bobby bob" contains the word "bob".
//! @param message  Text to search in.
//! @param word     Word to look for.
//! @return         true if a delimited occurrence exists.
bool word_match(const std::string& message, const std::string& word) {
    size_t first = message.find(word);
    while (first != std::string::npos) {
        if (first == 0 || is_word_boundary(message[first - 1])) {
            const size_t next = first + word.size();
            if (next == message.size() || is_word_boundary(message[next])) {
                return true;
            }
        }
        // An empty word is found at every index; only test it at the first one.
        if (word.empty()) {
            break;
        }
        first = message.find(word, first + 1);
    }
    return false;
}
00496 
//! Match using '*' as any number of characters (including none),
//! and '?' as any one character.
//!
//! The pattern is processed one "solid" run (the characters between two '*')
//! at a time; the rest of the pattern is matched recursively against the rest
//! of the string.
//!
//! @param str    String to test.
//! @param match  Pattern; it must match the whole of 'str'.
//! @return       true if 'str' matches the pattern.
bool wildcard_string_match(const std::string& str, const std::string& match) {
    const bool wild_matching = (!match.empty() && match[0] == '*');
    const std::string::size_type solid_begin = match.find_first_not_of('*');
    const bool have_solids = (solid_begin != std::string::npos);
    // Check the simple cases first
    if(!have_solids) {
        // The pattern is empty (matches only the empty string) or all '*' (matches anything).
        return wild_matching || str.empty();
    }
    if(str.empty()) {
        // Every solid character, '?' included, needs a character to consume.
        return false;
    }
    const std::string::size_type solid_end = match.find_first_of('*', solid_begin);
    const std::string::size_type solid_len = (solid_end == std::string::npos)
        ? match.length() - solid_begin : solid_end - solid_begin;
    std::string::size_type current = 0;
    bool matches;
    do {
        matches = true;
        // Now try to place the str into the solid space
        const std::string::size_type test_len = str.length() - current;
        for(std::string::size_type i=0; i < solid_len && matches; ++i) {
            const char solid_c = match[solid_begin + i];
            // i >= test_len means the string has run out of characters.
            if(i >= test_len || !(solid_c == '?' || solid_c == str[current+i])) {
                matches = false;
            }
        }
        if(matches) {
            // The solid space matched, now consume it and attempt to find more
            const std::string consumed_match = (solid_begin+solid_len < match.length())
                ? match.substr(solid_end) : "";
            const std::string consumed_str = str.substr(current+solid_len);
            matches = wildcard_string_match(consumed_str, consumed_match);
        }
        // Only a leading '*' allows the solid run to start later in the string.
    } while(wild_matching && !matches && ++current < str.length());
    return matches;
}
00533 
//! Concatenates the elements of 'v', placing 'c' between neighbouring elements.
std::string join(std::vector< std::string > const &v, char c)
{
    std::stringstream joined;
    bool first = true;
    for(std::vector< std::string >::const_iterator item = v.begin(); item != v.end(); ++item) {
        // The separator goes in front of every element but the first.
        if (!first)
            joined << c;
        joined << *item;
        first = false;
    }

    return joined.str();
}
00545 
00546 // This function is identical to split(), except it does not split
00547 // when it otherwise would if the previous character was identical to the parameter 'quote'.
00548 // i.e. it does not split quoted commas.
00549 // This method was added to make it possible to quote user input,
00550 // particularly so commas in user input will not cause visual problems in menus.
00551 //
00552 //! @todo Why not change split()? That would change the methods post condition.
00553 std::vector< std::string > quoted_split(std::string const &val, char c, int flags, char quote)
00554 {
00555     std::vector<std::string> res;
00556 
00557     std::string::const_iterator i1 = val.begin();
00558     std::string::const_iterator i2 = val.begin();
00559 
00560     while (i2 != val.end()) {
00561         if (*i2 == quote) {
00562             // Ignore quoted character
00563             ++i2;
00564             if (i2 != val.end()) ++i2;
00565         } else if (*i2 == c) {
00566             std::string new_val(i1, i2);
00567             if (flags & STRIP_SPACES)
00568                 strip(new_val);
00569             if (!(flags & REMOVE_EMPTY) || !new_val.empty())
00570                 res.push_back(new_val);
00571             ++i2;
00572             if (flags & STRIP_SPACES) {
00573                 while(i2 != val.end() && *i2 == ' ')
00574                     ++i2;
00575             }
00576 
00577             i1 = i2;
00578         } else {
00579             ++i2;
00580         }
00581     }
00582 
00583     std::string new_val(i1, i2);
00584     if (flags & STRIP_SPACES)
00585         strip(new_val);
00586     if (!(flags & REMOVE_EMPTY) || !new_val.empty())
00587         res.push_back(new_val);
00588 
00589     return res;
00590 }
00591 
//! Parses "a-b" (or a single number "a") into the pair (a, b).
//! The text before the first '-' gives the lower bound and the text after it
//! the upper bound; without a '-' both bounds come from the whole string.
//! Numbers are read with atoi(), and an upper bound below the lower bound is
//! raised to the lower bound.
std::pair< int, int > parse_range(std::string const &str)
{
    const std::string::size_type dash_pos = str.find('-');
    const bool has_dash = (dash_pos != std::string::npos);

    // substr(0, npos) yields the whole string when there is no dash.
    const std::string lower_text = str.substr(0, dash_pos);
    const std::string upper_text = has_dash ? str.substr(dash_pos + 1) : lower_text;

    const int lower = atoi(lower_text.c_str());
    const int upper = atoi(upper_text.c_str());

    return std::pair<int,int>(lower, upper < lower ? lower : upper);
}
00603 
00604 std::vector< std::pair< int, int > > parse_ranges(std::string const &str)
00605 {
00606     std::vector< std::pair< int, int > > to_return;
00607     std::vector<std::string> strs = utils::split(str);
00608     std::vector<std::string>::const_iterator i, i_end=strs.end();
00609     for(i = strs.begin(); i != i_end; ++i) {
00610         to_return.push_back(parse_range(*i));
00611     }
00612     return to_return;
00613 }
00614 
00615 static int byte_size_from_utf8_first(unsigned char ch)
00616 {
00617     int count;
00618 
00619     if ((ch & 0x80) == 0)
00620         count = 1;
00621     else if ((ch & 0xE0) == 0xC0)
00622         count = 2;
00623     else if ((ch & 0xF0) == 0xE0)
00624         count = 3;
00625     else if ((ch & 0xF8) == 0xF0)
00626         count = 4;
00627     else if ((ch & 0xFC) == 0xF8)
00628         count = 5;
00629     else if ((ch & 0xFE) == 0xFC)
00630         count = 6;
00631     else
00632         throw invalid_utf8_exception(); // Stop on invalid characters
00633 
00634     return count;
00635 }
00636 
00637 utf8_iterator::utf8_iterator(const std::string& str) : 
00638     current_char(0),
00639     string_end(str.end()),
00640     current_substr(std::make_pair(str.begin(), str.begin()))
00641 {
00642     update();
00643 }
00644 
00645 utf8_iterator::utf8_iterator(std::string::const_iterator const &beg, 
00646         std::string::const_iterator const &end) :
00647     current_char(0),
00648     string_end(end),
00649     current_substr(std::make_pair(beg, beg))
00650 {
00651     update();
00652 }
00653 
00654 utf8_iterator utf8_iterator::begin(std::string const &str)
00655 {
00656     return utf8_iterator(str.begin(), str.end());
00657 }
00658 
00659 utf8_iterator utf8_iterator::end(const std::string& str)
00660 {
00661     return utf8_iterator(str.end(), str.end());
00662 }
00663 
00664 bool utf8_iterator::operator==(const utf8_iterator& a) const
00665 {
00666     return current_substr.first == a.current_substr.first;
00667 }
00668 
00669 utf8_iterator& utf8_iterator::operator++()
00670 {
00671     current_substr.first = current_substr.second;
00672     update();
00673     return *this;
00674 }
00675 
00676 wchar_t utf8_iterator::operator*() const
00677 {
00678     return current_char;
00679 }
00680 
00681 bool utf8_iterator::next_is_end()
00682 {
00683     if(current_substr.second == string_end)
00684         return true;
00685     return false;
00686 }
00687 
00688 const std::pair<std::string::const_iterator, std::string::const_iterator>& utf8_iterator::substr() const
00689 {
00690     return current_substr;
00691 }
00692 
00693 void utf8_iterator::update()
00694 {
00695     // Do not try to update the current unicode char at end-of-string.
00696     if(current_substr.first == string_end)
00697         return;
00698 
00699     size_t size = byte_size_from_utf8_first(*current_substr.first);
00700     current_substr.second = current_substr.first + size;
00701 
00702     current_char = static_cast<unsigned char>(*current_substr.first);
00703 
00704     // Convert the first character
00705     if(size != 1) {
00706         current_char &= 0xFF >> (size + 1);
00707     }
00708 
00709     // Convert the continuation bytes
00710     for(std::string::const_iterator c = current_substr.first+1;
00711             c != current_substr.second; ++c) {
00712         // If the string ends occurs within an UTF8-sequence, this is bad.
00713         if (c == string_end)
00714             throw invalid_utf8_exception();
00715 
00716         if ((*c & 0xC0) != 0x80)
00717             throw invalid_utf8_exception();
00718 
00719         current_char = (current_char << 6) | (static_cast<unsigned char>(*c) & 0x3F);
00720     }
00721 }
00722 
00723 
00724 std::string wstring_to_string(const wide_string &src)
00725 {
00726     wchar_t ch;
00727     wide_string::const_iterator i;
00728     Uint32 bitmask;
00729     std::string ret;
00730 
00731     try {
00732 
00733         for(i = src.begin(); i != src.end(); ++i) {
00734             unsigned int count;
00735             ch = *i;
00736 
00737             // Determine the bytes required
00738             count = 1;
00739             if(ch >= 0x80)
00740                 count++;
00741 
00742             bitmask = 0x800;
00743             for(unsigned int j = 0; j < 5; ++j) {
00744                 if(static_cast<Uint32>(ch) >= bitmask) {
00745                     count++;
00746                 }
00747 
00748                 bitmask <<= 5;
00749             }
00750 
00751             if(count > 6) {
00752                 throw invalid_utf8_exception();
00753             }
00754 
00755             if(count == 1) {
00756                 push_back(ret,static_cast<char>(ch));
00757             } else {
00758                 for(int j = static_cast<int>(count) - 1; j >= 0; --j) {
00759                     unsigned char c = (ch >> (6 * j)) & 0x3f;
00760                     c |= 0x80;
00761                     if(j == static_cast<int>(count) - 1) {
00762                         c |= 0xff << (8 - count);
00763                     }
00764                     push_back(ret, c);
00765                 }
00766             }
00767 
00768         }
00769 
00770         return ret;
00771     }
00772     catch(invalid_utf8_exception e) {
00773         ERR_GENERAL << "Invalid wide character string\n";
00774         return ret;
00775     }
00776 }
00777 
00778 std::string wchar_to_string(const wchar_t c)
00779 {
00780     wide_string s;
00781     s.push_back(c);
00782     return wstring_to_string(s);
00783 }
00784 
00785 wide_string string_to_wstring(const std::string &src)
00786 {
00787     wide_string res;
00788 
00789     try {
00790         utf8_iterator i1(src);
00791         const utf8_iterator i2(utf8_iterator::end(src));
00792 
00793         // Equivalent to res.insert(res.end(),i1,i2) which doesn't work on VC++6.
00794         while(i1 != i2) {
00795             push_back(res,*i1);
00796             ++i1;
00797         }
00798     }
00799     catch(invalid_utf8_exception e) {
00800         ERR_GENERAL << "Invalid UTF-8 string: \"" << src << "\"\n";
00801         return res;
00802     }
00803 
00804     return res;
00805 }
00806 
00807 utf8_string capitalize(const utf8_string& s)
00808 {
00809     if(s.size() > 0) {
00810         utf8_iterator itor(s);
00811 #if defined(__APPLE__) || defined(__AMIGAOS4__)
00812         //! @todo FIXME: Should we support towupper on recent OSX platforms?
00813         wchar_t uchar = *itor;
00814         if(uchar >= 0 && uchar < 0x100)
00815             uchar = toupper(uchar);
00816         std::string res = utils::wchar_to_string(uchar);
00817 #else
00818         std::string res = utils::wchar_to_string(towupper(*itor));
00819 #endif
00820         res.append(itor.substr().second, s.end());
00821         return res;
00822     }
00823     return s;
00824 }
00825 
00826 utf8_string uppercase(const utf8_string& s)
00827 {
00828     if(s.size() > 0) {
00829         utf8_iterator itor(s);
00830         std::string res;
00831 
00832         for(;itor != utf8_iterator::end(s); ++itor) {
00833 #if defined(__APPLE__) || defined(__AMIGAOS4__)
00834             //! @todo FIXME: Should we support towupper on recent OSX platforms?
00835             wchar_t uchar = *itor;
00836             if(uchar >= 0 && uchar < 0x100)
00837                 uchar = toupper(uchar);
00838             res += utils::wchar_to_string(uchar);
00839 #else
00840             res += utils::wchar_to_string(towupper(*itor));
00841 #endif
00842         }
00843 
00844         return res;
00845     }
00846     return s;
00847 }
00848 
00849 utf8_string lowercase(const utf8_string& s)
00850 {
00851     if(s.size() > 0) {
00852         utf8_iterator itor(s);
00853         std::string res;
00854 
00855         for(;itor != utf8_iterator::end(s); ++itor) {
00856 #if defined(__APPLE__) || defined(__OpenBSD__) || defined(__AMIGAOS4__)
00857             //! @todo FIXME: Should we support towupper on recent OSX platforms?
00858             wchar_t uchar = *itor;
00859             if(uchar >= 0 && uchar < 0x100)
00860                 uchar = tolower(uchar);
00861             res += utils::wchar_to_string(uchar);
00862 #else
00863             res += utils::wchar_to_string(towlower(*itor));
00864 #endif
00865         }
00866 
00867         res.append(itor.substr().second, s.end());
00868         return res;
00869     }
00870     return s;
00871 }
00872 
00873 //! Truncates a string.
00874 //!
00875 //! If the string send has more than size utf-8 characters it will be truncated
00876 //! to this size. 
00877 //! No assumptions can be made about the actual size of the string.
00878 //! 
00879 //! @param[in]  str     String which can be converted to utf-8.
00880 //! @param[out] str     String which contains maximal size utf-8 characters.
00881 //! @param size         The size to truncate at.
00882 void truncate_as_wstring(std::string& str, const size_t size)
00883 {
00884     wide_string utf8_str = utils::string_to_wstring(str);
00885     if(utf8_str.size() > size) {
00886         utf8_str.resize(size);
00887         str = utils::wstring_to_string(utf8_str);
00888     }
00889 }
00890 
00891 } // end namespace utils
00892 
00893 std::string vgettext(const char *msgid, const utils::string_map& symbols)
00894 {
00895     const std::string orig(_(msgid));
00896     const std::string msg = utils::interpolate_variables_into_string(orig, &symbols);
00897     return msg;
00898 }
00899 
00900 std::string vngettext(const char* sing, const char* plur, int n, const utils::string_map& symbols)
00901 {
00902     const std::string orig(_n(sing, plur, n));
00903     const std::string msg = utils::interpolate_variables_into_string(orig, &symbols);
00904     return msg;
00905 }

Generated by doxygen 1.5.5 on 23 May 2008 for The Battle for Wesnoth
Gna! | Forum | Wiki | CIA | devdocs