/* ScummVM - Graphic Adventure Engine * * ScummVM is the legal property of its developers, whose names * are too numerous to list here. Please refer to the COPYRIGHT * file distributed with this source distribution. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * */ #ifndef COMMON_USTR_H #define COMMON_USTR_H #include "common/scummsys.h" #include "common/str-enc.h" #include "common/base-str.h" namespace Common { /** * @defgroup common_ustr UTF-32 strings * @ingroup common * * @brief API for working with UTF-32 strings. * * @{ */ class String; /** * A simple string class for UTF-32 strings in ScummVM. The main intention * behind this class is to feature a simple way of displaying UTF-32 strings * through the Graphics::Font API. * * Note that operations like equals, deleteCharacter, toUppercase, etc. * are only simplified convenience operations. They might not fully work * as you would expect for a proper UTF-32 string class. * * The presence of \0 characters in the string will cause undefined * behavior in some operations. */ #ifdef USE_CXX11 typedef char32_t u32char_type_t; #else typedef uint32 u32char_type_t; #endif class U32String : public BaseString { public: typedef uint32 unsigned_type; /*!< Unsigned version of the underlying type. */ public: /** Construct a new empty string. */ U32String() : BaseString() {} /** Construct a new string from the given null-terminated C string. */ explicit U32String(const value_type *str) : BaseString(str) {} /** Construct a new string containing exactly @p len characters read from address @p str. */ U32String(const value_type *str, uint32 len) : BaseString(str, len) {} #ifdef USE_CXX11 explicit U32String(const uint32 *str) : BaseString((const value_type *) str) {} U32String(const uint32 *str, uint32 len) : BaseString((const value_type *) str, len) {} U32String(const uint32 *beginP, const uint32 *endP) : BaseString((const value_type *) beginP, (const value_type *) endP) {} #endif /** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */ U32String(const value_type *beginP, const value_type *endP) : BaseString(beginP, endP) {} /** Construct a copy of the given string. */ U32String(const U32String &str) : BaseString(str) {} /** Construct a new string from the given null-terminated C string that uses the given @p page encoding. */ explicit U32String(const char *str, CodePage page = kUtf8); /** Construct a new string containing exactly @p len characters read from address @p str. */ U32String(const char *str, uint32 len, CodePage page = kUtf8); /** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */ U32String(const char *beginP, const char *endP, CodePage page = kUtf8); /** Construct a copy of the given string. */ U32String(const String &str, CodePage page = kUtf8); /** Construct a string consisting of the given character. */ explicit U32String(value_type c); /** Assign a given string to this string. */ U32String &operator=(const U32String &str); /** @overload */ U32String &operator=(const String &str); /** @overload */ U32String &operator=(const value_type *str); /** @overload */ U32String &operator=(const char *str); /** Append the given string to this string. */ U32String &operator+=(const U32String &str); /** @overload */ U32String &operator+=(value_type c); using BaseString::operator==; using BaseString::operator!=; /** Check whether this string is identical to string @p x. */ bool operator==(const String &x) const; /** @overload */ bool operator==(const char *x) const; /** Check whether this string is different than string @p x. */ bool operator!=(const String &x) const; /** @overload */ bool operator!=(const char *x) const; /** Convert the string to the given @p page encoding and return the result as a new String. */ String encode(CodePage page = kUtf8) const; /** * Print formatted data into a U32String object. * * Similar to sprintf, except that it stores the result * in a (variably sized) string instead of a fixed-size buffer. */ static U32String format(U32String fmt, ...); /** @overload **/ static U32String format(const char *fmt, ...); /** * Print formatted data into a U32String object. * The method takes in the output by reference and works with iterators. */ static int vformat(const value_type *fmt, const value_type *fmtEnd, U32String &output, va_list args); /** * Helper function for vformat. Convert an int to string. * Minimal implementation, only for base 10. */ static char* itoa(int num, char* str, int base); using BaseString::insertString; void insertString(const char *s, uint32 p, CodePage page = kUtf8); /*!< Insert string @p s into this string at position @p p. */ void insertString(const String &s, uint32 p, CodePage page = kUtf8); /*!< @overload */ /** Return a substring of this string */ U32String substr(size_t pos = 0, size_t len = npos) const; const uint32 *u32_str() const { /*!< Return the string as a UTF-32 pointer. */ return (const uint32 *) _str; } /** Decode a big endian UTF-16 string into a U32String. */ static Common::U32String decodeUTF16BE(const uint16 *start, uint len); /** Decode a little endian UTF-16 string into a U32String. */ static Common::U32String decodeUTF16LE(const uint16 *start, uint len); /** Decode a native UTF-16 string into a U32String. */ static Common::U32String decodeUTF16Native(const uint16 *start, uint len); /** Transform a U32String into UTF-16 representation (big endian). The result must be freed. */ uint16 *encodeUTF16BE(uint *len = nullptr) const; /** Transform a U32String into UTF-16 representation (native endian). The result must be freed. */ uint16 *encodeUTF16LE(uint *len = nullptr) const; /** Transform a U32String into UTF-16 representation (native encoding). The result must be freed. */ uint16 *encodeUTF16Native(uint *len = nullptr) const; private: void decodeInternal(const char *str, uint32 len, CodePage page); void decodeOneByte(const char *str, uint32 len, CodePage page); void decodeWindows932(const char *src, uint32 len); void decodeWindows949(const char *src, uint32 len); void decodeWindows950(const char *src, uint32 len); void decodeUTF8(const char *str, uint32 len); friend class String; }; /** Concatenate strings @p x and @p y. */ U32String operator+(const U32String &x, const U32String &y); /** Append the given @p y character to the given @p x string. */ U32String operator+(const U32String &x, U32String::value_type y); /** * Converts string with all non-printable characters properly escaped * with use of C++ escape sequences. * Unlike the String version, this does not escape characters with * codepoints > 127. * * @param src The source string. * @param keepNewLines Whether keep newlines or convert them to '\n', default: true. * @return The converted string. */ U32String toPrintable(const U32String &src, bool keepNewLines = true); /** @} */ } // End of namespace Common #endif