2013-11-23 20:34:54 +00:00
|
|
|
/* ScummVM - Graphic Adventure Engine
|
|
|
|
*
|
|
|
|
* ScummVM is the legal property of its developers, whose names
|
|
|
|
* are too numerous to list here. Please refer to the COPYRIGHT
|
|
|
|
* file distributed with this source distribution.
|
|
|
|
*
|
2021-12-26 17:47:58 +00:00
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
2013-11-23 20:34:54 +00:00
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
2021-12-26 17:47:58 +00:00
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2014-02-18 01:34:18 +00:00
|
|
|
*
|
2013-11-23 20:34:54 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef COMMON_USTR_H
|
|
|
|
#define COMMON_USTR_H
|
|
|
|
|
|
|
|
#include "common/scummsys.h"
|
2022-10-30 11:24:27 +00:00
|
|
|
#include "common/util.h"
|
2019-10-19 15:38:26 +00:00
|
|
|
#include "common/str-enc.h"
|
2023-01-05 13:21:23 +00:00
|
|
|
#include "common/str-base.h"
|
2013-11-23 20:34:54 +00:00
|
|
|
|
|
|
|
namespace Common {
|
|
|
|
|
2020-07-08 21:30:36 +00:00
|
|
|
/**
|
|
|
|
* @defgroup common_ustr UTF-32 strings
|
2020-11-11 18:48:06 +00:00
|
|
|
* @ingroup common
|
2020-07-08 21:30:36 +00:00
|
|
|
*
|
|
|
|
* @brief API for working with UTF-32 strings.
|
|
|
|
*
|
|
|
|
* @{
|
|
|
|
*/
|
2021-05-04 08:45:03 +00:00
|
|
|
|
2018-06-17 22:04:03 +00:00
|
|
|
class String;
|
|
|
|
|
2013-11-23 20:34:54 +00:00
|
|
|
/**
|
2020-11-11 18:48:06 +00:00
|
|
|
* A simple string class for UTF-32 strings in ScummVM. The main intention
|
2013-11-23 20:34:54 +00:00
|
|
|
* behind this class is to feature a simple way of displaying UTF-32 strings
|
|
|
|
* through the Graphics::Font API.
|
|
|
|
*
|
2020-11-11 18:48:06 +00:00
|
|
|
* Note that operations like equals, deleteCharacter, toUppercase, etc.
|
|
|
|
* are only simplified convenience operations. They might not fully work
|
2013-11-23 20:34:54 +00:00
|
|
|
* as you would expect for a proper UTF-32 string class.
|
|
|
|
*
|
|
|
|
* The presence of \0 characters in the string will cause undefined
|
|
|
|
* behavior in some operations.
|
|
|
|
*/
|
2020-10-27 22:22:25 +00:00
|
|
|
typedef char32_t u32char_type_t;
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2020-10-27 22:22:25 +00:00
|
|
|
class U32String : public BaseString<u32char_type_t> {
|
|
|
|
public:
|
2020-11-11 18:48:06 +00:00
|
|
|
typedef uint32 unsigned_type; /*!< Unsigned version of the underlying type. */
|
2013-11-23 20:34:54 +00:00
|
|
|
public:
|
2013-11-23 20:34:54 +00:00
|
|
|
/** Construct a new empty string. */
|
2023-04-15 07:22:18 +00:00
|
|
|
constexpr U32String() : BaseString<u32char_type_t>() {}
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Construct a new string from the given null-terminated C string. */
|
2020-10-31 16:56:00 +00:00
|
|
|
explicit U32String(const value_type *str) : BaseString<u32char_type_t>(str) {}
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Construct a new string containing exactly @p len characters read from address @p str. */
|
2020-10-31 16:56:00 +00:00
|
|
|
U32String(const value_type *str, uint32 len) : BaseString<u32char_type_t>(str, len) {}
|
2020-10-27 22:22:25 +00:00
|
|
|
|
2020-10-31 16:56:00 +00:00
|
|
|
explicit U32String(const uint32 *str) : BaseString<u32char_type_t>((const value_type *) str) {}
|
|
|
|
U32String(const uint32 *str, uint32 len) : BaseString<u32char_type_t>((const value_type *) str, len) {}
|
|
|
|
U32String(const uint32 *beginP, const uint32 *endP) : BaseString<u32char_type_t>((const value_type *) beginP, (const value_type *) endP) {}
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */
|
2020-10-31 16:56:00 +00:00
|
|
|
U32String(const value_type *beginP, const value_type *endP) : BaseString<u32char_type_t>(beginP, endP) {}
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2013-11-23 20:34:54 +00:00
|
|
|
/** Construct a copy of the given string. */
|
2020-10-31 16:56:00 +00:00
|
|
|
U32String(const U32String &str) : BaseString<u32char_type_t>(str) {}
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2022-09-24 21:49:58 +00:00
|
|
|
/** Construct a string by moving an existing string. */
|
|
|
|
U32String(U32String &&str) : BaseString<u32char_type_t>(static_cast<BaseString<u32char_type_t> &&>(str)) {}
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Construct a new string from the given null-terminated C string that uses the given @p page encoding. */
|
2020-11-15 15:20:35 +00:00
|
|
|
explicit U32String(const char *str, CodePage page = kUtf8);
|
2019-01-01 08:40:17 +00:00
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Construct a new string containing exactly @p len characters read from address @p str. */
|
2020-11-15 15:20:35 +00:00
|
|
|
U32String(const char *str, uint32 len, CodePage page = kUtf8);
|
2019-01-01 08:40:17 +00:00
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */
|
2020-11-15 15:20:35 +00:00
|
|
|
U32String(const char *beginP, const char *endP, CodePage page = kUtf8);
|
2019-01-01 08:40:17 +00:00
|
|
|
|
|
|
|
/** Construct a copy of the given string. */
|
2020-11-15 15:20:35 +00:00
|
|
|
U32String(const String &str, CodePage page = kUtf8);
|
2019-01-01 08:40:17 +00:00
|
|
|
|
2021-07-15 03:10:25 +00:00
|
|
|
/** Construct a string consisting of the given character. */
|
|
|
|
explicit U32String(value_type c);
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Assign a given string to this string. */
|
2013-11-23 20:34:54 +00:00
|
|
|
U32String &operator=(const U32String &str);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
2022-09-24 21:49:58 +00:00
|
|
|
/** Move a given string to this string. */
|
|
|
|
U32String &operator=(U32String &&str);
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** @overload */
|
2019-01-01 08:40:17 +00:00
|
|
|
U32String &operator=(const String &str);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** @overload */
|
2019-01-01 08:40:17 +00:00
|
|
|
U32String &operator=(const value_type *str);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** @overload */
|
2019-01-01 08:40:17 +00:00
|
|
|
U32String &operator=(const char *str);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
2023-12-30 15:02:14 +00:00
|
|
|
/** @overload */
|
|
|
|
U32String &operator=(value_type c);
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Append the given string to this string. */
|
2013-11-23 20:34:54 +00:00
|
|
|
U32String &operator+=(const U32String &str);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
2023-12-30 15:02:14 +00:00
|
|
|
/** @overload */
|
|
|
|
U32String &operator+=(const value_type *str);
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** @overload */
|
2013-11-23 20:34:54 +00:00
|
|
|
U32String &operator+=(value_type c);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
2020-10-27 22:22:25 +00:00
|
|
|
using BaseString<value_type>::operator==;
|
|
|
|
using BaseString<value_type>::operator!=;
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** Check whether this string is identical to string @p x. */
|
2019-01-01 08:40:17 +00:00
|
|
|
bool operator==(const String &x) const;
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** @overload */
|
2019-01-01 08:40:17 +00:00
|
|
|
bool operator==(const char *x) const;
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** Check whether this string is different than string @p x. */
|
2019-01-01 08:40:17 +00:00
|
|
|
bool operator!=(const String &x) const;
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** @overload */
|
2019-01-01 08:40:17 +00:00
|
|
|
bool operator!=(const char *x) const;
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Convert the string to the given @p page encoding and return the result as a new String. */
|
2020-11-05 23:10:52 +00:00
|
|
|
String encode(CodePage page = kUtf8) const;
|
2019-10-19 15:38:26 +00:00
|
|
|
|
2022-06-19 01:57:23 +00:00
|
|
|
/** Convert the string to the given @p page encoding and output in string @p outString,
|
|
|
|
replacing invalid characters with @p errorChar. */
|
|
|
|
StringEncodingResult encode(String &outString, CodePage page, char errorChar) const;
|
|
|
|
|
2020-06-13 16:42:25 +00:00
|
|
|
/**
|
2020-06-22 16:35:11 +00:00
|
|
|
* Print formatted data into a U32String object.
|
2020-11-11 18:48:06 +00:00
|
|
|
*
|
|
|
|
* Similar to sprintf, except that it stores the result
|
|
|
|
* in a (variably sized) string instead of a fixed-size buffer.
|
2020-06-13 16:42:25 +00:00
|
|
|
*/
|
2022-09-19 21:25:16 +00:00
|
|
|
template<class... TParam>
|
|
|
|
static U32String format(const U32String &fmt, TParam... param);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** @overload **/
|
2020-09-08 20:22:04 +00:00
|
|
|
static U32String format(const char *fmt, ...);
|
2020-06-13 16:42:25 +00:00
|
|
|
|
|
|
|
/**
|
2020-11-11 18:48:06 +00:00
|
|
|
* Print formatted data into a U32String object.
|
|
|
|
* The method takes in the output by reference and works with iterators.
|
2020-06-13 16:42:25 +00:00
|
|
|
*/
|
2020-10-27 22:22:25 +00:00
|
|
|
static int vformat(const value_type *fmt, const value_type *fmtEnd, U32String &output, va_list args);
|
2020-06-13 16:42:25 +00:00
|
|
|
|
2020-11-05 23:10:52 +00:00
|
|
|
/** Return a substring of this string */
|
2020-11-01 21:53:03 +00:00
|
|
|
U32String substr(size_t pos = 0, size_t len = npos) const;
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
const uint32 *u32_str() const { /*!< Return the string as a UTF-32 pointer. */
|
2020-10-27 22:22:25 +00:00
|
|
|
return (const uint32 *) _str;
|
|
|
|
}
|
2013-11-23 20:34:54 +00:00
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Decode a big endian UTF-16 string into a U32String. */
|
2020-11-15 15:20:35 +00:00
|
|
|
static Common::U32String decodeUTF16BE(const uint16 *start, uint len);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** Decode a little endian UTF-16 string into a U32String. */
|
2020-11-15 15:20:35 +00:00
|
|
|
static Common::U32String decodeUTF16LE(const uint16 *start, uint len);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** Decode a native UTF-16 string into a U32String. */
|
2020-11-15 15:20:35 +00:00
|
|
|
static Common::U32String decodeUTF16Native(const uint16 *start, uint len);
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Transform a U32String into UTF-16 representation (big endian). The result must be freed. */
|
2020-11-15 15:20:35 +00:00
|
|
|
uint16 *encodeUTF16BE(uint *len = nullptr) const;
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** Transform a U32String into UTF-16 representation (native endian). The result must be freed. */
|
2020-11-15 15:20:35 +00:00
|
|
|
uint16 *encodeUTF16LE(uint *len = nullptr) const;
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** Transform a U32String into UTF-16 representation (native encoding). The result must be freed. */
|
2020-11-15 15:20:35 +00:00
|
|
|
uint16 *encodeUTF16Native(uint *len = nullptr) const;
|
|
|
|
|
2020-10-27 22:22:25 +00:00
|
|
|
private:
|
2022-09-19 21:25:16 +00:00
|
|
|
static U32String formatInternal(const U32String *fmt, ...);
|
|
|
|
|
2022-11-15 10:59:51 +00:00
|
|
|
/**
|
|
|
|
* Helper function for vformat. Convert an int to a string.
|
|
|
|
* Minimal implementation, only for base 10.
|
|
|
|
*/
|
2024-03-04 00:16:43 +00:00
|
|
|
static value_type* ustr_helper_itoa(int num, value_type* str, uint base);
|
2022-11-15 10:59:51 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Helper function for vformat. Convert an unsigned int to a string.
|
|
|
|
* Minimal implementation, only for base 10.
|
|
|
|
*/
|
2024-03-04 00:16:43 +00:00
|
|
|
static value_type* ustr_helper_uitoa(uint num, value_type* str, uint base);
|
2022-11-15 10:59:51 +00:00
|
|
|
|
2020-11-15 15:20:35 +00:00
|
|
|
void decodeInternal(const char *str, uint32 len, CodePage page);
|
|
|
|
void decodeOneByte(const char *str, uint32 len, CodePage page);
|
2021-04-15 20:16:21 +00:00
|
|
|
void decodeWindows932(const char *src, uint32 len);
|
2023-02-27 03:16:28 +00:00
|
|
|
void decodeWindows936(const char *src, uint32 len);
|
2020-11-15 15:20:35 +00:00
|
|
|
void decodeWindows949(const char *src, uint32 len);
|
2021-04-15 20:16:21 +00:00
|
|
|
void decodeWindows950(const char *src, uint32 len);
|
2022-06-12 22:13:23 +00:00
|
|
|
void decodeJohab(const char *src, uint32 len);
|
2020-11-15 15:20:35 +00:00
|
|
|
void decodeUTF8(const char *str, uint32 len);
|
2021-05-04 08:45:03 +00:00
|
|
|
|
2020-11-15 15:20:35 +00:00
|
|
|
friend class String;
|
2018-08-05 17:32:25 +00:00
|
|
|
};
|
|
|
|
|
2022-09-19 21:25:16 +00:00
|
|
|
template<class... TParam>
|
|
|
|
inline U32String U32String::format(const U32String &fmt, TParam... param) {
|
2022-10-30 11:24:27 +00:00
|
|
|
return formatInternal(&fmt, Common::forward<TParam>(param)...);
|
2022-09-19 21:25:16 +00:00
|
|
|
}
|
|
|
|
|
2020-11-11 18:48:06 +00:00
|
|
|
/** Concatenate strings @p x and @p y. */
|
2019-10-18 15:18:54 +00:00
|
|
|
U32String operator+(const U32String &x, const U32String &y);
|
2020-11-11 18:48:06 +00:00
|
|
|
|
|
|
|
/** Append the given @p y character to the given @p x string. */
|
2020-10-27 22:22:25 +00:00
|
|
|
U32String operator+(const U32String &x, U32String::value_type y);
|
2020-07-08 21:30:36 +00:00
|
|
|
|
2021-07-12 18:54:47 +00:00
|
|
|
/**
|
|
|
|
* Returns a copy of the string with all non-printable characters properly escaped
|
|
|
|
* with use of C++ escape sequences.
|
|
|
|
* Unlike the String version, this does not escape characters with
|
|
|
|
* codepoints > 127.
|
|
|
|
*
|
|
|
|
* @param src The source string.
|
|
|
|
* @param keepNewLines Whether to keep newlines or convert them to '\n', default: true.
|
|
|
|
* @return The converted string.
|
|
|
|
*/
|
|
|
|
U32String toPrintable(const U32String &src, bool keepNewLines = true);
|
|
|
|
|
2020-07-08 21:30:36 +00:00
|
|
|
/** @} */
|
|
|
|
|
2013-11-23 20:34:54 +00:00
|
|
|
} // End of namespace Common
|
|
|
|
|
|
|
|
#endif
|