2019-07-26 00:39:44 +02:00
|
|
|
/* ScummVM - Graphic Adventure Engine
|
|
|
|
*
|
|
|
|
* ScummVM is the legal property of its developers, whose names
|
|
|
|
* are too numerous to list here. Please refer to the COPYRIGHT
|
|
|
|
* file distributed with this source distribution.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef COMMON_ENCODING_H
|
|
|
|
#define COMMON_ENCODING_H
|
2019-08-19 18:22:50 +02:00
|
|
|
|
2019-08-25 01:04:54 +02:00
|
|
|
#include "common/scummsys.h"
|
|
|
|
#include "common/str.h"
|
|
|
|
#include "common/system.h"
|
2019-07-26 00:39:44 +02:00
|
|
|
|
|
|
|
|
|
|
|
namespace Common {
|
|
|
|
|
2020-07-08 23:30:36 +02:00
|
|
|
/**
|
|
|
|
* @defgroup common_encoding Text encoding
|
|
|
|
* @ingroup common
|
|
|
|
*
|
|
|
|
* @brief Functions for managing text encoding.
|
|
|
|
*
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
|
|
|
* A class, that allows conversion between different text encoding,
|
|
|
|
* the encodings available depend on the current backend and if the
|
|
|
|
* ScummVM is compiled with or without iconv.
|
|
|
|
*/
|
2019-07-26 00:39:44 +02:00
|
|
|
class Encoding {
|
|
|
|
public:
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
|
|
|
* Constructs everything needed for the conversion between 2 encodings
|
|
|
|
* and saves the values for future use.
|
|
|
|
*
|
|
|
|
* @param to Name of the encoding the strings will be converted to
|
|
|
|
* @param from Name of the encoding the strings will be converted from
|
|
|
|
*/
|
2019-07-26 00:39:44 +02:00
|
|
|
Encoding(const String &to, const String &from);
|
2019-08-25 10:47:59 +02:00
|
|
|
~Encoding() {};
|
2019-07-26 00:39:44 +02:00
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
|
|
|
* Converts string between encodings. The resulting string is ended by
|
|
|
|
* a character with value 0 (C-like ending for 1 byte per character
|
|
|
|
* encodings, 2 zero bytes for UTF-16, 4 zero bytes for UTF-32)
|
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
|
|
|
* @param string String that should be converted.
|
|
|
|
* @param length Length of the string to convert in bytes.
|
|
|
|
*
|
|
|
|
* @return Converted string (must be freed) or nullptr if the conversion failed
|
|
|
|
*/
|
2019-07-26 00:39:44 +02:00
|
|
|
char *convert(const char *string, size_t length);
|
2019-07-31 01:28:59 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Static version of the method above.
|
|
|
|
* Converts string between encodings. The resulting string is ended by
|
|
|
|
* a character with value 0 (C-like ending for 1 byte per character
|
|
|
|
* encodings, 2 zero bytes for UTF-16, 4 zero bytes for UTF-32)
|
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
|
|
|
* @param to Name of the encoding the strings will be converted to
|
|
|
|
* @param from Name of the encoding the strings will be converted from
|
|
|
|
* @param string String that should be converted.
|
|
|
|
* @param length Length of the string to convert in bytes.
|
|
|
|
*
|
|
|
|
* @return Converted string (must be freed) or nullptr if the conversion failed
|
|
|
|
*/
|
2019-07-26 00:39:44 +02:00
|
|
|
static char *convert(const String &to, const String &from, const char *string, size_t length);
|
|
|
|
|
2020-09-06 16:54:58 +01:00
|
|
|
static char *convert(const String &to, const String &from, const String &s) {
|
|
|
|
return convert(to, from, s.c_str(), s.size());
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *convert(const String &to, const U32String &s) {
|
|
|
|
return convert(to, "UTF-32", (const char *)s.c_str(), s.size() * 4);
|
|
|
|
}
|
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
|
|
|
* @return The encoding, which is currently being converted from
|
|
|
|
*/
|
2019-07-26 00:39:44 +02:00
|
|
|
String getFrom() {return _from;};
|
2019-07-31 01:28:59 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @param from The encoding, to convert from
|
|
|
|
*/
|
2019-08-25 10:47:59 +02:00
|
|
|
void setFrom(const String &from) {_from = from;};
|
2019-07-26 00:39:44 +02:00
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
|
|
|
* @return The encoding, which is currently being converted to
|
|
|
|
*/
|
2019-07-26 00:39:44 +02:00
|
|
|
String getTo() {return _to;};
|
2019-07-31 01:28:59 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @param to The encoding, to convert to
|
|
|
|
*/
|
2019-08-25 10:47:59 +02:00
|
|
|
void setTo(const String &to) {_to = to;};
|
2019-09-01 23:24:50 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Switches the endianity of a string.
|
|
|
|
*
|
|
|
|
* @param string Array containing the characters of a string.
|
|
|
|
* @param length Length of the string in bytes
|
|
|
|
* @param bitCount Number of bits used for each character.
|
|
|
|
*
|
|
|
|
* @return Array of characters with the opposite endianity
|
|
|
|
*/
|
|
|
|
static char *switchEndian(const char *string, int length, int bitCount);
|
2019-09-05 02:51:10 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Computes length (in bytes) of a string in a given encoding.
|
|
|
|
* The string must be zero ended. Similar to strlen
|
|
|
|
* (could be used instead of strlen).
|
|
|
|
*
|
|
|
|
* @param string String, which size should be computed.
|
|
|
|
* @param encoding Encoding of the string.
|
|
|
|
*
|
|
|
|
* @return Size of the string in bytes.
|
|
|
|
*/
|
|
|
|
static size_t stringLength(const char *string, const String &encoding);
|
2019-07-26 00:39:44 +02:00
|
|
|
|
|
|
|
private:
|
2019-07-31 01:28:59 +02:00
|
|
|
/** The encoding, which is currently being converted to */
|
2019-07-26 00:39:44 +02:00
|
|
|
String _to;
|
2019-07-31 01:28:59 +02:00
|
|
|
|
|
|
|
/** The encoding, which is currently being converted from */
|
2019-07-26 00:39:44 +02:00
|
|
|
String _from;
|
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
2019-07-31 22:40:45 +02:00
|
|
|
* Takes care of transliteration and calls conversion
|
2019-07-31 01:28:59 +02:00
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
|
|
|
* @param to Name of the encoding the strings will be converted to
|
|
|
|
* @param from Name of the encoding the strings will be converted from
|
|
|
|
* @param string String that should be converted.
|
|
|
|
* @param length Length of the string to convert in bytes.
|
|
|
|
*
|
|
|
|
* @return Converted string (must be freed) or nullptr if the conversion failed
|
|
|
|
*/
|
2019-08-25 10:47:59 +02:00
|
|
|
static char *convertWithTransliteration(const String &to, const String &from, const char *string, size_t length);
|
2019-07-31 00:43:57 +02:00
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
|
|
|
* Calls as many conversion functions as possible or until the conversion
|
|
|
|
* succeeds. It first tries to use iconv, then it tries to use platform
|
|
|
|
* specific functions and after that it tries to use TransMan mapping.
|
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
|
|
|
* @param to Name of the encoding the strings will be converted to
|
|
|
|
* @param from Name of the encoding the strings will be converted from
|
|
|
|
* @param string String that should be converted.
|
|
|
|
* @param length Length of the string to convert in bytes.
|
|
|
|
*
|
|
|
|
* @return Converted string (must be freed) or nullptr if the conversion failed
|
|
|
|
*/
|
2019-08-25 10:47:59 +02:00
|
|
|
static char *conversion(const String &to, const String &from, const char *string, size_t length);
|
2019-07-26 00:39:44 +02:00
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
|
|
|
* Tries to convert the string using iconv.
|
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
2019-08-25 10:47:59 +02:00
|
|
|
* @param to Name of the encoding the strings will be converted to
|
|
|
|
* @param from Name of the encoding the strings will be converted from
|
2019-07-31 01:28:59 +02:00
|
|
|
* @param string String that should be converted.
|
|
|
|
* @param length Length of the string to convert in bytes.
|
|
|
|
*
|
|
|
|
* @return Converted string (must be freed) or nullptr if the conversion failed
|
|
|
|
*/
|
2019-08-25 10:47:59 +02:00
|
|
|
static char *convertIconv(const char *to, const char *from, const char *string, size_t length);
|
2019-07-26 00:39:44 +02:00
|
|
|
|
2019-08-26 16:48:50 +02:00
|
|
|
/**
|
|
|
|
* Uses conversion table to convert the string to unicode and from that
|
|
|
|
* to the final encoding. Important encodings, that aren't supported by
|
|
|
|
* all backends should go here.
|
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
|
|
|
* @param to Name of the encoding the strings will be converted to
|
|
|
|
* @param from Name of the encoding the strings will be converted from
|
|
|
|
* @param string String that should be converted.
|
|
|
|
* @param length Length of the string to convert in bytes.
|
|
|
|
*
|
|
|
|
* @return Converted string (must be freed) or nullptr if the conversion failed
|
|
|
|
*/
|
|
|
|
static char *convertConversionTable(const char *to, const char *from, const char *string, size_t length);
|
|
|
|
|
2019-07-31 01:28:59 +02:00
|
|
|
/**
|
2019-08-21 23:21:28 +02:00
|
|
|
* Transliterates cyrillic string in iso-8859-5 encoding and returns
|
2019-07-31 01:28:59 +02:00
|
|
|
* it's ASCII (latin) form.
|
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
|
|
|
* @param string String that should be converted
|
|
|
|
*
|
|
|
|
* @return Transliterated string in ASCII (must be freed) or nullptr on fail.
|
|
|
|
*/
|
2019-08-21 23:21:28 +02:00
|
|
|
static char *transliterateCyrillic(const char *string);
|
2019-07-31 01:28:59 +02:00
|
|
|
|
|
|
|
/**
|
2019-08-21 23:21:28 +02:00
|
|
|
* Transliterates cyrillic in UTF-32 string.
|
2019-07-31 01:28:59 +02:00
|
|
|
*
|
|
|
|
* The result has to be freed after use.
|
|
|
|
*
|
|
|
|
* @param string String that should be converted
|
|
|
|
* @param length Length of the string in bytes
|
|
|
|
*
|
|
|
|
* @return Transliterated string in UTF-32 (must be freed) or nullptr on fail.
|
|
|
|
*/
|
2019-07-31 00:43:57 +02:00
|
|
|
static uint32 *transliterateUTF32(const uint32 *string, size_t length);
|
2019-07-26 00:39:44 +02:00
|
|
|
};
|
|
|
|
|
2020-07-08 23:30:36 +02:00
|
|
|
/** @} */
|
|
|
|
|
2019-07-26 00:39:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif // COMMON_ENCODING_H
|