
/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
#ifndef COMMON_ENCODING_H
#define COMMON_ENCODING_H
#include "common/scummsys.h"
#include "common/str.h"
#include "common/system.h"
namespace Common {
/**
* @defgroup common_encoding Text encoding
* @ingroup common
*
* @brief Functions for managing text encoding.
*
* @{
*/
/**
 * Converts text between different encodings.
 *
 * Which encodings are available depends on the current backend and on
 * whether ScummVM was compiled with or without iconv.
 */
class Encoding {
public:
	/**
	 * Constructs everything needed for the conversion between two encodings
	 * and saves the values for future use.
	 *
	 * @param to   Name of the encoding the strings will be converted to
	 * @param from Name of the encoding the strings will be converted from
	 */
	Encoding(const String &to, const String &from);
	~Encoding() {}

	/**
	 * Converts a string between the encodings stored in this object.
	 *
	 * The resulting string is terminated by a character with value 0
	 * (C-like ending for 1 byte per character encodings, 2 zero bytes for
	 * UTF-16, 4 zero bytes for UTF-32).
	 *
	 * The result has to be freed after use.
	 *
	 * @param string String that should be converted.
	 * @param length Length of the string to convert in bytes.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	char *convert(const char *string, size_t length);

	/**
	 * Static version of the method above.
	 *
	 * The resulting string is terminated by a character with value 0
	 * (C-like ending for 1 byte per character encodings, 2 zero bytes for
	 * UTF-16, 4 zero bytes for UTF-32).
	 *
	 * The result has to be freed after use.
	 *
	 * @param to     Name of the encoding the strings will be converted to
	 * @param from   Name of the encoding the strings will be converted from
	 * @param string String that should be converted.
	 * @param length Length of the string to convert in bytes.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	static char *convert(const String &to, const String &from, const char *string, size_t length);

	/**
	 * Convenience overload for a String input: forwards the string's
	 * character data and size to the pointer/length overload.
	 *
	 * @param to   Name of the encoding the string will be converted to
	 * @param from Name of the encoding the string will be converted from
	 * @param s    String that should be converted.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	static char *convert(const String &to, const String &from, const String &s) {
		return convert(to, from, s.c_str(), s.size());
	}

	/**
	 * Convenience overload for a U32String input: the source encoding is
	 * fixed to "UTF-32" and the byte length is computed as 4 bytes per
	 * character.
	 *
	 * @param to Name of the encoding the string will be converted to
	 * @param s  UTF-32 string that should be converted.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	static char *convert(const String &to, const U32String &s) {
		return convert(to, "UTF-32", (const char *)s.c_str(), s.size() * 4);
	}

	/**
	 * @return The encoding, which is currently being converted from
	 */
	String getFrom() const { return _from; }

	/**
	 * @param from The encoding, to convert from
	 */
	void setFrom(const String &from) { _from = from; }

	/**
	 * @return The encoding, which is currently being converted to
	 */
	String getTo() const { return _to; }

	/**
	 * @param to The encoding, to convert to
	 */
	void setTo(const String &to) { _to = to; }

	/**
	 * Switches the endianness of a string.
	 *
	 * NOTE(review): the result is presumably a newly allocated buffer that
	 * the caller has to free, like the other functions of this class;
	 * confirm against the implementation.
	 *
	 * @param string   Array containing the characters of a string.
	 * @param length   Length of the string in bytes
	 * @param bitCount Number of bits used for each character.
	 *
	 * @return Array of characters with the opposite endianness
	 */
	static char *switchEndian(const char *string, int length, int bitCount);

	/**
	 * Computes the length (in bytes) of a string in a given encoding.
	 * The string must be zero terminated. Similar to strlen
	 * (could be used instead of strlen).
	 *
	 * @param string   String, which size should be computed.
	 * @param encoding Encoding of the string.
	 *
	 * @return Size of the string in bytes.
	 */
	static size_t stringLength(const char *string, const String &encoding);

private:
	/** The encoding, which is currently being converted to */
	String _to;

	/** The encoding, which is currently being converted from */
	String _from;

	/**
	 * Takes care of transliteration and calls conversion
	 *
	 * The result has to be freed after use.
	 *
	 * @param to     Name of the encoding the strings will be converted to
	 * @param from   Name of the encoding the strings will be converted from
	 * @param string String that should be converted.
	 * @param length Length of the string to convert in bytes.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	static char *convertWithTransliteration(const String &to, const String &from, const char *string, size_t length);

	/**
	 * Calls as many conversion functions as possible or until the conversion
	 * succeeds. It first tries to use iconv, then it tries to use platform
	 * specific functions and after that it tries to use TransMan mapping.
	 *
	 * The result has to be freed after use.
	 *
	 * @param to     Name of the encoding the strings will be converted to
	 * @param from   Name of the encoding the strings will be converted from
	 * @param string String that should be converted.
	 * @param length Length of the string to convert in bytes.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	static char *conversion(const String &to, const String &from, const char *string, size_t length);

	/**
	 * Tries to convert the string using iconv.
	 *
	 * The result has to be freed after use.
	 *
	 * @param to     Name of the encoding the strings will be converted to
	 * @param from   Name of the encoding the strings will be converted from
	 * @param string String that should be converted.
	 * @param length Length of the string to convert in bytes.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	static char *convertIconv(const char *to, const char *from, const char *string, size_t length);

	/**
	 * Uses a conversion table to convert the string to unicode and from that
	 * to the final encoding. Important encodings, that aren't supported by
	 * all backends should go here.
	 *
	 * The result has to be freed after use.
	 *
	 * @param to     Name of the encoding the strings will be converted to
	 * @param from   Name of the encoding the strings will be converted from
	 * @param string String that should be converted.
	 * @param length Length of the string to convert in bytes.
	 *
	 * @return Converted string (must be freed) or nullptr if the conversion failed
	 */
	static char *convertConversionTable(const char *to, const char *from, const char *string, size_t length);

	/**
	 * Transliterates a cyrillic string in iso-8859-5 encoding and returns
	 * its ASCII (latin) form.
	 *
	 * The result has to be freed after use.
	 *
	 * @param string String that should be converted
	 *
	 * @return Transliterated string in ASCII (must be freed) or nullptr on fail.
	 */
	static char *transliterateCyrillic(const char *string);

	/**
	 * Transliterates cyrillic in a UTF-32 string.
	 *
	 * The result has to be freed after use.
	 *
	 * @param string String that should be converted
	 * @param length Length of the string in bytes
	 *
	 * @return Transliterated string in UTF-32 (must be freed) or nullptr on fail.
	 */
	static uint32 *transliterateUTF32(const uint32 *string, size_t length);
};
/** @} */
}
#endif // COMMON_ENCODING_H