scummvm/engines/wintermute/utils/string_util.cpp

/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.

 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.

 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */

/*
 * This file is based on WME Lite.
 * http://dead-code.org/redir.php?target=wmelite
 * Copyright (c) 2011 Jan Nedoma
 */

#include "common/tokenizer.h"
#include "engines/wintermute/utils/string_util.h"
#include "engines/wintermute/utils/convert_utf.h"

namespace Wintermute {

//////////////////////////////////////////////////////////////////////////
// Case-insensitive equality test for two ANSI strings.
// Returns true when compareToIgnoreCase() reports no difference between them.
bool StringUtil::compareNoCase(const AnsiString &str1, const AnsiString &str2) {
	const int difference = str1.compareToIgnoreCase(str2);
	return difference == 0;
}

//////////////////////////////////////////////////////////////////////////
/*bool StringUtil::CompareNoCase(const WideString &str1, const WideString &str2) {
    WideString str1lc = str1;
    WideString str2lc = str2;

    ToLowerCase(str1lc);
    ToLowerCase(str2lc);

    return (str1lc == str2lc);
}*/

// Replaces known UTF-8 sequences in str with (almost) equivalent single-byte
// characters. The string is modified in place and a copy of it is returned.
// Currently only the copyright sign (0xC2 0xA9) is handled; it becomes 'c'.
Common::String StringUtil::substituteUtf8Characters(Common::String &str) {
	// Nothing to substitute when every character already passes isAscii()
	if (isAscii(str))
		return str;

	Common::String punctuation("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");

	// Each substitution shortens the string by one byte, so the length is
	// re-read on every iteration instead of being cached.
	for (uint32 pos = 0; pos < str.size(); ++pos) {
		const char current = str[pos];

		// Letters, digits, spaces and the listed punctuation are left alone
		if (Common::isAlnum(current) || current == ' ' || punctuation.contains(current))
			continue;

		// Replace some UTF-8 characters with (almost) equivalent ANSI ones
		const bool hasNext = pos + 1 < str.size();
		if ((byte)current == 0xc2 && hasNext && (byte)str[pos + 1] == 0xa9) {
			// UTF-8 copyright character: drop the lead byte, then overwrite
			// the trail byte (now at the same position) with 'c'
			str.deleteChar(pos);
			str.setChar('c', pos);
		}
	}

	return str;
}

// Returns true when str consists solely of alphanumeric characters, spaces
// and the punctuation characters listed below. Any other character makes the
// check fail. An empty string passes.
bool StringUtil::isAscii(const Common::String &str) {
	Common::String punctuation("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");

	uint32 pos = 0;
	while (pos < str.size()) {
		const char current = str[pos];
		const bool accepted = Common::isAlnum(current) || current == ' ' || punctuation.contains(current);
		if (!accepted)
			return false;
		++pos;
	}

	return true;
}

//////////////////////////////////////////////////////////////////////////
// Converts a UTF-8 string to a wide string.
// Only strings that pass isAscii() after substituteUtf8Characters() has been
// applied are returned; anything else is reported via warning() and error().
WideString StringUtil::utf8ToWide(const Utf8String &Utf8Str) {
	// WORKAROUND: Wide strings aren't supported yet, so this function only
	// works with ASCII strings. This should cover all English versions.
	Common::String candidate = Utf8Str;
	const Common::String substituted = substituteUtf8Characters(candidate);

	const bool plainAscii = isAscii(substituted);
	if (plainAscii) {
		// No special (UTF-8) characters left, the string can be used as-is
		return substituted;
	}

	warning("String contains special (UTF-8) characters: '%s'", Utf8Str.c_str());
	error("StringUtil::Utf8ToWide - WideString not supported yet for UTF-8 characters");

	// Original WME Lite implementation, kept for reference:
	/*  size_t WideSize = Utf8Str.size();

	    if (sizeof(wchar_t) == 2) {
	        wchar_t *WideStringNative = new wchar_t[WideSize + 1];

	        const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(Utf8Str.c_str());
	        const UTF8 *SourceEnd = SourceStart + WideSize;

	        UTF16 *TargetStart = reinterpret_cast<UTF16 *>(WideStringNative);
	        UTF16 *TargetEnd = TargetStart + WideSize + 1;

	        ConversionResult res = ConvertUTF8toUTF16(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
	        if (res != conversionOK) {
	            delete[] WideStringNative;
	            return L"";
	        }
	        *TargetStart = 0;
	        WideString ResultString(WideStringNative);
	        delete[] WideStringNative;

	        return ResultString;
	    } else if (sizeof(wchar_t) == 4) {
	        wchar_t *WideStringNative = new wchar_t[WideSize + 1];

	        const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(Utf8Str.c_str());
	        const UTF8 *SourceEnd = SourceStart + WideSize;

	        UTF32 *TargetStart = reinterpret_cast<UTF32 *>(WideStringNative);
	        UTF32 *TargetEnd = TargetStart + WideSize;

	        ConversionResult res = ConvertUTF8toUTF32(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
	        if (res != conversionOK) {
	            delete[] WideStringNative;
	            return L"";
	        }
	        *TargetStart = 0;
	        WideString ResultString(WideStringNative);
	        delete[] WideStringNative;

	        return ResultString;
	    } else {
	        return L"";
	    }*/

	// Fallback value after the error report; presumably never reached
	// because error() does not return - confirm against its declaration.
	return "";
}

//////////////////////////////////////////////////////////////////////////
// Converts a wide string to a UTF-8 string.
// Only strings that pass isAscii() after substituteUtf8Characters() has been
// applied are returned; anything else is reported via warning() and error().
Utf8String StringUtil::wideToUtf8(const WideString &WideStr) {
	// WORKAROUND: UTF-8 strings aren't supported yet, so this function only
	// works with ASCII strings. This should cover all English versions.
	Common::String candidate = WideStr;
	const Common::String substituted = substituteUtf8Characters(candidate);

	const bool plainAscii = isAscii(substituted);
	if (plainAscii) {
		// No special (UTF-8) characters left, the string can be used as-is
		return substituted;
	}

	warning("String contains special (UTF-8) characters: '%s'", WideStr.c_str());
	error("StringUtil::wideToUtf8 - WideString not supported yet for UTF-8 characters");

	// Original WME Lite implementation, kept for reference:
	/*  size_t WideSize = WideStr.length();

	    if (sizeof(wchar_t) == 2) {
	        size_t utf8Size = 3 * WideSize + 1;
	        char *utf8StringNative = new char[Utf8Size];

	        const UTF16 *SourceStart = reinterpret_cast<const UTF16 *>(WideStr.c_str());
	        const UTF16 *SourceEnd = SourceStart + WideSize;

	        UTF8 *TargetStart = reinterpret_cast<UTF8 *>(Utf8StringNative);
	        UTF8 *TargetEnd = TargetStart + Utf8Size;

	        ConversionResult res = ConvertUTF16toUTF8(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
	        if (res != conversionOK) {
	            delete[] Utf8StringNative;
	            return (Utf8String)"";
	        }
	        *TargetStart = 0;
	        Utf8String ResultString(Utf8StringNative);
	        delete[] Utf8StringNative;
	        return ResultString;
	    } else if (sizeof(wchar_t) == 4) {
	        size_t utf8Size = 4 * WideSize + 1;
	        char *utf8StringNative = new char[Utf8Size];

	        const UTF32 *SourceStart = reinterpret_cast<const UTF32 *>(WideStr.c_str());
	        const UTF32 *SourceEnd = SourceStart + WideSize;

	        UTF8 *TargetStart = reinterpret_cast<UTF8 *>(Utf8StringNative);
	        UTF8 *TargetEnd = TargetStart + Utf8Size;

	        ConversionResult res = ConvertUTF32toUTF8(&SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
	        if (res != conversionOK) {
	            delete[] Utf8StringNative;
	            return (Utf8String)"";
	        }
	        *TargetStart = 0;
	        Utf8String ResultString(Utf8StringNative);
	        delete[] Utf8StringNative;
	        return ResultString;
	    } else {
	        return (Utf8String)"";
	    }*/

	// Fallback value after the error report; presumably never reached
	// because error() does not return - confirm against its declaration.
	return "";
}

//////////////////////////////////////////////////////////////////////////
// Converts an ANSI string to a wide string by copying it byte by byte.
// Two byte values are substituted on the way: 146 (right quote) becomes an
// apostrophe and 133 (ellipsis symbol) becomes three dots.
WideString StringUtil::ansiToWide(const AnsiString &str) {
	// TODO: This function gets called a lot, so warnings like these drown out the useful information
	Common::String converted;

	for (uint32 pos = 0; pos < str.size(); ++pos) {
		byte c = str[pos];
		switch (c) {
		case 146:
			// Replace right-quote with apostrophe
			converted += '\'';
			break;
		case 133:
			// Replace ...-symbol with ...
			converted += "...";
			break;
		default:
			converted += c;
			break;
		}
	}

	// Original WME Lite implementation (using default os locale!), kept for reference:
	/*  setlocale(LC_CTYPE, "");
	    size_t wideSize = mbstowcs(NULL, str.c_str(), 0) + 1;
	    wchar_t *wstr = new wchar_t[WideSize];
	    mbstowcs(wstr, str.c_str(), WideSize);
	    WideString ResultString(wstr);
	    delete[] wstr;
	    return ResultString;*/
	WideString result(converted);
	return result;
}

//////////////////////////////////////////////////////////////////////////
// Converts a wide string to an ANSI string. No character conversion is
// performed: an AnsiString is simply constructed from the wide string.
AnsiString StringUtil::wideToAnsi(const WideString &wstr) {
	// TODO: This function gets called a lot, so warnings like these drown out the useful information

	// Original WME Lite implementation (using default os locale!), kept for reference:
	/*  setlocale(LC_CTYPE, "");
	    size_t wideSize = wcstombs(NULL, wstr.c_str(), 0) + 1;
	    char *str = new char[WideSize];
	    wcstombs(str, wstr.c_str(), WideSize);
	    AnsiString ResultString(str);
	    delete[] str;
	    return ResultString;*/
	AnsiString result(wstr);
	return result;
}

//////////////////////////////////////////////////////////////////////////
// Checks whether buffer starts with the UTF-8 byte order mark (EF BB BF).
//
// buffer     - data to inspect; a null pointer is treated as "no BOM"
// bufferSize - number of readable bytes in buffer
//
// Returns true when at least three bytes are available and they match the BOM.
bool StringUtil::isUtf8BOM(const byte *buffer, uint32 bufferSize) {
	// The BOM is exactly three bytes long, so a three-byte buffer is enough
	// to hold it (the previous "> 3" test rejected a buffer containing only the BOM).
	if (buffer == nullptr || bufferSize < 3) {
		return false;
	}

	return buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF;
}

//////////////////////////////////////////////////////////////////////////
int StringUtil::indexOf(const WideString &str, const WideString &toFind, size_t startFrom) {
	const char *index = strstr(str.c_str(), toFind.c_str());
	if (index == nullptr) {
		return -1;
	} else {
		return index - str.c_str();
	}
}

// Validates a setting string and returns it unchanged.
// Every character has to lie in the range 33..126 and must not be '=';
// the first character violating this is reported through error().
Common::String StringUtil::encodeSetting(const Common::String &str) {
	for (uint32 pos = 0; pos < str.size(); ++pos) {
		// Inspect the character as an unsigned byte so that values above 127
		// are rejected regardless of whether plain char is signed
		const byte current = (byte)str[pos];
		const bool legal = current >= 33 && current <= 126 && current != '=';
		if (!legal) {
			error("Setting contains illegal characters: %s", str.c_str());
		}
	}
	return str;
}

// Counterpart of encodeSetting(): no transformation is applied, the
// setting string is returned as a copy of the input.
Common::String StringUtil::decodeSetting(const Common::String &str) {
	Common::String decoded(str);
	return decoded;
}

//////////////////////////////////////////////////////////////////////////
// Formats an integer as its decimal ("%d") string representation.
AnsiString StringUtil::toString(int val) {
	AnsiString text = Common::String::format("%d", val);
	return text;
}


} // End of namespace Wintermute