diff --git a/Common/Data/Encoding/Utf8.cpp b/Common/Data/Encoding/Utf8.cpp index 00e2b70578..0e2b934a22 100644 --- a/Common/Data/Encoding/Utf8.cpp +++ b/Common/Data/Encoding/Utf8.cpp @@ -37,27 +37,21 @@ inline bool isutf(char c) { } static const uint32_t offsetsFromUTF8[6] = { - 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL + 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; static const uint8_t trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5, }; -/* returns length of next utf-8 sequence */ -int u8_seqlen(const char *s) -{ - return trailingBytesForUTF8[(unsigned int)(unsigned char)s[0]] + 1; -} - /* conversions without error checking only works for valid UTF-8, i.e. no 5- or 6-byte sequences srcsz = source size in bytes, or -1 if 0-terminated @@ -68,7 +62,7 @@ int u8_seqlen(const char *s) for all the characters. if sz = srcsz+1 (i.e. 
// Encodes a code point in the range U+0000..U+FFFF as 1-3 bytes of UTF-8.
// Surrogate values (U+D800..U+DFFF) are encoded like any other 3-byte value, which is what
// Java Modified UTF-8 requires for each half of a surrogate pair.
// `output` must have room for at least 3 bytes.
// Returns the number of bytes written, or 0 (nothing written) if code_point is above U+FFFF,
// since 4-byte output is intentionally not supported.
size_t encode_utf8_modified(uint32_t code_point, unsigned char* output) {
	if (code_point <= 0x7F) {
		output[0] = (unsigned char)code_point;
		return 1;
	} else if (code_point <= 0x7FF) {
		output[0] = (unsigned char)(0xC0 | (code_point >> 6));
		output[1] = (unsigned char)(0x80 | (code_point & 0x3F));
		return 2;
	} else if (code_point <= 0xFFFF) {
		output[0] = (unsigned char)(0xE0 | (code_point >> 12));
		output[1] = (unsigned char)(0x80 | ((code_point >> 6) & 0x3F));
		output[2] = (unsigned char)(0x80 | (code_point & 0x3F));
		return 3;
	}
	return 0;
}

// Converts regular UTF-8 to Java Modified UTF-8 (the encoding JNI's NewStringUTF expects):
//  * U+0000 is written as the two bytes 0xC0 0x80.
//  * Each 4-byte sequence (U+10000..U+10FFFF) is split into a UTF-16 surrogate pair, and each
//    surrogate is written as its own 3-byte sequence (6 bytes total).
//  * Well-formed 1-3 byte sequences are copied unchanged.
// Malformed input (truncated sequence, bad continuation byte, invalid lead byte, overlong or
// out-of-range 4-byte value) is never read past the end of `input`; the offending lead byte is
// copied through as-is and decoding resumes at the next byte.
// The previous contents of *output are replaced. `input` must not alias *output.
// Originally written by ChatGPT, then corrected and modified.
void ConvertUTF8ToJavaModifiedUTF8(std::string *output, std::string_view input) {
	const size_t len = input.size();
	// Worst-case growth is 2x (a NUL byte becomes 2 bytes). A 4-byte sequence becomes 6 bytes,
	// which is only 1.5x, and everything else is copied 1:1.
	output->resize(len * 2);
	unsigned char *out = (unsigned char *)output->data();
	size_t out_idx = 0;
	size_t i = 0;
	while (i < len) {
		const unsigned char c = (unsigned char)input[i];
		if (c == 0x00) {
			// Modified UTF-8 never contains a raw NUL byte.
			out[out_idx++] = 0xC0;
			out[out_idx++] = 0x80;
			i++;
			continue;
		}

		// Expected sequence length according to the lead byte. Stray continuation bytes and
		// invalid lead bytes (0xF8..0xFF) are treated as single bytes.
		size_t seq_len = 1;
		if ((c & 0xE0) == 0xC0) seq_len = 2;
		else if ((c & 0xF0) == 0xE0) seq_len = 3;
		else if ((c & 0xF8) == 0xF0) seq_len = 4;

		// The whole sequence must be present and made of proper continuation bytes (10xxxxxx).
		bool well_formed = seq_len <= len - i;
		for (size_t k = 1; well_formed && k < seq_len; k++) {
			well_formed = ((unsigned char)input[i + k] & 0xC0) == 0x80;
		}

		uint32_t code_point = 0;
		if (well_formed && seq_len == 4) {
			code_point = ((uint32_t)(c & 0x07) << 18) |
				((uint32_t)((unsigned char)input[i + 1] & 0x3F) << 12) |
				((uint32_t)((unsigned char)input[i + 2] & 0x3F) << 6) |
				(uint32_t)((unsigned char)input[i + 3] & 0x3F);
			// Only supplementary-plane values can be expressed as a surrogate pair.
			well_formed = code_point >= 0x10000 && code_point <= 0x10FFFF;
		}

		if (!well_formed) {
			// Pass the lead byte through untouched and re-examine the following bytes one by one.
			out[out_idx++] = c;
			i++;
			continue;
		}

		if (seq_len == 4) {
			// Convert to a UTF-16 surrogate pair, then encode each half as 3-byte UTF-8.
			const uint32_t offset = code_point - 0x10000;
			const uint16_t high_surrogate = (uint16_t)(0xD800 + (offset >> 10));
			const uint16_t low_surrogate = (uint16_t)(0xDC00 + (offset & 0x3FF));
			out_idx += encode_utf8_modified(high_surrogate, out + out_idx);
			out_idx += encode_utf8_modified(low_surrogate, out + out_idx);
		} else {
			// 1-3 byte sequences are identical in both encodings.
			memcpy(out + out_idx, input.data() + i, seq_len);
			out_idx += seq_len;
		}
		i += seq_len;
	}
	// Every branch above emits at least as many bytes as it consumes, so the result is never
	// shorter than the input.
	output->resize(out_idx);
}
+void ConvertUTF8ToJavaModifiedUTF8(std::string *output, std::string_view input); diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index cfdc883d0c..591f2eeffd 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -895,6 +895,7 @@ private: OpenGLTexture::OpenGLTexture(GLRenderManager *render, const TextureDesc &desc) : render_(render) { _dbg_assert_(desc.format != Draw::DataFormat::UNDEFINED); + _dbg_assert_msg_(desc.width > 0 && desc.height > 0 && desc.depth > 0, "w: %d h: %d d: %d fmt: %s", desc.width, desc.height, desc.depth, DataFormatToString(desc.format)); _dbg_assert_(desc.width > 0 && desc.height > 0 && desc.depth > 0); _dbg_assert_(desc.type != Draw::TextureType::UNKNOWN); diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h index 4e7f8ab909..3d171bb1a8 100644 --- a/Common/Math/CrossSIMD.h +++ b/Common/Math/CrossSIMD.h @@ -40,7 +40,7 @@ // Compatibility wrappers making ARM64 NEON code run on ARM32 // With optimization on, these should compile down to the optimal code. 
-inline float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t b, int lane) { +static inline float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t b, int lane) { switch (lane & 3) { case 0: return vmulq_lane_f32(a, vget_low_f32(b), 0); case 1: return vmulq_lane_f32(a, vget_low_f32(b), 1); @@ -49,7 +49,7 @@ inline float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t b, int lane) { } } -inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c, int lane) { +static inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c, int lane) { switch (lane & 3) { case 0: return vmlaq_lane_f32(a, b, vget_low_f32(c), 0); case 1: return vmlaq_lane_f32(a, b, vget_low_f32(c), 1); @@ -58,7 +58,7 @@ inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c, } } -inline uint32x4_t vcgezq_f32(float32x4_t v) { +static inline uint32x4_t vcgezq_f32(float32x4_t v) { return vcgeq_f32(v, vdupq_n_f32(0.0f)); } diff --git a/Common/Render/Text/draw_text.cpp b/Common/Render/Text/draw_text.cpp index 6c0c9af93f..1fa2a070b6 100644 --- a/Common/Render/Text/draw_text.cpp +++ b/Common/Render/Text/draw_text.cpp @@ -123,8 +123,8 @@ void TextDrawer::DrawString(DrawBuffer &target, std::string_view str, float x, f void TextDrawer::MeasureString(std::string_view str, float *w, float *h) { if (str.empty()) { - *w = 0.0; - *h = 0.0; + *w = 0.0f; + *h = 0.0f; return; } diff --git a/Common/Render/Text/draw_text_android.cpp b/Common/Render/Text/draw_text_android.cpp index 351d4b6d10..f7df51552e 100644 --- a/Common/Render/Text/draw_text_android.cpp +++ b/Common/Render/Text/draw_text_android.cpp @@ -83,15 +83,18 @@ void TextDrawerAndroid::MeasureStringInternal(std::string_view str, float *w, fl } else { ERROR_LOG(Log::G3D, "Missing font"); } - std::string text(str); + std::string text; + ConvertUTF8ToJavaModifiedUTF8(&text, str); + auto env = getEnv(); - // Unfortunate that we can't create a jstr from a std::string_view directly. 
jstring jstr = env->NewStringUTF(text.c_str()); uint32_t size = env->CallStaticIntMethod(cls_textRenderer, method_measureText, jstr, scaledSize); env->DeleteLocalRef(jstr); - *w = (size >> 16); - *h = (size & 0xFFFF); + *w = size >> 16; + *h = size & 0xFFFF; + + WARN_LOG(Log::G3D, "Measure Modified: '%.*s' size: %fx%f", (int)text.length(), text.data(), *w, *h); } bool TextDrawerAndroid::DrawStringBitmap(std::vector &bitmapData, TextStringEntry &entry, Draw::DataFormat texFormat, std::string_view str, int align, bool fullColor) { @@ -100,6 +103,7 @@ bool TextDrawerAndroid::DrawStringBitmap(std::vector &bitmapData, TextS return false; } + double size = 0.0; auto iter = fontMap_.find(fontHash_); if (iter != fontMap_.end()) { @@ -109,7 +113,11 @@ bool TextDrawerAndroid::DrawStringBitmap(std::vector &bitmapData, TextS } auto env = getEnv(); - jstring jstr = env->NewStringUTF(std::string(str).c_str()); + + std::string text; + ConvertUTF8ToJavaModifiedUTF8(&text, str); + jstring jstr = env->NewStringUTF(text.c_str()); + uint32_t textSize = env->CallStaticIntMethod(cls_textRenderer, method_measureText, jstr, size); int imageWidth = (short)(textSize >> 16); int imageHeight = (short)(textSize & 0xFFFF); @@ -117,6 +125,7 @@ bool TextDrawerAndroid::DrawStringBitmap(std::vector &bitmapData, TextS imageWidth = 1; if (imageHeight <= 0) imageHeight = 1; + WARN_LOG(Log::G3D, "Text: '%.*s' (%02x)", (int)str.length(), str.data(), str[0]); jintArray imageData = (jintArray)env->CallStaticObjectMethod(cls_textRenderer, method_renderText, jstr, size); env->DeleteLocalRef(jstr); diff --git a/Common/Render/Text/draw_text_win.cpp b/Common/Render/Text/draw_text_win.cpp index b49f4fd039..28a8aeb4f0 100644 --- a/Common/Render/Text/draw_text_win.cpp +++ b/Common/Render/Text/draw_text_win.cpp @@ -151,7 +151,6 @@ bool TextDrawerWin32::DrawStringBitmap(std::vector &bitmapData, TextStr bitmapData.clear(); return false; } - std::wstring wstr = ConvertUTF8ToWString(ReplaceAll(str, "\n", "\r\n")); 
auto iter = fontMap_.find(fontHash_);