An old version of Android had strict JNI validation of UTF-8 strings enabled. Let's make sure our strings pass it.

This commit is contained in:
Henrik Rydgård 2024-10-18 13:51:48 +02:00
parent 0af3a87d3f
commit a6a189c24d
7 changed files with 96 additions and 28 deletions

View File

@ -37,27 +37,21 @@ inline bool isutf(char c) {
}
/* Six 32-bit constants, one slot for each trailing-byte count (0..5) that
   trailingBytesForUTF8 can report.
   NOTE(review): presumably the bias a decoder subtracts after summing the raw
   bytes of a sequence, to strip the UTF-8 marker bits in one step - the use
   site is not visible here, confirm against u8_toucs. */
static const uint32_t offsetsFromUTF8[6] = {
0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL
0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL
};
/* Lookup table indexed by the first byte of a UTF-8 sequence; the entry is the
   number of bytes that follow that first byte (u8_seqlen adds 1 to get the
   full sequence length).
     0x00-0xBF -> 0    0xC0-0xDF -> 1    0xE0-0xEF -> 2
     0xF0-0xF7 -> 3    0xF8-0xFB -> 4    0xFC-0xFF -> 5 */
static const uint8_t trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5,
};
/* Byte length of the UTF-8 sequence that begins at s, judged from its first
   byte alone: the table gives the trailing-byte count, plus one for the lead. */
int u8_seqlen(const char *s)
{
	const unsigned char lead = static_cast<unsigned char>(*s);
	return 1 + trailingBytesForUTF8[lead];
}
/* conversions without error checking
only works for valid UTF-8, i.e. no 5- or 6-byte sequences
srcsz = source size in bytes, or -1 if 0-terminated
@ -68,7 +62,7 @@ int u8_seqlen(const char *s)
for all the characters.
if sz = srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
*/
int u8_toucs(uint32_t *dest, int sz, const char *src, int srcsz)
int u8_toucs(uint32_t *dest, int sz, const char *src, int srcsz)
{
uint32_t ch;
const char *src_end = src + srcsz;
@ -519,6 +513,67 @@ std::string CodepointToUTF8(uint32_t codePoint) {
return std::string(temp);
}
// Writes code_point to output as a 1-, 2- or 3-byte UTF-8 sequence and returns the number of
// bytes written. Code points above U+FFFF would need a 4-byte sequence, which is deliberately
// not supported: in that case nothing is written and 0 is returned.
// output must have room for at least 3 bytes.
size_t encode_utf8_modified(uint32_t code_point, unsigned char* output) {
	// Builds a continuation byte (10xxxxxx) from the low six bits of the given value.
	const auto continuation = [](uint32_t bits) {
		return static_cast<unsigned char>(0x80 | (bits & 0x3F));
	};

	if (code_point > 0xFFFF) {
		return 0;
	}

	if (code_point > 0x7FF) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		output[0] = static_cast<unsigned char>(0xE0 | (code_point >> 12));
		output[1] = continuation(code_point >> 6);
		output[2] = continuation(code_point);
		return 3;
	}

	if (code_point > 0x7F) {
		// 110xxxxx 10xxxxxx
		output[0] = static_cast<unsigned char>(0xC0 | (code_point >> 6));
		output[1] = continuation(code_point);
		return 2;
	}

	// Plain ASCII, stored as-is.
	output[0] = static_cast<unsigned char>(code_point);
	return 1;
}
// Converts regular UTF-8 to Java "Modified UTF-8".
//
// Differences from regular UTF-8 that are handled here:
//  * A NUL byte (U+0000) is written as the two-byte sequence 0xC0 0x80.
//  * A 4-byte sequence (U+10000 and up) is split into a UTF-16 surrogate pair, and each
//    surrogate is written as its own 3-byte sequence (6 bytes in total).
// 1- to 3-byte sequences are copied through unchanged.
//
// The input is not validated: malformed bytes are passed along as they are. A sequence that is
// cut off by the end of the input is copied verbatim rather than being read past the end.
//
// output: receives the converted string; whatever it held before is replaced.
// input:  the UTF-8 text to convert. May contain embedded NUL bytes.
//
// Written by ChatGPT and corrected and modified.
void ConvertUTF8ToJavaModifiedUTF8(std::string *output, std::string_view input) {
	// Worst case growth is 2x: a NUL byte goes from 1 to 2 bytes. 4-byte sequences only grow to 6.
	output->resize(input.size() * 2);
	size_t out_idx = 0;
	for (size_t i = 0; i < input.length(); ) {
		const unsigned char c = (unsigned char)input[i];
		const size_t remaining = input.length() - i;
		if (c == 0x00) {
			// Encode null character as 0xC0 0x80. TODO: We probably don't need to support this?
			// output is a pointer, so it has to be dereferenced before indexing into the string.
			(*output)[out_idx++] = (char)0xC0;
			(*output)[out_idx++] = (char)0x80;
			i++;
		} else if ((c & 0xF0) == 0xF0 && remaining >= 4) { // 4-byte sequence (U+10000 to U+10FFFF)
			// Decode the Unicode code point from the UTF-8 sequence
			const uint32_t code_point = ((uint32_t)(input[i] & 0x07) << 18) |
				((uint32_t)(input[i + 1] & 0x3F) << 12) |
				((uint32_t)(input[i + 2] & 0x3F) << 6) |
				(uint32_t)(input[i + 3] & 0x3F);
			// Convert to surrogate pair
			const uint16_t high_surrogate = ((code_point - 0x10000) / 0x400) + 0xD800;
			const uint16_t low_surrogate = ((code_point - 0x10000) % 0x400) + 0xDC00;
			// Encode the surrogates in UTF-8
			out_idx += encode_utf8_modified(high_surrogate, (unsigned char *)(output->data() + out_idx));
			out_idx += encode_utf8_modified(low_surrogate, (unsigned char *)(output->data() + out_idx));
			i += 4;
		} else {
			// Copy the other UTF-8 sequences (1-3 bytes). A 4-byte lead byte that is missing some of
			// its trailing bytes also lands here and gets copied one byte at a time.
			size_t utf8_len = 1;
			if ((c & 0xE0) == 0xC0) utf8_len = 2; // 2-byte sequence
			else if ((c & 0xF0) == 0xE0) utf8_len = 3; // 3-byte sequence
			// Never read beyond the end of the input if the last sequence is truncated.
			if (utf8_len > remaining) utf8_len = remaining;
			memcpy(output->data() + out_idx, input.data() + i, utf8_len);
			out_idx += utf8_len;
			i += utf8_len;
		}
	}
	output->resize(out_idx);
	// Every branch above emits at least as many bytes as it consumes.
	_dbg_assert_(output->size() >= input.size());
}
#ifndef _WIN32
// Replacements for the Win32 wstring functions. Not to be used from emulation code!

View File

@ -126,3 +126,7 @@ std::string ConvertUCS2ToUTF8(const std::u16string &wstr);
// Dest size in units, not bytes.
void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, std::string_view source);
std::u16string ConvertUTF8ToUCS2(std::string_view source);
// Converts regular UTF-8 in input to Java's "Modified UTF-8" and stores the result in *output.
// Java needs 4-byte UTF-8 to be converted to surrogate pairs, each component of which gets
// encoded into 3-byte UTF-8.
void ConvertUTF8ToJavaModifiedUTF8(std::string *output, std::string_view input);

View File

@ -895,6 +895,7 @@ private:
OpenGLTexture::OpenGLTexture(GLRenderManager *render, const TextureDesc &desc) : render_(render) {
_dbg_assert_(desc.format != Draw::DataFormat::UNDEFINED);
_dbg_assert_msg_(desc.width > 0 && desc.height > 0 && desc.depth > 0, "w: %d h: %d d: %d fmt: %s", desc.width, desc.height, desc.depth, DataFormatToString(desc.format));
_dbg_assert_(desc.width > 0 && desc.height > 0 && desc.depth > 0);
_dbg_assert_(desc.type != Draw::TextureType::UNKNOWN);

View File

@ -40,7 +40,7 @@
// Compatibility wrappers making ARM64 NEON code run on ARM32
// With optimization on, these should compile down to the optimal code.
inline float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t b, int lane) {
static inline float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t b, int lane) {
switch (lane & 3) {
case 0: return vmulq_lane_f32(a, vget_low_f32(b), 0);
case 1: return vmulq_lane_f32(a, vget_low_f32(b), 1);
@ -49,7 +49,7 @@ inline float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t b, int lane) {
}
}
inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c, int lane) {
static inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c, int lane) {
switch (lane & 3) {
case 0: return vmlaq_lane_f32(a, b, vget_low_f32(c), 0);
case 1: return vmlaq_lane_f32(a, b, vget_low_f32(c), 1);
@ -58,7 +58,7 @@ inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c,
}
}
// Lane-wise "v >= 0.0f" test, done by comparing v against a vector of zeros with vcgeq_f32.
inline uint32x4_t vcgezq_f32(float32x4_t v) {
static inline uint32x4_t vcgezq_f32(float32x4_t v) {
return vcgeq_f32(v, vdupq_n_f32(0.0f));
}

View File

@ -123,8 +123,8 @@ void TextDrawer::DrawString(DrawBuffer &target, std::string_view str, float x, f
void TextDrawer::MeasureString(std::string_view str, float *w, float *h) {
if (str.empty()) {
*w = 0.0;
*h = 0.0;
*w = 0.0f;
*h = 0.0f;
return;
}

View File

@ -83,15 +83,18 @@ void TextDrawerAndroid::MeasureStringInternal(std::string_view str, float *w, fl
} else {
ERROR_LOG(Log::G3D, "Missing font");
}
std::string text(str);
std::string text;
ConvertUTF8ToJavaModifiedUTF8(&text, str);
auto env = getEnv();
// Unfortunate that we can't create a jstr from a std::string_view directly.
jstring jstr = env->NewStringUTF(text.c_str());
uint32_t size = env->CallStaticIntMethod(cls_textRenderer, method_measureText, jstr, scaledSize);
env->DeleteLocalRef(jstr);
*w = (size >> 16);
*h = (size & 0xFFFF);
*w = size >> 16;
*h = size & 0xFFFF;
WARN_LOG(Log::G3D, "Measure Modified: '%.*s' size: %fx%f", (int)text.length(), text.data(), *w, *h);
}
bool TextDrawerAndroid::DrawStringBitmap(std::vector<uint8_t> &bitmapData, TextStringEntry &entry, Draw::DataFormat texFormat, std::string_view str, int align, bool fullColor) {
@ -100,6 +103,7 @@ bool TextDrawerAndroid::DrawStringBitmap(std::vector<uint8_t> &bitmapData, TextS
return false;
}
double size = 0.0;
auto iter = fontMap_.find(fontHash_);
if (iter != fontMap_.end()) {
@ -109,7 +113,11 @@ bool TextDrawerAndroid::DrawStringBitmap(std::vector<uint8_t> &bitmapData, TextS
}
auto env = getEnv();
jstring jstr = env->NewStringUTF(std::string(str).c_str());
std::string text;
ConvertUTF8ToJavaModifiedUTF8(&text, str);
jstring jstr = env->NewStringUTF(text.c_str());
uint32_t textSize = env->CallStaticIntMethod(cls_textRenderer, method_measureText, jstr, size);
int imageWidth = (short)(textSize >> 16);
int imageHeight = (short)(textSize & 0xFFFF);
@ -117,6 +125,7 @@ bool TextDrawerAndroid::DrawStringBitmap(std::vector<uint8_t> &bitmapData, TextS
imageWidth = 1;
if (imageHeight <= 0)
imageHeight = 1;
WARN_LOG(Log::G3D, "Text: '%.*s' (%02x)", (int)str.length(), str.data(), str[0]);
jintArray imageData = (jintArray)env->CallStaticObjectMethod(cls_textRenderer, method_renderText, jstr, size);
env->DeleteLocalRef(jstr);

View File

@ -151,7 +151,6 @@ bool TextDrawerWin32::DrawStringBitmap(std::vector<uint8_t> &bitmapData, TextStr
bitmapData.clear();
return false;
}
std::wstring wstr = ConvertUTF8ToWString(ReplaceAll(str, "\n", "\r\n"));
auto iter = fontMap_.find(fontHash_);