Update encoding_utf

This commit is contained in:
twinaphex 2015-12-01 19:24:51 +01:00
parent 263b2de885
commit 0b1f5df456
2 changed files with 29 additions and 127 deletions

View File

@ -58,135 +58,42 @@ static int Buf_EnsureSize(CBuf *dest, size_t size)
return Buf_Create(dest, size, &g_Alloc);
}
#ifndef _WIN32
static uint8_t kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
static Bool Utf16_To_Utf8(uint8_t *dest, size_t *destLen,
const uint16_t *src, size_t srcLen)
static bool Utf16_To_Char(CBuf *dest, const uint16_t *s, int fileMode)
{
size_t destPos = 0;
size_t srcPos = 0;
bool res;
size_t dest_len = 0;
unsigned len = 0;
for (;;)
{
unsigned numAdds;
uint32_t value;
while (s[len] != '\0')
len++;
if (srcPos == srcLen)
{
*destLen = destPos;
return True;
}
value = src[srcPos++];
if (value < 0x80)
{
if (dest)
dest[destPos] = (char)value;
destPos++;
continue;
}
if (value >= 0xD800 && value < 0xE000)
{
uint32_t c2;
utf16_conv_utf8(NULL, &dest_len, s, len);
dest_len += 1;
if (value >= 0xDC00 || srcPos == srcLen)
break;
c2 = src[srcPos++];
if (c2 < 0xDC00 || c2 >= 0xE000)
break;
value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
}
for (numAdds = 1; numAdds < 5; numAdds++)
if (value < (((uint32_t)1) << (numAdds * 5 + 6)))
break;
if (dest)
dest[destPos] = (char)(kUtf8Limits[numAdds - 1]
+ (value >> (6 * numAdds)));
destPos++;
do
{
numAdds--;
if (dest)
dest[destPos] = (char)(0x80
+ ((value >> (6 * numAdds)) & 0x3F));
destPos++;
}while (numAdds != 0);
}
*destLen = destPos;
return False;
}
static SRes Utf16_To_Utf8Buf(CBuf *dest,
const uint16_t *src, size_t srcLen)
{
Bool res;
size_t destLen = 0;
Utf16_To_Utf8(NULL, &destLen, src, srcLen);
destLen += 1;
if (!Buf_EnsureSize(dest, destLen))
if (!Buf_EnsureSize(dest, dest_len))
return SZ_ERROR_MEM;
res = Utf16_To_Utf8(dest->data, &destLen, src, srcLen);
dest->data[destLen] = 0;
res = utf16_conv_utf8(dest->data, &dest_len, s, len);
dest->data[dest_len] = 0;
return res ? SZ_OK : SZ_ERROR_FAIL;
}
#endif
static SRes Utf16_To_Char(CBuf *buf, const uint16_t *s, int fileMode)
{
unsigned len = 0;
for (len = 0; s[len] != '\0'; len++);
#ifdef _WIN32
{
int size = len * 3 + 100;
if (!Buf_EnsureSize(buf, size))
return SZ_ERROR_MEM;
{
char defaultChar = '_';
BOOL defUsed;
int numChars = WideCharToMultiByte(fileMode ?
(
#ifdef UNDER_CE
CP_ACP
#else
AreFileApisANSI() ? CP_ACP : CP_OEMCP
#endif
) : CP_OEMCP,
0, (LPCWSTR)s, len, (char *)buf->data,
size, &defaultChar, &defUsed);
if (numChars == 0 || numChars >= size)
return SZ_ERROR_FAIL;
buf->data[numChars] = 0;
return SZ_OK;
}
}
#else
(void)fileMode;
return Utf16_To_Utf8Buf(buf, s, len);
#endif
}
static SRes ConvertUtf16toCharString(const uint16_t *in, char *s, size_t len)
{
CBuf buf;
SRes res;
Buf_Init(&buf);
res = Utf16_To_Char(&buf, in, 0);
if (res == SZ_OK)
strlcpy(s, (const char*)buf.data, len);
Buf_Free(&buf, &g_Alloc);
return res;
}
static bool ConvertUtf16toCharString(const uint16_t *in, char *s, size_t len)
{
CBuf dest;
bool ret;
Buf_Init(&dest);
ret = Utf16_To_Char(&dest, in, 0);
if (ret)
strlcpy(s, (const char*)dest.data, len);
Buf_Free(&dest, &g_Alloc);
return ret;
}
/* Extract the relative path relative_path from a 7z archive
* archive_path and allocate a buf for it to write it in.
* If optional_outfile is set, extract to that instead and don't alloc buffer.
@ -268,7 +175,7 @@ static int read_7zip_file(
}
SzArEx_GetFileNameUtf16(&db, i, temp);
res = ConvertUtf16toCharString(temp, infile, sizeof(infile));
res = ConvertUtf16toCharString(temp, infile, sizeof(infile)) ? SZ_OK : SZ_ERROR_FAIL;
if (!strcmp(infile, relative_path))
{
@ -419,7 +326,7 @@ static struct string_list *compressed_7zip_file_list_new(
}
}
SzArEx_GetFileNameUtf16(&db, i, temp);
res = ConvertUtf16toCharString(temp, infile, sizeof(infile));
res = ConvertUtf16toCharString(temp, infile, sizeof(infile)) ? SZ_OK : SZ_ERROR_FAIL;
file_ext = path_get_extension(infile);
if (string_list_find_elem_prefix(ext_list, ".", file_ext))

View File

@ -75,9 +75,8 @@ size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
return ret;
}
bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
const uint16_t *in, size_t in_size)
const uint16_t *in, size_t in_size)
{
static uint8_t kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
size_t out_pos = 0;
@ -93,9 +92,7 @@ bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
*out_chars = out_pos;
return true;
}
value = in[in_pos++];
if (value < 0x80)
{
if (out)
@ -119,12 +116,10 @@ bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
for (numAdds = 1; numAdds < 5; numAdds++)
if (value < (((uint32_t)1) << (numAdds * 5 + 6)))
break;
if (out)
out[out_pos] = (char)(kUtf8Limits[numAdds - 1]
+ (value >> (6 * numAdds)));
out_pos++;
do
{
numAdds--;