From ad6f533175dc85c871202ef5a7006c37b8f0aebb Mon Sep 17 00:00:00 2001 From: djsrv Date: Sun, 22 Aug 2021 12:11:29 -0400 Subject: [PATCH] COMMON: Fix punycode encoder/decoder These need to use Unicode code points, not raw bytes. --- common/punycode.cpp | 66 ++++++++++++++++++++++----------------------- common/punycode.h | 12 ++++----- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/common/punycode.cpp b/common/punycode.cpp index d344ccb363f..c9484af402a 100644 --- a/common/punycode.cpp +++ b/common/punycode.cpp @@ -57,7 +57,7 @@ namespace Common { #define DAMP 700 #define INITIAL_N 128 #define INITIAL_BIAS 72 -#define SMAX 0x7fff +#define SMAX 0x10ffff // maximum Unicode code point #define SPECIAL_SYMBOLS "/\":*[]+|\\?%<>,;=" @@ -129,19 +129,19 @@ static size_t decode_digit(uint32 v) { return SMAX; } -String punycode_encode(String src) { - int srclen = src.size(); - int h = 0, si; +String punycode_encode(U32String src) { + size_t srclen = src.size(); + size_t h = 0, si; String dst; for (si = 0; si < srclen; si++) { - if ((byte)src[si] < 128) { + if (src[si] < 128) { dst += src[si]; h++; } } - int b = h; + size_t b = h; /* Write out delimiter if any basic code points were processed. */ if (h != srclen) { @@ -150,16 +150,16 @@ String punycode_encode(String src) { return src; } - int n = INITIAL_N; - int bias = INITIAL_BIAS; - int delta = 0; - int m; + size_t n = INITIAL_N; + size_t bias = INITIAL_BIAS; + size_t delta = 0; + size_t m; for (; h < srclen; n++, delta++) { /* Find next smallest non-basic code point. */ for (m = SMAX, si = 0; si < srclen; si++) { - if ((byte)src[si] >= n && (byte)src[si] < m) { - m = (byte)src[si]; + if (src[si] >= n && src[si] < m) { + m = src[si]; } } @@ -173,13 +173,13 @@ String punycode_encode(String src) { n = m; for (si = 0; si < srclen; si++) { - if ((byte)src[si] < n) { + if (src[si] < n) { if (++delta == 0) { /* OVERFLOW */ warning("punycode_encode: overflow2"); return src; } - } else if ((byte)src[si] == n) { + } else if (src[si] == n) { dst += encode_var_int(bias, delta); bias = adapt_bias(delta, h + 1, h == b); delta = 0; @@ -206,7 +206,7 @@ bool punycode_needEncode(const String src) { return false; } -String punycode_decode(const String src1) { +U32String punycode_decode(const String src1) { if (!src1.hasPrefix("xn--")) return src1; @@ -225,7 +225,7 @@ String punycode_decode(const String src1) { if (di == String::npos) return src; - String dst; + U32String dst; for (size_t i = 0; i < di; i++) { dst += src[i]; @@ -288,9 +288,9 @@ String punycode_decode(const String src1) { n += i / (di + 1); i %= (di + 1); - String dst1(dst.c_str(), i); - dst1 += (char )n; - dst1 += String(&dst.c_str()[i]); + U32String dst1(dst.c_str(), i); + dst1 += (u32char_type_t)n; + dst1 += U32String(&dst.c_str()[i]); dst = dst1; i++; } @@ -298,17 +298,17 @@ String punycode_decode(const String src1) { return dst; } -String punycode_encodefilename(const String src) { - String dst; +String punycode_encodefilename(const U32String src) { + U32String dst; for (uint i = 0; i < src.size(); i++) { - if ((byte)src[i] == 0x81) { // In case we have our escape character present - dst += '\x81'; - dst += '\x79'; + if (src[i] == 0x81) { // In case we have our escape character present + dst += 0x81; + dst += 0x79; // Encode special symbols and non-printables - } else if (strchr(SPECIAL_SYMBOLS, src[i]) || (byte)src[i] < 0x20) { - dst += '\x81'; - dst += (byte)src[i] + '\x80'; + } else if ((src[i] < 0x80 && strchr(SPECIAL_SYMBOLS, (byte)src[i])) || src[i] < 0x20) { + dst += 0x81; + dst += src[i] + 0x80; } else { dst += src[i]; } @@ -317,9 +317,9 @@ String punycode_encodefilename(const String src) { return punycode_encode(dst); } -String punycode_decodefilename(const String src1) { - String dst; - String src = punycode_decode(src1); +U32String punycode_decodefilename(const String src1) { + U32String dst; + U32String src = punycode_decode(src1); // Check if the string did not change which could be // also on decoding failure @@ -327,12 +327,12 @@ String punycode_decodefilename(const String src1) { return src; for (uint i = 0; i < src.size(); i++) { - if ((byte)src[i] == 0x81 && i + 1 < src.size()) { + if (src[i] == 0x81 && i + 1 < src.size()) { i++; if (src[i] == 0x79) - dst += '\x81'; + dst += 0x81; else - dst += (byte)src[i] - '\x80'; + dst += src[i] - 0x80; } else { dst += src[i]; } diff --git a/common/punycode.h b/common/punycode.h index 48539663069..0143f73c426 100644 --- a/common/punycode.h +++ b/common/punycode.h @@ -50,21 +50,21 @@ namespace Common { /** - * Convert Binary to Punycode. Returns the encoded string. + * Convert UTF-32 to Punycode. Returns the encoded string. */ -String punycode_encode(const String src); +String punycode_encode(const U32String src); /** - * Convert Punycode to Binary. Returns the decoded string + * Convert Punycode to UTF-32. Returns the decoded string */ -String punycode_decode(const String src); +U32String punycode_decode(const String src); -String punycode_encodefilename(const String src1); +String punycode_encodefilename(const U32String src1); /** * Convert Punycode filename to Binary using special 0x81 escape character. Returns the decoded string */ -String punycode_decodefilename(const String src1); +U32String punycode_decodefilename(const String src1); /** * Convert path from Punycode