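Fix surrogate detection in patch_string: the old bitmask test matched non-surrogate code points and would escape too much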

This commit is contained in:
Sebastian Bachmann 2018-01-25 11:07:27 +01:00
parent 961f415ffd
commit be976ca89e
2 changed files with 4 additions and 4 deletions

View File

@ -74,11 +74,11 @@ def patch_string(s):
res = u''
it = PeekIterator(s)
for c in it:
if (ord(c) & 0xd800) == 0xd800:
if (ord(c) >> 10) == 0b110110:
# High surrogate
# Check for the next
n = it.peek()
if n and (ord(n) & 0xdc00) == 0xdc00:
if n and (ord(n) >> 10) == 0b110111:
# Next is a low surrogate! Merge them together
res += chr(((ord(c) & 0x3ff) << 10 | (ord(n) & 0x3ff)) + 0x10000)
# Skip next char, as we already consumed it
@ -86,7 +86,7 @@ def patch_string(s):
else:
# Lonely high surrogate
res += u"\\u{:04x}".format(ord(c))
elif (ord(c) & 0xdc00) == 0xdc00:
elif (ord(c) >> 10) == 0b110111:
# Lonely low surrogate
res += u"\\u{:04x}".format(ord(c))
else:

View File

@ -20,7 +20,7 @@ class StringTest(unittest.TestCase):
u"オンラインツールを使用して文字列を日本語に翻訳",
u"This is \U0001f64f, an emoji.", # complete surrogate
u"\u2713 check this string",
u"\\uffff \u0000 \\uff00", # lonely surrogates
u"\uffff \u0000 \uff00",
u"\u0420\u043e\u0441\u0441\u0438\u044f"]
for s in stests: