handle unassigned code points, as they're also invisible

This commit is contained in:
13xforever
2019-03-14 02:41:38 +05:00
parent 053067f33a
commit d86a5dfb2c
3 changed files with 37 additions and 4 deletions

View File

@@ -60,6 +60,7 @@ namespace CompatBot.EventHandlers
case UnicodeCategory.Control:
case UnicodeCategory.Format:
case UnicodeCategory.OtherNotAssigned when c >= 0xdb40:
break;
default:

View File

@@ -60,12 +60,40 @@ namespace CompatBot.Utils
int start, end;
for (start = 0; start < str.Length; start++)
if (!char.IsWhiteSpace(str[start]) && !IsFormat(str[start]))
break;
{
if (char.IsWhiteSpace(str, start) || IsFormat(str[start]))
continue;
if (char.IsHighSurrogate(str, start)
&& char.GetUnicodeCategory(str, start) == UnicodeCategory.OtherNotAssigned
&& str[start] >= 0xdb40) // this will check if the surrogate pair is >= E0000 (see https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF)
continue;
if (char.IsLowSurrogate(str, start))
continue;
break;
}
for (end = str.Length - 1; end >= start; end--)
if (!char.IsWhiteSpace(str[end]) && !IsFormat(str[end]))
break;
{
if (char.IsWhiteSpace(str, end) || IsFormat(str[end]))
continue;
if (char.IsLowSurrogate(str, end)
&& end > start
&& char.IsHighSurrogate(str, end - 1)
&& char.GetUnicodeCategory(str, end - 1) == UnicodeCategory.OtherNotAssigned
&& str[end-1] >= 0xdb40)
continue;
if (char.IsHighSurrogate(str, end)
&& char.GetUnicodeCategory(str, end) == UnicodeCategory.OtherNotAssigned
&& str[end] >= 0xdb40)
continue;
break;
}
return CreateTrimmedString(str, start, end);
}

View File

@@ -36,8 +36,12 @@ namespace Tests
[TestCase("GodPan กับยูนิตแขนที่หายไป", false)]
[TestCase("⛧Bζ͜͡annerBomb⛧", false)]
[TestCase("(_A_Y_A_Z_) (͡๏̯͡๏)", false)]
[TestCase("🥛🥛", false)]
[TestCase("🎮P̷͙͋a̵̛̳k̶̫̀o̸̿͜ỏ̸̝🎮", true)]
[TestCase("Cindellด้้้", true)]
[TestCase("󠂪󠂪󠂪󠂪 󠂪󠂪󠂪󠂪󠂪󠂪󠂪󠂪 󠂪󠂪󠂪", true)]
[TestCase("󠀀󠀀", true)]
[TestCase("", true)]
public void ZalgoDetectionTest(string name, bool isBad)
{
Assert.That(UsernameZalgoMonitor.NeedsRename(name), Is.EqualTo(isBad));