From b6dd4bfd29fa1fd29abe549a2e1b4b1426fadb33 Mon Sep 17 00:00:00 2001 From: 13xforever Date: Sat, 1 May 2021 22:54:07 +0500 Subject: [PATCH] strip the whole cuneiform block from nicknames --- .../EventHandlers/UsernameZalgoMonitor.cs | 25 +++++++++++++------ Tests/ZalgoTests.cs | 1 + 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/CompatBot/EventHandlers/UsernameZalgoMonitor.cs b/CompatBot/EventHandlers/UsernameZalgoMonitor.cs index bba8f92a..3c8b3fe3 100644 --- a/CompatBot/EventHandlers/UsernameZalgoMonitor.cs +++ b/CompatBot/EventHandlers/UsernameZalgoMonitor.cs @@ -20,11 +20,6 @@ namespace CompatBot.EventHandlers '꧁', '꧂', '⎝', '⎠', '⧹', '⧸', '⎛', '⎞', '﷽', '⸻', 'ဪ', '꧅', '꧄', }; - private static readonly List OversizedLiterals = new() - { - "𒐫", "𒈙", - }; - public static async Task OnUserUpdated(DiscordClient c, UserUpdateEventArgs args) { try @@ -119,14 +114,14 @@ namespace CompatBot.EventHandlers public static string StripZalgo(string displayName, ulong userId, NormalizationForm normalizationForm = NormalizationForm.FormD, int level = 0) { displayName = displayName.Normalize(normalizationForm).TrimEager(); - foreach (var literal in OversizedLiterals) - displayName = displayName.Replace(literal, ""); if (string.IsNullOrEmpty(displayName)) return GenerateRandomName(userId); var builder = new StringBuilder(); bool skipLowSurrogate = false; int consecutive = 0; + int codePoint = 0; + char highSurrogate = '\0'; foreach (var c in displayName) { switch (char.GetUnicodeCategory(c)) @@ -141,6 +136,22 @@ namespace CompatBot.EventHandlers case UnicodeCategory.Format: break; + case UnicodeCategory.Surrogate: + if (char.IsHighSurrogate(c)) + { + codePoint = 0x10000 + ((c & 0x3ff) << 10); /* add, not OR: the shifted bits can include bit 16 (e.g. U+2xxxx), which OR would swallow */ + highSurrogate = c; + } + else + { + codePoint |= c & 0x3ff; + if (codePoint is >= 0x12000 and < 0x13000) //Cuneiform + continue; + + builder.Append(highSurrogate).Append(c); + } + break; + case UnicodeCategory.OtherNotAssigned when c >= 0xdb40: skipLowSurrogate = true; 
break; diff --git a/Tests/ZalgoTests.cs b/Tests/ZalgoTests.cs index a00d6e10..e6101179 100644 --- a/Tests/ZalgoTests.cs +++ b/Tests/ZalgoTests.cs @@ -77,6 +77,7 @@ namespace Tests [TestCase("꧁꧂🥴🥴🥴HOJU🥴🥴🥴╲⎝⧹", true)] [TestCase("", true)] [TestCase("᲼᲼᲼", true, "Reserved block")] + [TestCase("𒁃𒃋𒋬𒑭𒕃", true, "Cuneiform block")] public void ZalgoDetectionTest(string name, bool isBad, string comment = null) { Assert.That(UsernameZalgoMonitor.NeedsRename(name), Is.EqualTo(isBad), comment);