tweak more zalgo variables to pass all tests on detection and stripping

This commit is contained in:
13xforever
2025-06-15 14:05:16 +05:00
parent b904eaafd1
commit 7f5b665070
3 changed files with 16 additions and 4 deletions

View File

@@ -124,7 +124,7 @@ public static class UsernameZalgoMonitor
}
}
public static async ValueTask<string> StripZalgoAsync(string displayName, string? userName, ulong userId, NormalizationForm normalizationForm = NormalizationForm.FormD, int level = 3)
public static async ValueTask<string> StripZalgoAsync(string displayName, string? userName, ulong userId, NormalizationForm normalizationForm = NormalizationForm.FormD, int level = 5)
{
const int minNicknameLength = 2;
displayName = displayName.Normalize(normalizationForm).TrimEager();
@@ -134,8 +134,10 @@ public static class UsernameZalgoMonitor
return await GenerateRandomNameAsync(userId).ConfigureAwait(false);
var builder = new StringBuilder();
var strippedBuilder = new StringBuilder();
var skipLowSurrogate = false;
var consecutive = 0;
var total = 0;
var codePoint = 0;
var highSurrogate = '\0';
var hasNormalCharacterBefore = false;
@@ -146,7 +148,9 @@ public static class UsernameZalgoMonitor
case UnicodeCategory.EnclosingMark:
case UnicodeCategory.ModifierSymbol:
case UnicodeCategory.NonSpacingMark:
if (++consecutive < level && hasNormalCharacterBefore)
if (consecutive++ < 2
&& total++ < level
&& hasNormalCharacterBefore)
builder.Append(c);
break;
@@ -171,6 +175,7 @@ public static class UsernameZalgoMonitor
continue;
builder.Append(highSurrogate).Append(c);
strippedBuilder.Append(highSurrogate).Append(c);
hasNormalCharacterBefore = true;
consecutive = 0;
}
@@ -188,6 +193,7 @@ public static class UsernameZalgoMonitor
if (!OversizedChars.Contains(c))
{
builder.Append(c);
strippedBuilder.Append(c);
hasNormalCharacterBefore = true;
consecutive = 0;
}
@@ -195,7 +201,10 @@ public static class UsernameZalgoMonitor
break;
}
}
var result = builder.ToString().TrimEager().Normalize(NormalizationForm.FormC);
var result = (total <= level ? builder : strippedBuilder)
.ToString()
.TrimEager()
.Normalize(NormalizationForm.FormC);
if (result is null or {Length: <minNicknameLength})
return await GenerateRandomNameAsync(userId).ConfigureAwait(false);
return result;

View File

@@ -52,6 +52,9 @@ public class StringUtilTests
}
[TestCase("jò̵͗s̷̑͠ẻ̵͝p̸̆̂h̸͐̿", "joseph")]
[TestCase("çöćķåťřĩĉȅ", "cockatrice")]
[TestCase("blazé", "blazé", Description = "Base character with diacritics to same base character")]
[TestCase("blazé", "blazé", Description = "Combining diacritics to base character")]
public async Task StripZalgoTest(string input, string expected)
{
var stripped = await UsernameZalgoMonitor.StripZalgoAsync(input, null, 0ul).ConfigureAwait(false);

View File

@@ -86,7 +86,7 @@ public class ZalgoTests
public async Task ZalgoDetectionTest(string name, bool isBad, string? comment = null)
{
var result = await UsernameZalgoMonitor.NeedsRenameAsync(name).ConfigureAwait(false);
Assert.That(result, Is.EqualTo(isBad), comment);
Assert.That(result, Is.EqualTo(isBad), comment ?? $"Expected to be {(isBad ? "bad" : "acceptable")}");
}
}