mirror of
https://github.com/RPCS3/discord-bot.git
synced 2026-01-31 01:25:22 +01:00
convert stylized characters to normal latin/greek/digit code points
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
using System.Globalization;
|
||||
using CompatApiClient.Utils;
|
||||
using CompatBot.Database;
|
||||
using HomoglyphConverter;
|
||||
|
||||
namespace CompatBot.EventHandlers;
|
||||
|
||||
@@ -135,70 +136,52 @@ public static class UsernameZalgoMonitor
|
||||
|
||||
var builder = new StringBuilder();
|
||||
var strippedBuilder = new StringBuilder();
|
||||
var skipLowSurrogate = false;
|
||||
var consecutive = 0;
|
||||
var total = 0;
|
||||
var codePoint = 0;
|
||||
var highSurrogate = '\0';
|
||||
var hasNormalCharacterBefore = false;
|
||||
foreach (var c in displayName)
|
||||
foreach (var r in displayName.EnumerateRunes())
|
||||
{
|
||||
switch (char.GetUnicodeCategory(c))
|
||||
switch (Rune.GetUnicodeCategory(r))
|
||||
{
|
||||
case UnicodeCategory.EnclosingMark:
|
||||
case UnicodeCategory.ModifierSymbol:
|
||||
case UnicodeCategory.NonSpacingMark:
|
||||
{
|
||||
if (consecutive++ < 2
|
||||
&& total++ < level
|
||||
&& hasNormalCharacterBefore)
|
||||
builder.Append(c);
|
||||
builder.Append(r);
|
||||
break;
|
||||
}
|
||||
|
||||
case UnicodeCategory.Control:
|
||||
case UnicodeCategory.Format:
|
||||
case UnicodeCategory.PrivateUse:
|
||||
break;
|
||||
|
||||
case UnicodeCategory.Surrogate:
|
||||
if (char.IsHighSurrogate(c))
|
||||
{
|
||||
codePoint = 0x10000 | ((c & 0x3ff) << 10);
|
||||
highSurrogate = c;
|
||||
}
|
||||
else
|
||||
{
|
||||
codePoint |= c & 0x3ff;
|
||||
if (codePoint is >= 0x016a0 and < 0x01700 // Runic
|
||||
or >= 0x101d0 and < 0x10200 // Phaistos Disc
|
||||
or >= 0x10380 and < 0x10400 // Ugaritic and Old Persian
|
||||
or >= 0x12000 and < 0x13000) // Cuneiform
|
||||
continue;
|
||||
|
||||
builder.Append(highSurrogate).Append(c);
|
||||
strippedBuilder.Append(highSurrogate).Append(c);
|
||||
hasNormalCharacterBefore = true;
|
||||
consecutive = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case UnicodeCategory.OtherNotAssigned when c >= 0xdb40:
|
||||
skipLowSurrogate = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (char.IsLowSurrogate(c) && skipLowSurrogate)
|
||||
skipLowSurrogate = false;
|
||||
{
|
||||
if (r.Value is >= 0x16a0 and < 0x1700 // Runic
|
||||
or >= 0x1_01d0 and < 0x1_0200 // Phaistos Disc
|
||||
or >= 0x1_0380 and < 0x1_0400 // Ugaritic and Old Persian
|
||||
or >= 0x1_2000 and < 0x1_3000 // Cuneiform
|
||||
|| r.Value < 0x1_0000 && OversizedChars.Contains((char)r.Value))
|
||||
continue;
|
||||
|
||||
if (UnicodeStyles.StyledToBasicCharacterMap.TryGetValue(r, out var c))
|
||||
{
|
||||
builder.Append(c);
|
||||
strippedBuilder.Append(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!OversizedChars.Contains(c))
|
||||
{
|
||||
builder.Append(c);
|
||||
strippedBuilder.Append(c);
|
||||
hasNormalCharacterBefore = true;
|
||||
consecutive = 0;
|
||||
}
|
||||
builder.Append(r);
|
||||
strippedBuilder.Append(r);
|
||||
}
|
||||
hasNormalCharacterBefore = true;
|
||||
consecutive = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
var result = (total <= level ? builder : strippedBuilder)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Frozen;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
@@ -10,7 +11,7 @@ public static class Normalizer
|
||||
{
|
||||
private static readonly Encoding Utf32 = new UTF32Encoding(false, false, true);
|
||||
|
||||
private static readonly Dictionary<string, string> HomoglyphSequences = new()
|
||||
private static readonly FrozenDictionary<string, string> HomoglyphSequences = new Dictionary<string, string>
|
||||
{
|
||||
["rn"] = "m",
|
||||
["cl"] = "d",
|
||||
@@ -24,7 +25,7 @@ public static class Normalizer
|
||||
["WV"] = "VW",
|
||||
["ĸ"] = "k",
|
||||
["◌"] = "o",
|
||||
};
|
||||
}.ToFrozenDictionary();
|
||||
|
||||
// as per https://www.unicode.org/reports/tr39/#Confusable_Detection
|
||||
[return: NotNullIfNotNull(nameof(input))]
|
||||
@@ -68,10 +69,10 @@ public static class Normalizer
|
||||
{
|
||||
var utf32Input = Utf32.GetBytes(input);
|
||||
var convertedLength = utf32Input.Length / 4;
|
||||
var uintInput = convertedLength < 256 / sizeof(uint) ? stackalloc uint[convertedLength] : new uint[convertedLength];
|
||||
var uintInput = convertedLength < 256 / sizeof(int) ? stackalloc int[convertedLength] : new int[convertedLength];
|
||||
for (var i = 0; i < uintInput.Length; i++)
|
||||
uintInput[i] = BitConverter.ToUInt32(utf32Input, i * 4);
|
||||
var result = new List<uint>(convertedLength);
|
||||
uintInput[i] = BitConverter.ToInt32(utf32Input, i * 4);
|
||||
var result = new List<int>(convertedLength);
|
||||
foreach (var ch in uintInput)
|
||||
{
|
||||
if (Confusables.Mapping.TryGetValue(ch, out var replacement))
|
||||
|
||||
143
HomoglyphConverter/UnicodeStyles.cs
Normal file
143
HomoglyphConverter/UnicodeStyles.cs
Normal file
@@ -0,0 +1,143 @@
|
||||
using System.Collections.Frozen;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
namespace HomoglyphConverter;
|
||||
|
||||
public static class UnicodeStyles
|
||||
{
|
||||
// https://babelstone.co.uk/Unicode/text.html
|
||||
// Latin
|
||||
private const string LatinBase = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
|
||||
private const string MathematicalBold = "𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳";
|
||||
private const string MathematicalItalic = "𝐴𝐵𝐶𝐷𝐸𝐹𝐺𝐻𝐼𝐽𝐾𝐿𝑀𝑁𝑂𝑃𝑄𝑅𝑆𝑇𝑈𝑉𝑊𝑋𝑌𝑍𝑎𝑏𝑐𝑑𝑒𝑓𝑔ℎ𝑖𝑗𝑘𝑙𝑚𝑛𝑜𝑝𝑞𝑟𝑠𝑡𝑢𝑣𝑤𝑥𝑦𝑧";
|
||||
private const string MathematicalBoldItalic = "𝑨𝑩𝑪𝑫𝑬𝑭𝑮𝑯𝑰𝑱𝑲𝑳𝑴𝑵𝑶𝑷𝑸𝑹𝑺𝑻𝑼𝑽𝑾𝑿𝒀𝒁𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛";
|
||||
private const string MathematicalScript = "𝒜ℬ𝒞𝒟ℰℱ𝒢ℋℐ𝒥𝒦ℒℳ𝒩𝒪𝒫𝒬ℛ𝒮𝒯𝒰𝒱𝒲𝒳𝒴𝒵𝒶𝒷𝒸𝒹ℯ𝒻ℊ𝒽𝒾𝒿𝓀𝓁𝓂𝓃ℴ𝓅𝓆𝓇𝓈𝓉𝓊𝓋𝓌𝓍𝓎𝓏";
|
||||
private const string MathematicalBoldScript = "𝓐𝓑𝓒𝓓𝓔𝓕𝓖𝓗𝓘𝓙𝓚𝓛𝓜𝓝𝓞𝓟𝓠𝓡𝓢𝓣𝓤𝓥𝓦𝓧𝓨𝓩𝓪𝓫𝓬𝓭𝓮𝓯𝓰𝓱𝓲𝓳𝓴𝓵𝓶𝓷𝓸𝓹𝓺𝓻𝓼𝓽𝓾𝓿𝔀𝔁𝔂𝔃";
|
||||
private const string MathematicalFraktur = "𝔄𝔅ℭ𝔇𝔈𝔉𝔊ℌℑ𝔍𝔎𝔏𝔐𝔑𝔒𝔓𝔔ℜ𝔖𝔗𝔘𝔙𝔚𝔛𝔜ℨ𝔞𝔟𝔠𝔡𝔢𝔣𝔤𝔥𝔦𝔧𝔨𝔩𝔪𝔫𝔬𝔭𝔮𝔯𝔰𝔱𝔲𝔳𝔴𝔵𝔶𝔷";
|
||||
private const string MathematicalDoubleStruck = "𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ𝕒𝕓𝕔𝕕𝕖𝕗𝕘𝕙𝕚𝕛𝕜𝕝𝕞𝕟𝕠𝕡𝕢𝕣𝕤𝕥𝕦𝕧𝕨𝕩𝕪𝕫";
|
||||
private const string MathematicalBoldFraktur = "𝕬𝕭𝕮𝕯𝕰𝕱𝕲𝕳𝕴𝕵𝕶𝕷𝕸𝕹𝕺𝕻𝕼𝕽𝕾𝕿𝖀𝖁𝖂𝖃𝖄𝖅𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟";
|
||||
private const string MathematicalSansSerif = "𝖠𝖡𝖢𝖣𝖤𝖥𝖦𝖧𝖨𝖩𝖪𝖫𝖬𝖭𝖮𝖯𝖰𝖱𝖲𝖳𝖴𝖵𝖶𝖷𝖸𝖹𝖺𝖻𝖼𝖽𝖾𝖿𝗀𝗁𝗂𝗃𝗄𝗅𝗆𝗇𝗈𝗉𝗊𝗋𝗌𝗍𝗎𝗏𝗐𝗑𝗒𝗓";
|
||||
private const string MathematicalSansSerifBold = "𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇";
|
||||
private const string MathematicalSansSerifItalic = "𝘈𝘉𝘊𝘋𝘌𝘍𝘎𝘏𝘐𝘑𝘒𝘓𝘔𝘕𝘖𝘗𝘘𝘙𝘚𝘛𝘜𝘝𝘞𝘟𝘠𝘡𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻";
|
||||
private const string MathematicalSansSerifBoldItalic = "𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯";
|
||||
private const string MathematicalMonospace = "𝙰𝙱𝙲𝙳𝙴𝙵𝙶𝙷𝙸𝙹𝙺𝙻𝙼𝙽𝙾𝙿𝚀𝚁𝚂𝚃𝚄𝚅𝚆𝚇𝚈𝚉𝚊𝚋𝚌𝚍𝚎𝚏𝚐𝚑𝚒𝚓𝚔𝚕𝚖𝚗𝚘𝚙𝚚𝚛𝚜𝚝𝚞𝚟𝚠𝚡𝚢𝚣";
|
||||
private const string Circled = "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ";
|
||||
private const string Parenthesized = "🄐🄑🄒🄓🄔🄕🄖🄗🄘🄙🄚🄛🄜🄝🄞🄟🄠🄡🄢🄣🄤🄥🄦🄧🄨🄩⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵";
|
||||
private const string Superscript = "ᴬᴮꟲᴰᴱꟳᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾꟴᴿSᵀᵁⱽᵂXYZᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖ𐞥ʳˢᵗᵘᵛʷˣʸᶻ";
|
||||
private const string Subscript = "ABCDEFGHIJKLMNOPQRSTUVWXYZₐbcdₑfgₕᵢⱼₖₗₘₙₒₚqᵣₛₜᵤᵥwₓyz";
|
||||
// U+E0041 - U+E007A
|
||||
private const string Tags = "";
|
||||
|
||||
private const string InverseCircled = "🅐🅑🅒🅓🅔🅕🅖🅗🅘🅙🅚🅛🅜🅝🅞🅟🅠🅡🅢🅣🅤🅥🅦🅧🅨🅩";
|
||||
private const string Squared = "🄰🄱🄲🄳🄴🄵🄶🄷🄸🄹🄺🄻🄼🄽🄾🄿🅀🅁🅂🅃🅄🅅🅆🅇🅈🅉";
|
||||
private const string InverseSquared = "🅰🅱🅲🅳🅴🅵🅶🅷🅸🅹🅺🅻🅼🅽🅾🅿🆀🆁🆂🆃🆄🆅🆆🆇🆈🆉";
|
||||
private const string SmallCaps = "ᴀʙᴄᴅᴇꜰɢʜɪᴊᴋʟᴍɴᴏᴘꞯʀꜱᴛᴜᴠᴡXʏᴢ";
|
||||
private const string Outlined = "";
|
||||
|
||||
// Greek
|
||||
private const string GreekBase = "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡϴΣΤΥΦΧΨΩ∇αβγδεζηθικλμνξοπρςστυφχψω∂ϵϑϰϕϱϖ";
|
||||
private const string MathematicalGreekBold = "𝚨𝚩𝚪𝚫𝚬𝚭𝚮𝚯𝚰𝚱𝚲𝚳𝚴𝚵𝚶𝚷𝚸𝚹𝚺𝚻𝚼𝚽𝚾𝚿𝛀𝛁𝛂𝛃𝛄𝛅𝛆𝛇𝛈𝛉𝛊𝛋𝛌𝛍𝛎𝛏𝛐𝛑𝛒𝛓𝛔𝛕𝛖𝛗𝛘𝛙𝛚𝛛𝛜𝛝𝛞𝛟𝛠𝛡";
|
||||
private const string MathematicalGreekItalic = "𝛢𝛣𝛤𝛥𝛦𝛧𝛨𝛩𝛪𝛫𝛬𝛭𝛮𝛯𝛰𝛱𝛲𝛳𝛴𝛵𝛶𝛷𝛸𝛹𝛺𝛻𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔𝜕𝜖𝜗𝜘𝜙𝜚𝜛";
|
||||
private const string MathematicalGreekBoldItalic = "𝜜𝜝𝜞𝜟𝜠𝜡𝜢𝜣𝜤𝜥𝜦𝜧𝜨𝜩𝜪𝜫𝜬𝜭𝜮𝜯𝜰𝜱𝜲𝜳𝜴𝜵𝜶𝜷𝜸𝜹𝜺𝜻𝜼𝜽𝜾𝜿𝝀𝝁𝝂𝝃𝝄𝝅𝝆𝝇𝝈𝝉𝝊𝝋𝝌𝝍𝝎𝝏𝝐𝝑𝝒𝝓𝝔𝝕";
|
||||
private const string MathematicalGreekSansSerifBold = "𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝯𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺𝝻𝝼𝝽𝝾𝝿𝞀𝞁𝞂𝞃𝞄𝞅𝞆𝞇𝞈𝞉𝞊𝞋𝞌𝞍𝞎𝞏";
|
||||
private const string MathematicalGreekSansSerifBoldItalic = "𝞐𝞑𝞒𝞓𝞔𝞕𝞖𝞗𝞘𝞙𝞚𝞛𝞜𝞝𝞞𝞟𝞠𝞡𝞢𝞣𝞤𝞥𝞦𝞧𝞨𝞩𝞪𝞫𝞬𝞭𝞮𝞯𝞰𝞱𝞲𝞳𝞴𝞵𝞶𝞷𝞸𝞹𝞺𝞻𝞼𝞽𝞾𝞿𝟀𝟁𝟂𝟃𝟄𝟅𝟆𝟇𝟈𝟉";
|
||||
|
||||
// Digits
|
||||
private const string DigitsBase = "0123456789";
|
||||
private const string MathematicalBoldDigits = "𝟎𝟏𝟐𝟑𝟒𝟓𝟔𝟕𝟖𝟗";
|
||||
private const string MathematicalDoubleStruckDigits = "𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡";
|
||||
private const string MathematicalSansSerifDigits = "𝟢𝟣𝟤𝟥𝟦𝟧𝟨𝟩𝟪𝟫";
|
||||
private const string MathematicalSansSerifBoldDigits = "𝟬𝟭𝟮𝟯𝟰𝟱𝟲𝟳𝟴𝟵";
|
||||
private const string MathematicalMonospaceDigits = "𝟶𝟷𝟸𝟹𝟺𝟻𝟼𝟽𝟾𝟿";
|
||||
private const string CircledDigits = "⓪①②③④⑤⑥⑦⑧⑨";
|
||||
private const string InverseCircledDigits = "⓿❶❷❸❹❺❻❼❽❾";
|
||||
private const string ParenthesizedDigits = "0⑴⑵⑶⑷⑸⑹⑺⑻⑼";
|
||||
private const string SuperscriptDigits = "⁰¹²³⁴⁵⁶⁷⁸⁹";
|
||||
private const string SubscriptDigits = "₀₁₂₃₄₅₆₇₈₉";
|
||||
private const string SegmentedDigits = "🯰🯱🯲🯳🯴🯵🯶🯷🯸🯹";
|
||||
private const string OutlinedDigits = "";
|
||||
// U+E0030 - U+E0039
|
||||
private const string TagsDigits = "";
|
||||
|
||||
public static FrozenDictionary<Rune, char> StyledToBasicCharacterMap;
|
||||
|
||||
static UnicodeStyles()
|
||||
{
|
||||
var result = new Dictionary<Rune, char>();
|
||||
string[] styleList =
|
||||
[
|
||||
MathematicalBold,
|
||||
MathematicalItalic,
|
||||
MathematicalBoldItalic,
|
||||
MathematicalScript,
|
||||
MathematicalBoldScript,
|
||||
MathematicalFraktur,
|
||||
MathematicalDoubleStruck,
|
||||
MathematicalBoldFraktur,
|
||||
MathematicalSansSerif,
|
||||
MathematicalSansSerifBold,
|
||||
MathematicalSansSerifItalic,
|
||||
MathematicalSansSerifBoldItalic,
|
||||
MathematicalMonospace,
|
||||
Circled,
|
||||
Parenthesized,
|
||||
Superscript,
|
||||
Subscript,
|
||||
Tags,
|
||||
InverseCircled,
|
||||
Squared,
|
||||
InverseSquared,
|
||||
SmallCaps,
|
||||
Outlined,
|
||||
];
|
||||
BuildMap(styleList, LatinBase, result);
|
||||
|
||||
styleList =
|
||||
[
|
||||
MathematicalGreekBold,
|
||||
MathematicalGreekItalic,
|
||||
MathematicalGreekBoldItalic,
|
||||
MathematicalGreekSansSerifBold,
|
||||
MathematicalGreekSansSerifBoldItalic,
|
||||
];
|
||||
BuildMap(styleList, GreekBase, result);
|
||||
|
||||
styleList =
|
||||
[
|
||||
MathematicalBoldDigits,
|
||||
MathematicalDoubleStruckDigits,
|
||||
MathematicalSansSerifDigits,
|
||||
MathematicalSansSerifBoldDigits,
|
||||
MathematicalMonospaceDigits,
|
||||
CircledDigits,
|
||||
InverseCircledDigits,
|
||||
ParenthesizedDigits,
|
||||
SuperscriptDigits,
|
||||
SubscriptDigits,
|
||||
SegmentedDigits,
|
||||
OutlinedDigits,
|
||||
TagsDigits,
|
||||
];
|
||||
BuildMap(styleList, DigitsBase, result);
|
||||
|
||||
StyledToBasicCharacterMap = result.ToFrozenDictionary();
|
||||
}
|
||||
|
||||
private static void BuildMap(string[] styleList, string target, Dictionary<Rune, char> result)
|
||||
{
|
||||
var basicRunes = target.EnumerateRunes().ToArray();
|
||||
foreach (var str in styleList)
|
||||
{
|
||||
var styleRunes = str.EnumerateRunes().ToArray();
|
||||
for (var i = 0; i < styleRunes.Length; i++)
|
||||
{
|
||||
var baseRune = basicRunes[i];
|
||||
var styleRune = styleRunes[i];
|
||||
if (baseRune != styleRune)
|
||||
result[styleRune] = (char)baseRune.Value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -55,7 +55,7 @@ public class ConfusablesSourceGenerator: IIncrementalGenerator
|
||||
if (stream is null)
|
||||
throw new InvalidOperationException("Failed to get confusables.txt stream");
|
||||
|
||||
var mapping = new Dictionary<uint, uint[]>();
|
||||
var mapping = new Dictionary<int, int[]>();
|
||||
var date = "";
|
||||
var version = "";
|
||||
using var reader = new StreamReader(stream, Encoding.UTF8, false);
|
||||
@@ -80,8 +80,8 @@ public class ConfusablesSourceGenerator: IIncrementalGenerator
|
||||
|
||||
try
|
||||
{
|
||||
var confusableChar = uint.Parse(mappingParts[0].Trim(), NumberStyles.HexNumber);
|
||||
var skeletonChars = mappingParts[1].Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries).Select(l => uint.Parse(l, NumberStyles.HexNumber)).ToArray();
|
||||
var confusableChar = int.Parse(mappingParts[0].Trim(), NumberStyles.HexNumber);
|
||||
var skeletonChars = mappingParts[1].Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries).Select(l => int.Parse(l, NumberStyles.HexNumber)).ToArray();
|
||||
mapping.Add(confusableChar, skeletonChars);
|
||||
}
|
||||
catch (Exception e)
|
||||
@@ -112,13 +112,13 @@ public class ConfusablesSourceGenerator: IIncrementalGenerator
|
||||
{
|
||||
public const string Version = "{{version}}";
|
||||
public const string Date = "{{date}}";
|
||||
public static readonly Dictionary<uint, uint[]> Mapping = new()
|
||||
public static readonly Dictionary<int, int[]> Mapping = new()
|
||||
{
|
||||
"""
|
||||
);
|
||||
foreach (var kvp in mapping.OrderBy(i => i.Key))
|
||||
result.AppendLine($"""
|
||||
[0x{kvp.Key:X5}u] = [{string.Join(", ", kvp.Value!.OrderBy(i => i).Select(n => $"0x{n:X5}u"))}],
|
||||
[0x{kvp.Key:X5}] = [{string.Join(", ", kvp.Value!.OrderBy(i => i).Select(n => $"0x{n:X5}"))}],
|
||||
""");
|
||||
result.AppendLine("""
|
||||
};
|
||||
|
||||
@@ -65,7 +65,7 @@ public class ZalgoTests
|
||||
}
|
||||
}
|
||||
|
||||
[TestCase("ᵇᶦᵒˢʰᵒᶜᵏ96", false)]
|
||||
[TestCase("ᵇᶦᵒˢʰᵒᶜᵏ96", true)]
|
||||
[TestCase("GodPan กับยูนิตแขนที่หายไป", false)]
|
||||
[TestCase("⛧Bζ͜͡annerBomb⛧", false)]
|
||||
[TestCase("(_A_Y_A_Z_) (͡๏̯͡๏)", false)]
|
||||
@@ -83,6 +83,7 @@ public class ZalgoTests
|
||||
[TestCase("҉҉҉҉", true, "Combining sign")]
|
||||
[TestCase(" ", true, "Private block")]
|
||||
[TestCase("녇", true, "Single character after normalization")]
|
||||
[TestCase("𝖊𝖝𝖋𝖎𝖇0", true)]
|
||||
public async Task ZalgoDetectionTest(string name, bool isBad, string? comment = null)
|
||||
{
|
||||
var result = await UsernameZalgoMonitor.NeedsRenameAsync(name).ConfigureAwait(false);
|
||||
|
||||
Reference in New Issue
Block a user