From 97ea398d563ac7b8fd74f540a59bb0e282e9e38f Mon Sep 17 00:00:00 2001 From: 13xforever Date: Wed, 11 Nov 2020 01:31:22 +0500 Subject: [PATCH] update homoglyph library for new c# language features --- HomoglyphConverter/ConfusablesBuilder.cs | 51 +++++++++++--------- HomoglyphConverter/HomoglyphConverter.csproj | 1 + HomoglyphConverter/Normalizer.cs | 15 +++--- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/HomoglyphConverter/ConfusablesBuilder.cs b/HomoglyphConverter/ConfusablesBuilder.cs index 90e0d819..90be7800 100644 --- a/HomoglyphConverter/ConfusablesBuilder.cs +++ b/HomoglyphConverter/ConfusablesBuilder.cs @@ -20,32 +20,35 @@ namespace HomoglyphConverter { var result = new Dictionary<uint, uint[]>(); var assembly = Assembly.GetAssembly(typeof(ConfusablesBuilder)); - var resourceName = assembly.GetManifestResourceNames().FirstOrDefault(n => n.EndsWith("confusables.txt.gz", StringComparison.InvariantCultureIgnoreCase)); - using (var stream = assembly.GetManifestResourceStream(resourceName)) + var resourceName = assembly?.GetManifestResourceNames().FirstOrDefault(n => n.EndsWith("confusables.txt.gz", StringComparison.InvariantCultureIgnoreCase)); + if (string.IsNullOrEmpty(resourceName)) + throw new InvalidOperationException("Confusables embedded resource was not found"); + + using var stream = assembly?.GetManifestResourceStream(resourceName); + if (stream is null) + throw new InvalidOperationException("Failed to get confusables resource stream"); + + using var gzip = new GZipStream(stream, CompressionMode.Decompress); + using var reader = new StreamReader(gzip, Encoding.UTF8, false); + while (reader.ReadLine() is string line) { - using var gzip = new GZipStream(stream, CompressionMode.Decompress); - using var reader = new StreamReader(gzip, Encoding.UTF8, false); - string line; - while ((line = reader.ReadLine()) != null) + if (string.IsNullOrEmpty(line) || line.StartsWith("#")) + continue; + + var lineParts = line.Split(CommentSplitter, 2); + var mapping 
= lineParts[0].Split(FieldSplitter, 3); + if (mapping.Length < 2) + throw new InvalidOperationException("Invalid confusable mapping line: " + line); + + try { - if (string.IsNullOrEmpty(line) || line.StartsWith("#")) - continue; - - var lineParts = line.Split(CommentSplitter, 2); - var mapping = lineParts[0].Split(FieldSplitter, 3); - if (mapping.Length < 2) - throw new InvalidOperationException("Invalid confusable mapping line: " + line); - - try - { - var confusableChar = uint.Parse(mapping[0].Trim(), NumberStyles.HexNumber); - var skeletonChars = mapping[1].Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries).Select(l => uint.Parse(l, NumberStyles.HexNumber)).ToArray(); - result.Add(confusableChar, skeletonChars); - } - catch (Exception e) - { - throw new InvalidOperationException("Invalid confusable mapping line:" + line, e); - } + var confusableChar = uint.Parse(mapping[0].Trim(), NumberStyles.HexNumber); + var skeletonChars = mapping[1].Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries).Select(l => uint.Parse(l, NumberStyles.HexNumber)).ToArray(); + result.Add(confusableChar, skeletonChars); + } + catch (Exception e) + { + throw new InvalidOperationException("Invalid confusable mapping line:" + line, e); } } if (result.Count == 0) diff --git a/HomoglyphConverter/HomoglyphConverter.csproj b/HomoglyphConverter/HomoglyphConverter.csproj index 1f69f2dd..eaaa55d7 100644 --- a/HomoglyphConverter/HomoglyphConverter.csproj +++ b/HomoglyphConverter/HomoglyphConverter.csproj @@ -3,6 +3,7 @@ net5.0 latest + enable diff --git a/HomoglyphConverter/Normalizer.cs b/HomoglyphConverter/Normalizer.cs index b3dd77fb..acdf46f7 100644 --- a/HomoglyphConverter/Normalizer.cs +++ b/HomoglyphConverter/Normalizer.cs @@ -46,29 +46,30 @@ namespace HomoglyphConverter return input; input = ToSkeletonString(input); - var result = ReplaceMultiletterConfusables(input); + var result = ReplaceMultiLetterConfusables(input); for (var i = 0; result != input && i < 128; i++) { input 
= result; - result = ReplaceMultiletterConfusables(input); + result = ReplaceMultiLetterConfusables(input); } return result; } - private static string ReplaceMultiletterConfusables(string input) + private static string ReplaceMultiLetterConfusables(string input) { - foreach (var pair in HomoglyphSequences) - input = input.Replace(pair.Key, pair.Value); + foreach (var (sequence, replacement) in HomoglyphSequences) + input = input.Replace(sequence, replacement); return input; } private static string ReplaceConfusables(string input) { var utf32Input = Utf32.GetBytes(input); - var uintInput = new uint[utf32Input.Length / 4]; + var convertedLength = utf32Input.Length / 4; + var uintInput = convertedLength < 256 / sizeof(uint) ? stackalloc uint[convertedLength] : new uint[convertedLength]; for (var i = 0; i < uintInput.Length; i++) uintInput[i] = BitConverter.ToUInt32(utf32Input, i * 4); - var result = new List<uint>(uintInput.Length); + var result = new List<uint>(convertedLength); foreach (var ch in uintInput) { if (Mapping.TryGetValue(ch, out var replacement))