Update homoglyph library for new C# language features

This commit is contained in:
13xforever
2020-11-11 01:31:22 +05:00
parent 5881296696
commit 97ea398d56
3 changed files with 36 additions and 31 deletions

View File

@@ -20,32 +20,35 @@ namespace HomoglyphConverter
{
var result = new Dictionary<uint, uint[]>();
var assembly = Assembly.GetAssembly(typeof(ConfusablesBuilder));
var resourceName = assembly.GetManifestResourceNames().FirstOrDefault(n => n.EndsWith("confusables.txt.gz", StringComparison.InvariantCultureIgnoreCase));
using (var stream = assembly.GetManifestResourceStream(resourceName))
var resourceName = assembly?.GetManifestResourceNames().FirstOrDefault(n => n.EndsWith("confusables.txt.gz", StringComparison.InvariantCultureIgnoreCase));
if (string.IsNullOrEmpty(resourceName))
throw new InvalidOperationException("Confusables embedded resource was not found");
using var stream = assembly?.GetManifestResourceStream(resourceName);
if (stream is null)
throw new InvalidOperationException("Failed to get confusables resource stream");
using var gzip = new GZipStream(stream, CompressionMode.Decompress);
using var reader = new StreamReader(gzip, Encoding.UTF8, false);
while (reader.ReadLine() is string line)
{
using var gzip = new GZipStream(stream, CompressionMode.Decompress);
using var reader = new StreamReader(gzip, Encoding.UTF8, false);
string line;
while ((line = reader.ReadLine()) != null)
if (string.IsNullOrEmpty(line) || line.StartsWith("#"))
continue;
var lineParts = line.Split(CommentSplitter, 2);
var mapping = lineParts[0].Split(FieldSplitter, 3);
if (mapping.Length < 2)
throw new InvalidOperationException("Invalid confusable mapping line: " + line);
try
{
if (string.IsNullOrEmpty(line) || line.StartsWith("#"))
continue;
var lineParts = line.Split(CommentSplitter, 2);
var mapping = lineParts[0].Split(FieldSplitter, 3);
if (mapping.Length < 2)
throw new InvalidOperationException("Invalid confusable mapping line: " + line);
try
{
var confusableChar = uint.Parse(mapping[0].Trim(), NumberStyles.HexNumber);
var skeletonChars = mapping[1].Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries).Select(l => uint.Parse(l, NumberStyles.HexNumber)).ToArray();
result.Add(confusableChar, skeletonChars);
}
catch (Exception e)
{
throw new InvalidOperationException("Invalid confusable mapping line:" + line, e);
}
var confusableChar = uint.Parse(mapping[0].Trim(), NumberStyles.HexNumber);
var skeletonChars = mapping[1].Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries).Select(l => uint.Parse(l, NumberStyles.HexNumber)).ToArray();
result.Add(confusableChar, skeletonChars);
}
catch (Exception e)
{
throw new InvalidOperationException("Invalid confusable mapping line:" + line, e);
}
}
if (result.Count == 0)

View File

@@ -3,6 +3,7 @@
<PropertyGroup>
<TargetFramework>net5.0</TargetFramework>
<LangVersion>latest</LangVersion>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>

View File

@@ -46,29 +46,30 @@ namespace HomoglyphConverter
return input;
input = ToSkeletonString(input);
var result = ReplaceMultiletterConfusables(input);
var result = ReplaceMultiLetterConfusables(input);
for (var i = 0; result != input && i < 128; i++)
{
input = result;
result = ReplaceMultiletterConfusables(input);
result = ReplaceMultiLetterConfusables(input);
}
return result;
}
private static string ReplaceMultiletterConfusables(string input)
private static string ReplaceMultiLetterConfusables(string input)
{
foreach (var pair in HomoglyphSequences)
input = input.Replace(pair.Key, pair.Value);
foreach (var (sequence, replacement) in HomoglyphSequences)
input = input.Replace(sequence, replacement);
return input;
}
private static string ReplaceConfusables(string input)
{
var utf32Input = Utf32.GetBytes(input);
var uintInput = new uint[utf32Input.Length / 4];
var convertedLength = utf32Input.Length / 4;
var uintInput = convertedLength < 256 / sizeof(uint) ? stackalloc uint[convertedLength] : new uint[convertedLength];
for (var i = 0; i < uintInput.Length; i++)
uintInput[i] = BitConverter.ToUInt32(utf32Input, i * 4);
var result = new List<uint>(uintInput.Length);
var result = new List<uint>(convertedLength);
foreach (var ch in uintInput)
{
if (Mapping.TryGetValue(ch, out var replacement))