replace runtime builder with source generator for unicode confusables

This commit is contained in:
13xforever
2021-01-28 18:37:37 +05:00
parent 396a051282
commit 377e3177e5
8 changed files with 176 additions and 66 deletions

View File

@@ -1,60 +0,0 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Reflection;
using System.Text;
namespace HomoglyphConverter
{
    /// <summary>
    /// Builds the confusable-character lookup table from the gzipped
    /// <c>confusables.txt</c> file embedded into this assembly.
    /// </summary>
    public static class ConfusablesBuilder
    {
        private static readonly char[] CommentSplitter = {'#'};
        private static readonly char[] FieldSplitter = {';'};
        private static readonly char[] PairSplitter = {' '};

        // requires a gzipped mapping from http://www.unicode.org/Public/security/latest/confusables.txt
        /// <summary>
        /// Reads the embedded mapping and returns a dictionary keyed by the confusable
        /// code point, whose value is the replacement code point sequence in file order.
        /// </summary>
        /// <returns>A non-empty dictionary of code point to replacement sequence.</returns>
        /// <exception cref="InvalidOperationException">
        /// The embedded resource is missing or cannot be opened, a data line cannot be parsed
        /// (including a repeated source code point), or the file contains no mappings at all.
        /// </exception>
        public static Dictionary<uint, uint[]> Build()
        {
            var result = new Dictionary<uint, uint[]>();
            var assembly = typeof(ConfusablesBuilder).Assembly;
            // Resource names are identifiers, not linguistic text, so compare ordinally.
            var resourceName = assembly.GetManifestResourceNames()
                .FirstOrDefault(n => n.EndsWith("confusables.txt.gz", StringComparison.OrdinalIgnoreCase));
            if (string.IsNullOrEmpty(resourceName))
                throw new InvalidOperationException("Confusables embedded resource was not found");

            using var stream = assembly.GetManifestResourceStream(resourceName);
            if (stream is null)
                throw new InvalidOperationException("Failed to get confusables resource stream");

            using var gzip = new GZipStream(stream, CompressionMode.Decompress);
            using var reader = new StreamReader(gzip, Encoding.UTF8, false);
            while (reader.ReadLine() is string line)
            {
                // Blank lines and full-line comments carry no mapping data.
                if (string.IsNullOrEmpty(line) || line.StartsWith("#", StringComparison.Ordinal))
                    continue;

                // Layout of a data line: <source> ; <target sequence> ; <type> # <comment>
                var lineParts = line.Split(CommentSplitter, 2);
                var mapping = lineParts[0].Split(FieldSplitter, 3);
                if (mapping.Length < 2)
                    throw new InvalidOperationException("Invalid confusable mapping line: " + line);

                try
                {
                    var confusableChar = uint.Parse(mapping[0].Trim(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    var skeletonChars = mapping[1]
                        .Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries)
                        .Select(l => uint.Parse(l, NumberStyles.HexNumber, CultureInfo.InvariantCulture))
                        .ToArray();
                    // Add (rather than the indexer) so that a repeated source code point is reported as an error.
                    result.Add(confusableChar, skeletonChars);
                }
                catch (Exception e)
                {
                    throw new InvalidOperationException("Invalid confusable mapping line: " + line, e);
                }
            }
            if (result.Count == 0)
                throw new InvalidOperationException("Empty confusable mapping source");

            return result;
        }
    }
}

View File

@@ -7,11 +7,12 @@
</PropertyGroup>
<ItemGroup>
<None Remove="confusables.txt.gz" />
<None Remove="confusables.txt" />
<AdditionalFiles Include="confusables.txt" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="confusables.txt.gz" />
<ProjectReference Include="..\SourceGenerators\SourceGenerators.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false"/>
</ItemGroup>
</Project>

View File

@@ -7,7 +7,6 @@ namespace HomoglyphConverter
{
public static class Normalizer
{
private static readonly Dictionary<uint, uint[]> Mapping = ConfusablesBuilder.Build();
private static readonly Encoding Utf32 = new UTF32Encoding(false, false, true);
private static readonly Dictionary<string, string> HomoglyphSequences = new()
@@ -72,7 +71,7 @@ namespace HomoglyphConverter
var result = new List<uint>(convertedLength);
foreach (var ch in uintInput)
{
if (Mapping.TryGetValue(ch, out var replacement))
if (Confusables.Mapping.TryGetValue(ch, out var replacement))
result.AddRange(replacement);
else
result.Add(ch);

View File

@@ -1,9 +1,9 @@
Homoglyph Converter
===================
This is a straight up implementation of the recommended [confusable detection algorithm](http://www.unicode.org/reports/tr39/#Confusable_Detection). It is mainly used to check for mod impersonation.
This is a straight up implementation of the recommended [confusable detection algorithm](https://www.unicode.org/reports/tr39/#Confusable_Detection). It is mainly used to check for mod impersonation.
You can get the latest version of the mappings from the [Unicode.org](http://www.unicode.org/Public/security/latest/confusables.txt). You'll need to manually gzip it for embedding in the resources.
You can get the latest version of the mappings from the [Unicode.org](https://www.unicode.org/Public/security/latest/confusables.txt). Save it in the project as plain `confusables.txt`; the source generator reads it at build time, so it no longer needs to be gzipped or embedded in the resources.
Code is split in two parts:
* Source generator will read the mapping file at build time and will generate the mapping dictionary that can be used to quickly substitute the character sequences.

View File

@@ -0,0 +1,151 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Text;
namespace SourceGenerators
{
    /// <summary>
    /// Source generator that converts the <c>confusables.txt</c> additional file into a
    /// static class exposing the confusable mapping as a dictionary, and warns when the
    /// copy published on unicode.org reports a different version or date.
    /// </summary>
    [Generator]
    public class ConfusablesSourceGenerator : ISourceGenerator
    {
        private static readonly char[] CommentSplitter = {'#'};
        private static readonly char[] FieldSplitter = {';'};
        private static readonly char[] PairSplitter = {' '};

        // Reported when the remote version check itself fails; {0} is the failure reason.
        private static readonly DiagnosticDescriptor ConfusablesCheckWarning = new(
            id: "CONFUSABLES001",
            title: "Failed to check confusables version",
            messageFormat: "Error while checking confusables version: '{0}'",
            category: nameof(ConfusablesSourceGenerator),
            DiagnosticSeverity.Warning,
            isEnabledByDefault: true
        );

        // Reported when the local file header differs from the remote file header.
        private static readonly DiagnosticDescriptor ConfusablesVersionWarning = new(
            id: "CONFUSABLES002",
            title: "Outdated confusables version",
            messageFormat: "Local confusables version: {0} ({1}), remote confusables version: {2} ({3})",
            category: nameof(ConfusablesSourceGenerator),
            DiagnosticSeverity.Warning,
            isEnabledByDefault: true
        );

        /// <summary>No initialization is required for this generator.</summary>
        public void Initialize(GeneratorInitializationContext context)
        {
        }

        /// <summary>
        /// Parses the <c>confusables.txt</c> additional file, adds the generated mapping class
        /// to the compilation, then compares the local header with the remote one.
        /// Does nothing when the compilation has no additional file with that name.
        /// </summary>
        /// <param name="context">Generator execution context supplied by the compiler.</param>
        /// <exception cref="InvalidOperationException">
        /// A data line cannot be parsed (including a repeated source code point) or the file
        /// contains no mappings.
        /// </exception>
        public void Execute(GeneratorExecutionContext context)
        {
            var resourceName = context.AdditionalFiles.FirstOrDefault(f => Path.GetFileName(f.Path).Equals("confusables.txt"));
            if (resourceName is null)
                return;

            // Start the remote header download first so it overlaps with local parsing.
            // Only the first bytes are requested: that is where the header comments live.
            using var httpClient = new HttpClient();
            using var msg = new HttpRequestMessage(HttpMethod.Get, "https://www.unicode.org/Public/security/latest/confusables.txt");
            msg.Headers.Range = new(0, 512);
            var requestTask = httpClient.SendAsync(msg);

            using var stream = File.Open(resourceName.Path, FileMode.Open, FileAccess.Read, FileShare.Read);
            var mapping = new Dictionary<uint, uint[]>();
            var date = "";
            var version = "";
            using var reader = new StreamReader(stream, Encoding.UTF8, false);
            while (reader.ReadLine() is string line)
            {
                if (string.IsNullOrEmpty(line) || line.StartsWith("#", StringComparison.Ordinal))
                {
                    // Header comments carry the data file date and version.
                    if (line is {Length: > 10})
                    {
                        if (line.StartsWith("# Date: ", StringComparison.Ordinal))
                            date = line.Substring(8).Trim();
                        else if (line.StartsWith("# Version: ", StringComparison.Ordinal))
                            version = line.Substring(11).Trim();
                    }
                    continue;
                }

                // Layout of a data line: <source> ; <target sequence> ; <type> # <comment>
                var lineParts = line.Split(CommentSplitter, 2);
                var mappingParts = lineParts[0].Split(FieldSplitter, 3);
                if (mappingParts.Length < 2)
                    throw new InvalidOperationException("Invalid confusable mapping line: " + line);

                try
                {
                    var confusableChar = uint.Parse(mappingParts[0].Trim(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    var skeletonChars = mappingParts[1]
                        .Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries)
                        .Select(l => uint.Parse(l, NumberStyles.HexNumber, CultureInfo.InvariantCulture))
                        .ToArray();
                    // Add (rather than the indexer) so that a repeated source code point is reported as an error.
                    mapping.Add(confusableChar, skeletonChars);
                }
                catch (Exception e)
                {
                    throw new InvalidOperationException("Invalid confusable mapping line: " + line, e);
                }
            }
            if (mapping.Count == 0)
                throw new InvalidOperationException("Empty confusable mapping source");

            // The generated class lives in a namespace named after the consuming assembly and
            // is named after the data file with its first letter upper-cased.
            var ns = context.Compilation.AssemblyName;
            var cn = Path.GetFileNameWithoutExtension(resourceName.Path);
            if (cn.Length == 1)
                cn = cn.ToUpper();
            else
                cn = char.ToUpper(cn[0]) + cn.Substring(1);
            if (!Version.TryParse(version, out _))
                version = "";

            var result = new StringBuilder()
                .AppendLine("using System;")
                .AppendLine("using System.Collections.Generic;")
                .AppendLine()
                .AppendLine($"namespace {ns}")
                .AppendLine("{")
                .AppendLine($" internal static class {cn}")
                .AppendLine(" {")
                .AppendLine($" public const string Version = \"{version}\";")
                .AppendLine()
                .AppendLine($" public const string Date = \"{date}\";")
                .AppendLine()
                .AppendLine(" public static readonly Dictionary<uint, uint[]> Mapping = new()")
                .AppendLine(" {");
            // Keys are sorted only to keep the generated file stable between builds.
            foreach (var kvp in mapping.OrderBy(i => i.Key))
            {
                // The replacement sequence must keep the order given in the data file:
                // it is the text that is substituted for the confusable character.
                var sequence = string.Join(", ", kvp.Value.Select(n => $"0x{n:X5}u"));
                result.AppendLine($" [0x{kvp.Key:X5}u] = new[] {{ {sequence} }},");
            }
            result.AppendLine(" };")
                .AppendLine(" }")
                .AppendLine("}");
            context.AddSource($"{cn}.Generated.cs", SourceText.From(result.ToString(), Encoding.UTF8));

            // Best-effort freshness check: any failure is downgraded to a warning so that
            // an unreachable server never breaks the build.
            try
            {
                using var requestResult = requestTask.ConfigureAwait(false).GetAwaiter().GetResult();
                // An error page has no header lines and would otherwise pass as "up to date".
                requestResult.EnsureSuccessStatusCode();
                var response = requestResult.Content.ReadAsStringAsync().ConfigureAwait(false).GetAwaiter().GetResult().Split('\n');
                var remoteVer = "";
                var remoteDate = "";
                foreach (var l in response)
                {
                    if (l.StartsWith("# Date: ", StringComparison.Ordinal))
                        remoteDate = l.Substring(8).Trim();
                    else if (l.StartsWith("# Version: ", StringComparison.Ordinal))
                        remoteVer = l.Substring(11).Trim();
                }
                if (!string.IsNullOrEmpty(remoteDate) && remoteDate != date
                    || !string.IsNullOrEmpty(remoteVer) && remoteVer != version)
                {
                    context.ReportDiagnostic(Diagnostic.Create(ConfusablesVersionWarning, Location.None, version, date, remoteVer, remoteDate));
                }
            }
            catch (Exception e)
            {
                // Supply the {0} argument so the warning states why the check failed.
                context.ReportDiagnostic(Diagnostic.Create(ConfusablesCheckWarning, Location.None, e.Message));
            }
        }
    }
}

View File

@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<LangVersion>latest</LangVersion>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="3.8.0" PrivateAssets="all" />
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="3.3.2" PrivateAssets="all" />
</ItemGroup>
</Project>

View File

@@ -38,6 +38,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MediafireClient", "Clients\
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YandexDiskClient", "Clients\YandexDiskClient\YandexDiskClient.csproj", "{CABC3E5E-2153-443B-A5A8-DA3E389359EC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SourceGenerators", "SourceGenerators\SourceGenerators.csproj", "{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -83,6 +85,10 @@ Global
{CABC3E5E-2153-443B-A5A8-DA3E389359EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CABC3E5E-2153-443B-A5A8-DA3E389359EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CABC3E5E-2153-443B-A5A8-DA3E389359EC}.Release|Any CPU.Build.0 = Release|Any CPU
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE