mirror of
https://github.com/RPCS3/discord-bot.git
synced 2026-01-31 01:25:22 +01:00
replace runtime builder with source generator for unicode confusables
This commit is contained in:
@@ -1,60 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
|
||||
namespace HomoglyphConverter
|
||||
{
|
||||
/// <summary>
/// Loads the Unicode confusables table that is embedded in this assembly as a gzipped resource.
/// </summary>
public static class ConfusablesBuilder
{
    private static readonly char[] CommentSplitter = {'#'};
    private static readonly char[] FieldSplitter = {';'};
    private static readonly char[] PairSplitter = {' '};

    /// <summary>
    /// Reads the embedded resource whose name ends with <c>confusables.txt.gz</c> and turns every
    /// data line into a dictionary entry keyed by the code point in the first field.
    /// </summary>
    /// <remarks>
    /// Requires a gzipped copy of http://www.unicode.org/Public/security/latest/confusables.txt
    /// to be embedded in the assembly.
    /// </remarks>
    /// <returns>
    /// Map from a code point to the code points listed, in file order, in the second field of its line.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The resource is missing or cannot be opened, a data line cannot be parsed or added,
    /// or the file produced no entries at all.
    /// </exception>
    public static Dictionary<uint, uint[]> Build()
    {
        var assembly = Assembly.GetAssembly(typeof(ConfusablesBuilder));
        var resourceName = assembly?
            .GetManifestResourceNames()
            .FirstOrDefault(n => n.EndsWith("confusables.txt.gz", StringComparison.InvariantCultureIgnoreCase));
        if (string.IsNullOrEmpty(resourceName))
            throw new InvalidOperationException("Confusables embedded resource was not found");

        using var stream = assembly?.GetManifestResourceStream(resourceName);
        if (stream is null)
            throw new InvalidOperationException("Failed to get confusables resource stream");

        using var gzip = new GZipStream(stream, CompressionMode.Decompress);
        using var reader = new StreamReader(gzip, Encoding.UTF8, false);

        var table = new Dictionary<uint, uint[]>();
        for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            // Blank lines and full-line comments carry no mapping data.
            var isBlankOrComment = line.Length == 0 || line.StartsWith("#");
            if (!isBlankOrComment)
                AddMapping(table, line);
        }

        if (table.Count == 0)
            throw new InvalidOperationException("Empty confusable mapping source");

        return table;
    }

    // Parses one "<hex> ; <hex> <hex> ... ; ... # comment" line and stores it in the table.
    private static void AddMapping(Dictionary<uint, uint[]> table, string line)
    {
        // Drop the trailing comment first, then cut the remainder into at most three fields.
        var dataPart = line.Split(CommentSplitter, 2)[0];
        var fields = dataPart.Split(FieldSplitter, 3);
        if (fields.Length < 2)
            throw new InvalidOperationException("Invalid confusable mapping line: " + line);

        try
        {
            var source = uint.Parse(fields[0].Trim(), NumberStyles.HexNumber);
            var tokens = fields[1].Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries);
            var targets = Array.ConvertAll(tokens, t => uint.Parse(t, NumberStyles.HexNumber));
            table.Add(source, targets);
        }
        catch (Exception e)
        {
            throw new InvalidOperationException("Invalid confusable mapping line:" + line, e);
        }
    }
}
|
||||
}
|
||||
@@ -7,11 +7,12 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Remove="confusables.txt.gz" />
|
||||
<None Remove="confusables.txt" />
|
||||
<AdditionalFiles Include="confusables.txt" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Include="confusables.txt.gz" />
|
||||
<ProjectReference Include="..\SourceGenerators\SourceGenerators.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -7,7 +7,6 @@ namespace HomoglyphConverter
|
||||
{
|
||||
public static class Normalizer
|
||||
{
|
||||
private static readonly Dictionary<uint, uint[]> Mapping = ConfusablesBuilder.Build();
|
||||
private static readonly Encoding Utf32 = new UTF32Encoding(false, false, true);
|
||||
|
||||
private static readonly Dictionary<string, string> HomoglyphSequences = new()
|
||||
@@ -72,7 +71,7 @@ namespace HomoglyphConverter
|
||||
var result = new List<uint>(convertedLength);
|
||||
foreach (var ch in uintInput)
|
||||
{
|
||||
if (Mapping.TryGetValue(ch, out var replacement))
|
||||
if (Confusables.Mapping.TryGetValue(ch, out var replacement))
|
||||
result.AddRange(replacement);
|
||||
else
|
||||
result.Add(ch);
|
||||
|
||||
Binary file not shown.
@@ -1,9 +1,9 @@
|
||||
Homoglyph Converter
|
||||
===================
|
||||
|
||||
This is a straight up implementation of the recommended [confusable detection algorithm](http://www.unicode.org/reports/tr39/#Confusable_Detection). It is mainly used to check for mod impersonation.
|
||||
This is a straight up implementation of the recommended [confusable detection algorithm](https://www.unicode.org/reports/tr39/#Confusable_Detection). It is mainly used to check for mod impersonation.
|
||||
|
||||
You can get the latest version of the mappings from the [Unicode.org](http://www.unicode.org/Public/security/latest/confusables.txt). You'll need to manually gzip it for embedding in the resources.
|
||||
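You can get the latest version of the mappings from [Unicode.org](https://www.unicode.org/Public/security/latest/confusables.txt). Save it as `confusables.txt` in the project directory; it is read at build time by the source generator, so it no longer needs to be gzipped and embedded in the resources.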
|
||||
|
||||
Code is split in two parts:
|
||||
* Builder will load the mapping file from the resources and will build the mapping dictionary that can be used to quickly substitute the character sequences.
|
||||
|
||||
151
SourceGenerators/ConfusablesSourceGenerator.cs
Normal file
151
SourceGenerators/ConfusablesSourceGenerator.cs
Normal file
@@ -0,0 +1,151 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using Microsoft.CodeAnalysis;
|
||||
using Microsoft.CodeAnalysis.Text;
|
||||
|
||||
namespace SourceGenerators
|
||||
{
|
||||
/// <summary>
/// Source generator that turns the <c>confusables.txt</c> additional file into a static class
/// exposing the confusable mapping as a dictionary, and warns when the copy published on
/// unicode.org reports a different version or date than the local file.
/// </summary>
[Generator]
public class ConfusablesSourceGenerator : ISourceGenerator
{
    private const string RemoteUrl = "https://www.unicode.org/Public/security/latest/confusables.txt";
    private const string DatePrefix = "# Date: ";
    private const string VersionPrefix = "# Version: ";

    private static readonly char[] CommentSplitter = {'#'};
    private static readonly char[] FieldSplitter = {';'};
    private static readonly char[] PairSplitter = {' '};

    private static readonly DiagnosticDescriptor ConfusablesCheckWarning = new(
        id: "CONFUSABLES001",
        title: "Failed to check confusables version",
        messageFormat: "Error while checking confusables version: '{0}'",
        category: nameof(ConfusablesSourceGenerator),
        DiagnosticSeverity.Warning,
        isEnabledByDefault: true
    );

    private static readonly DiagnosticDescriptor ConfusablesVersionWarning = new(
        id: "CONFUSABLES002",
        title: "Outdated confusables version",
        messageFormat: "Local confusables version: {0} ({1}), remote confusables version: {2} ({3})",
        category: nameof(ConfusablesSourceGenerator),
        DiagnosticSeverity.Warning,
        isEnabledByDefault: true
    );

    /// <summary>No initialization is required; all work happens in <see cref="Execute"/>.</summary>
    public void Initialize(GeneratorInitializationContext context)
    {
    }

    /// <summary>
    /// Parses the <c>confusables.txt</c> additional file, adds the generated mapping class to the
    /// compilation, then compares the local header with the first bytes of the remote file.
    /// </summary>
    /// <param name="context">Generator context supplying the additional files and receiving the output.</param>
    /// <exception cref="InvalidOperationException">
    /// A data line of the local file cannot be parsed, or the file contains no mappings.
    /// </exception>
    public void Execute(GeneratorExecutionContext context)
    {
        var resourceName = context.AdditionalFiles.FirstOrDefault(
            f => Path.GetFileName(f.Path).Equals("confusables.txt", StringComparison.Ordinal));
        if (resourceName is null)
            return;

        // Start the remote probe first so that it overlaps with parsing the local file.
        // Only the beginning of the file is requested: the version header lives there.
        using var httpClient = new HttpClient();
        using var msg = new HttpRequestMessage(HttpMethod.Get, RemoteUrl);
        msg.Headers.Range = new(0, 512);
        var requestTask = httpClient.SendAsync(msg);

        var mapping = ReadMapping(resourceName.Path, out var date, out var version);
        if (mapping.Count == 0)
            throw new InvalidOperationException("Empty confusable mapping source");

        var ns = context.Compilation.AssemblyName;
        // The file name matched "confusables.txt" above, so the stem is never empty.
        var cn = Path.GetFileNameWithoutExtension(resourceName.Path);
        cn = char.ToUpperInvariant(cn[0]) + cn.Substring(1);
        if (!Version.TryParse(version, out _))
            version = "";

        var source = BuildSource(ns, cn, version, date, mapping);
        context.AddSource($"{cn}.Generated.cs", SourceText.From(source, Encoding.UTF8));

        try
        {
            using var requestResult = requestTask.ConfigureAwait(false).GetAwaiter().GetResult();
            var response = requestResult.Content.ReadAsStringAsync().ConfigureAwait(false).GetAwaiter().GetResult().Split('\n');
            var remoteVer = "";
            var remoteDate = "";
            foreach (var l in response)
                ReadHeaderField(l, ref remoteDate, ref remoteVer);

            if (!string.IsNullOrEmpty(remoteDate) && remoteDate != date
                || !string.IsNullOrEmpty(remoteVer) && remoteVer != version)
            {
                context.ReportDiagnostic(Diagnostic.Create(ConfusablesVersionWarning, Location.None, version, date, remoteVer, remoteDate));
            }
        }
        catch (Exception e)
        {
            // The version check is best effort: never fail the build over it, but do surface
            // the reason through the {0} placeholder of the diagnostic message.
            context.ReportDiagnostic(Diagnostic.Create(ConfusablesCheckWarning, Location.None, e.Message));
        }
    }

    // Reads the local mapping file, returning the parsed table plus the header date and version
    // (empty strings when the corresponding header line is absent).
    private static Dictionary<uint, uint[]> ReadMapping(string path, out string date, out string version)
    {
        date = "";
        version = "";
        var mapping = new Dictionary<uint, uint[]>();

        using var stream = File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var reader = new StreamReader(stream, Encoding.UTF8, false);
        while (reader.ReadLine() is string line)
        {
            if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
            {
                ReadHeaderField(line, ref date, ref version);
                continue;
            }

            AddMapping(mapping, line);
        }
        return mapping;
    }

    // Updates date/version when the line is the matching "# Date: " / "# Version: " header line.
    private static void ReadHeaderField(string line, ref string date, ref string version)
    {
        if (line.StartsWith(DatePrefix, StringComparison.Ordinal))
            date = line.Substring(DatePrefix.Length).Trim();
        else if (line.StartsWith(VersionPrefix, StringComparison.Ordinal))
            version = line.Substring(VersionPrefix.Length).Trim();
    }

    // Parses one "<hex> ; <hex> <hex> ... ; ... # comment" data line into the table.
    private static void AddMapping(Dictionary<uint, uint[]> mapping, string line)
    {
        var lineParts = line.Split(CommentSplitter, 2);
        var mappingParts = lineParts[0].Split(FieldSplitter, 3);
        if (mappingParts.Length < 2)
            throw new InvalidOperationException("Invalid confusable mapping line: " + line);

        try
        {
            var confusableChar = uint.Parse(mappingParts[0].Trim(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
            var skeletonChars = mappingParts[1]
                .Split(PairSplitter, StringSplitOptions.RemoveEmptyEntries)
                .Select(l => uint.Parse(l, NumberStyles.HexNumber, CultureInfo.InvariantCulture))
                .ToArray();
            mapping.Add(confusableChar, skeletonChars);
        }
        catch (Exception e)
        {
            throw new InvalidOperationException("Invalid confusable mapping line: " + line, e);
        }
    }

    // Renders the generated class: Version and Date constants plus the Mapping dictionary.
    private static string BuildSource(string ns, string cn, string version, string date, Dictionary<uint, uint[]> mapping)
    {
        var result = new StringBuilder()
            .AppendLine("using System;")
            .AppendLine("using System.Collections.Generic;")
            .AppendLine()
            .AppendLine($"namespace {ns}")
            .AppendLine("{")
            .AppendLine($"    internal static class {cn}")
            .AppendLine("    {")
            .AppendLine($"        public const string Version = \"{EscapeLiteral(version)}\";")
            .AppendLine()
            .AppendLine($"        public const string Date = \"{EscapeLiteral(date)}\";")
            .AppendLine()
            .AppendLine("        public static readonly Dictionary<uint, uint[]> Mapping = new()")
            .AppendLine("        {");

        // Keys are sorted only to keep the generated file stable between builds.
        foreach (var kvp in mapping.OrderBy(i => i.Key))
        {
            // The replacement is an ordered sequence of code points, so it must be emitted
            // exactly as read from the file; sorting it would change the resulting text.
            var skeleton = string.Join(", ", kvp.Value.Select(n => $"0x{n:X5}u"));
            result.AppendLine($"            [0x{kvp.Key:X5}u] = new[] {{ {skeleton} }},");
        }

        return result
            .AppendLine("        };")
            .AppendLine("    }")
            .AppendLine("}")
            .ToString();
    }

    // Makes header text safe to place between the quotes of a regular C# string literal.
    private static string EscapeLiteral(string value)
        => value.Replace("\\", "\\\\").Replace("\"", "\\\"");
}
|
||||
}
|
||||
13
SourceGenerators/SourceGenerators.csproj
Normal file
13
SourceGenerators/SourceGenerators.csproj
Normal file
@@ -0,0 +1,13 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netstandard2.0</TargetFramework>
|
||||
<LangVersion>latest</LangVersion>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="3.8.0" PrivateAssets="all" />
|
||||
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="3.3.2" PrivateAssets="all" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -38,6 +38,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MediafireClient", "Clients\
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YandexDiskClient", "Clients\YandexDiskClient\YandexDiskClient.csproj", "{CABC3E5E-2153-443B-A5A8-DA3E389359EC}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SourceGenerators", "SourceGenerators\SourceGenerators.csproj", "{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -83,6 +85,10 @@ Global
|
||||
{CABC3E5E-2153-443B-A5A8-DA3E389359EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{CABC3E5E-2153-443B-A5A8-DA3E389359EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{CABC3E5E-2153-443B-A5A8-DA3E389359EC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{1A75FAF1-1DD1-43FF-A789-1AB216F4B94E}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
Reference in New Issue
Block a user