add anilist character name dump

This commit is contained in:
13xforever 2021-03-09 23:04:57 +05:00
parent 6699b27a7b
commit 67667e6855
4 changed files with 84814 additions and 10 deletions

View File

@ -15,7 +15,12 @@ namespace CompatBot.EventHandlers
{
private static readonly HashSet<char> OversizedChars = new()
{
'꧁', '꧂', '⎝', '⎠', '', '', '⎛', '⎞', '﷽',
'꧁', '꧂', '⎝', '⎠', '', '', '⎛', '⎞', '﷽', '⸻', 'ဪ', '꧅',
};
private static readonly List<string> OversizedLiterals = new()
{
"𒐫", "𒈙",
};
public static async Task OnUserUpdated(DiscordClient c, UserUpdateEventArgs args)
@ -112,6 +117,8 @@ namespace CompatBot.EventHandlers
public static string StripZalgo(string displayName, ulong userId, NormalizationForm normalizationForm = NormalizationForm.FormD, int level = 0)
{
displayName = displayName.Normalize(normalizationForm).TrimEager();
foreach (var literal in OversizedLiterals)
displayName = displayName.Replace(literal, "");
if (string.IsNullOrEmpty(displayName))
return GenerateRandomName(userId);
@ -160,7 +167,7 @@ namespace CompatBot.EventHandlers
var hash = userId.GetHashCode();
var rng = new Random(hash);
var name = NamesPool.List[rng.Next(NamesPool.NameCount)];
return $"{name}{NamesPool.NameSuffix} #{hash:x8}";
return name + NamesPool.NameSuffix;
}
}
}

View File

@ -12,8 +12,9 @@ namespace SourceGenerators
public class NamesSourceGenerator : ISourceGenerator
{
private const string Indent = " ";
private const string NameSuffix = " the Rule 7 Breaker";
private const int DiscordUsernameLengthLimit = 32-10; //" #12345678"
private const string NameSuffix = " (Rule 7)";
//private const int DiscordUsernameLengthLimit = 32-10; //" #12345678"
private const int DiscordUsernameLengthLimit = 32;
public void Initialize(GeneratorInitializationContext context)
{
@ -21,7 +22,10 @@ namespace SourceGenerators
public void Execute(GeneratorExecutionContext context)
{
var resources = context.AdditionalFiles.Where(f => Path.GetFileName(f.Path).ToLower().StartsWith("names_") && f.Path.ToLower().EndsWith(".txt")).ToList();
var resources = context.AdditionalFiles
.Where(f => Path.GetFileName(f.Path).ToLower().StartsWith("names_") && f.Path.ToLower().EndsWith(".txt"))
.OrderBy(f => f.Path)
.ToList();
if (resources.Count == 0)
return;
@ -38,15 +42,23 @@ namespace SourceGenerators
var commentPos = line.IndexOf(" (");
if (commentPos > 1)
line = line.Substring(0, commentPos);
line = line.Trim();
if (line.Length + NameSuffix.Length > DiscordUsernameLengthLimit)
line = line.Split(' ')[0];
line = line.Trim()
.Replace(" ", " ")
.Replace('`', '\'') // consider
.Replace("\"", "\\\"");
//if (line.Length + NameSuffix.Length > DiscordUsernameLengthLimit)
// line = line.Split(' ')[0];
if (line.Length + NameSuffix.Length > DiscordUsernameLengthLimit)
continue;
if (line.Contains('@')
|| line.Contains('#')
|| line.Contains(':'))
continue;
names.Add(line);
if (line.Contains(' '))
names.Add(line.Split(' ')[0]);
//if (line.Contains(' '))
// names.Add(line.Split(' ')[0]);
}
}

58
get-names_anilist.ps1 Normal file
View File

@ -0,0 +1,58 @@
#!/usr/bin/pwsh
<#
.SYNOPSIS
    Script to crawl AniList for character names
.DESCRIPTION
    Requests the character list from the AniList GraphQL endpoint page by page
    (50 entries per page), collects the trimmed full names, and writes them
    sorted and de-duplicated to the output file below a one-line source header.
    The crawl stops at the first failed request; everything collected up to
    that point is still saved.
.PARAMETER output
    Output filename.
    Default is names_anilist.txt.
#>
param(
    [string]$output = 'names_anilist.txt'
)

# disable extra noise for invoke-webrequest
#$ProgressPreference = "SilentlyContinue"

$requestTemplate = '{ "query": "query { Page (page: {0}, perPage: 50) { characters { name { full } } pageInfo { hasNextPage lastPage } } }" }'
$startTime = [DateTime]::UtcNow
# NOTE(review): paging starts at 0 here; if the API treats pages as 1-based this
# only costs one redundant request (duplicates are dropped below) -- confirm.
$page = 0
# A hash set de-duplicates exactly (ordinal comparison) while collecting and
# avoids re-copying an ever-growing array on every append.
$result = [System.Collections.Generic.HashSet[string]]::new()
$hasNextPage = $false

do
{
    try
    {
        $json = $requestTemplate.Replace('{0}', [string]$page)
        $response = Invoke-RestMethod 'https://graphql.anilist.co' -Method Post -Body $json -ContentType "application/json"
        $response = $response.data.Page
        $hasNextPage = $response.pageInfo.hasNextPage

        foreach ($char in $response.characters)
        {
            # Skip entries without a usable name instead of letting a null
            # value throw and abort the whole crawl through the catch below.
            $name = $char.name.full
            if (-not [string]::IsNullOrWhiteSpace($name))
            {
                [void]$result.Add($name.Trim())
            }
        }
        $page++

        # Progress is purely cosmetic: -1 hides the bar / the time estimate
        # whenever the reported page count cannot be used for the math.
        $total = $response.pageInfo.lastPage
        $percent = -1
        $remainingSeconds = -1
        if ($total -gt 0)
        {
            $percent = [Math]::Min(100, [int][Math]::Floor($page * 100 / $total))
            $remainingPages = [Math]::Max(0, $total - $page)
            # Initial guess of 1.7 s per page; switch to the measured average
            # once enough pages have been fetched.
            $secondsPerPage = 1.7
            if ($page -gt 100)
            {
                $secondsPerPage = ([DateTime]::UtcNow - $startTime).TotalSeconds / $page
            }
            $remainingSeconds = [int]($secondsPerPage * $remainingPages)
        }
        Write-Progress Downloading -CurrentOperation "Page $page out of $total" -PercentComplete $percent -SecondsRemaining $remainingSeconds

        # Pause between requests.
        Start-Sleep -Seconds 1
    }
    catch
    {
        Write-Host "Failed to request page ${page}: $($_.Exception.Message)"
        $hasNextPage = $false
    }
} while ($hasNextPage)

Write-Progress Downloading -Completed
Write-Host 'Saving the results...'
'# https://anilist.co/search/characters' | Out-File -LiteralPath $output
# Names are already unique, so only ordering is left to do.
$result | Sort-Object | Out-File -LiteralPath $output -Append
Write-Host 'Done.'

84727
names_anilist.txt Normal file

File diff suppressed because it is too large Load Diff