update anime name lists

This commit is contained in:
13xforever
2025-02-20 12:48:53 +05:00
parent e00eee4b60
commit 13a92ad3f2
4 changed files with 35840 additions and 3849 deletions

View File

@@ -22,13 +22,14 @@ param(
#$ProgressPreference = "SilentlyContinue" #$ProgressPreference = "SilentlyContinue"
$page = $startpage $page = $startpage
$total = 3971 $total = 4528
$result = @() $result = @()
$hasNextPage = $false $hasNextPage = $false
$delay = 3.5
# get anonymous session # get anonymous session
Invoke-WebRequest "https://anidb.net" -SessionVariable 'Session' | Out-Null Invoke-WebRequest "https://anidb.net" -SessionVariable 'Session' | Out-Null
Start-Sleep -Seconds 2 Start-Sleep -Seconds $delay
function Request-Page($num) function Request-Page($num)
{ {
@@ -37,7 +38,7 @@ function Request-Page($num)
$url = "https://anidb.net/character/?noalias=1&orderby.name=0.1&page=$num&view=list" $url = "https://anidb.net/character/?noalias=1&orderby.name=0.1&page=$num&view=list"
$response = Invoke-WebRequest $url -WebSession $Session $response = Invoke-WebRequest $url -WebSession $Session
#$links = @($response.links | Where-Object { $_.href -match '/character/\d+$' } | Where-Object { $_.outerHTML -match '^<a [^>]+>[^<][^\n]+</a>$' }) #$links = @($response.links | Where-Object { $_.href -match '/character/\d+$' } | Where-Object { $_.outerHTML -match '^<a [^>]+>[^<][^\n]+</a>$' })
Start-Sleep -Seconds 3 Start-Sleep -Seconds $delay
return $response.Content return $response.Content
} }
@@ -101,8 +102,8 @@ do
Write-Host 'Saving the results...' Write-Host 'Saving the results...'
'# https://anidb.net/character' | Out-File -LiteralPath $output '# https://anidb.net/character' | Out-File -LiteralPath $output
$result | Sort-Object | Get-Unique | Out-File -LiteralPath $output -Append $result | Sort-Object | Get-Unique | Out-File -LiteralPath $output -Append
Read-Host -Prompt 'Rate limited, plz unban...' Write-Host 'Rate limited, plz unban...'
continue break
} }
else else
{ {
@@ -123,7 +124,7 @@ do
$pos = $html.IndexOf('<a href=', $pos) $pos = $html.IndexOf('<a href=', $pos)
$pos = $html.IndexOf('>', $pos) $pos = $html.IndexOf('>', $pos)
$endPos = $html.IndexOf('</a></td>', $pos) $endPos = $html.IndexOf('</a></td>', $pos)
$name = $html.Substring($pos + 1, $endPos - $pos - 1).Replace(' ', ' ').Trim() $name = $html.Substring($pos + 1, $endPos - $pos - 1).Replace(' ', ' ').Replace('`', "'").Trim()
$pos = $html.IndexOf('<td data-label="Type"', $endPos) $pos = $html.IndexOf('<td data-label="Type"', $endPos)
$pos = $html.IndexOf('>', $pos) $pos = $html.IndexOf('>', $pos)
@@ -163,7 +164,7 @@ do
$hasNextPage = $false $hasNextPage = $false
} }
} while ($hasNextPage) } while ($hasNextPage)
Write-Host "Stopped on page $page" Write-Host "Stopped on page $page/$total"
Write-Progress -Activity "Downloading" -Completed Write-Progress -Activity "Downloading" -Completed
Write-Host 'Saving the results...' Write-Host 'Saving the results...'

View File

@@ -20,15 +20,16 @@ param(
$requestTemplate = '{ "query": "query { Page (page: {0}, perPage: 50) { characters { name { full } } pageInfo { hasNextPage lastPage } } }" }' $requestTemplate = '{ "query": "query { Page (page: {0}, perPage: 50) { characters { name { full } } pageInfo { hasNextPage lastPage } } }" }'
$page = $startPage $page = $startPage
$total = 2565 $total = 3171
$result = @() $result = @()
$hasNextPage = $false $hasNextPage = $false
$delay = 3
function Request-Page($num) function Request-Page($num)
{ {
$json = $requestTemplate.Replace('{0}', $num) $json = $requestTemplate.Replace('{0}', $num)
$response = Invoke-RestMethod 'https://graphql.anilist.co' -Method Post -Body $json -ContentType "application/json" -TimeoutSec 30 $response = Invoke-RestMethod 'https://graphql.anilist.co' -Method Post -Body $json -ContentType "application/json" -TimeoutSec 30
Start-Sleep -Seconds 1.5 Start-Sleep -Seconds $delay
return $response.data.Page return $response.data.Page
} }
@@ -89,7 +90,7 @@ do
$name = $char.name.full $name = $char.name.full
if ($null -eq $name) { continue } if ($null -eq $name) { continue }
$name = $char.name.full.Replace(' ', ' ').Trim() $name = $char.name.full.Replace(' ', ' ').Replace('`', "'").Trim()
if (($name.Length -lt 2) -or ("$name" -match '^\d+$')) if (($name.Length -lt 2) -or ("$name" -match '^\d+$'))
{ {
Write-Host "Skipping $name" Write-Host "Skipping $name"
@@ -115,7 +116,7 @@ do
$hasNextPage = $false $hasNextPage = $false
} }
} while ($hasNextPage) } while ($hasNextPage)
Write-Host "Stopped on page $page" Write-Host "Stopped on page $page/$total"
Write-Progress -Activity "Downloading" -Completed Write-Progress -Activity "Downloading" -Completed
Write-Host 'Saving the results...' Write-Host 'Saving the results...'

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff