try all image orientations for tesseract ocr to improve results slightly

This commit is contained in:
13xforever
2026-01-27 14:42:14 +05:00
parent 707b88d7bc
commit 4dd543ad7c
3 changed files with 40 additions and 5 deletions

View File

@@ -7,7 +7,7 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="10.0.2" /> <PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="10.0.2" />
<PackageReference Include="Octokit" Version="14.0.0" /> <PackageReference Include="Octokit" Version="14.0.0" />
<PackageReference Include="SharpCompress" Version="0.44.1" /> <PackageReference Include="SharpCompress" Version="0.44.4" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\CompatApiClient\CompatApiClient.csproj" /> <ProjectReference Include="..\CompatApiClient\CompatApiClient.csproj" />

View File

@@ -72,7 +72,7 @@
<PackageReference Include="NLog.Extensions.Logging" Version="6.1.0" /> <PackageReference Include="NLog.Extensions.Logging" Version="6.1.0" />
<PackageReference Include="NReco.Text.AhoCorasickDoubleArrayTrie" Version="1.1.1" /> <PackageReference Include="NReco.Text.AhoCorasickDoubleArrayTrie" Version="1.1.1" />
<PackageReference Include="Result.Net" Version="1.7.0" /> <PackageReference Include="Result.Net" Version="1.7.0" />
<PackageReference Include="SharpCompress" Version="0.44.1" /> <PackageReference Include="SharpCompress" Version="0.44.4" />
<PackageReference Include="SixLabors.ImageSharp.Drawing" Version="2.1.7" /> <PackageReference Include="SixLabors.ImageSharp.Drawing" Version="2.1.7" />
<PackageReference Include="TesseractCSharp" Version="1.0.5" /> <PackageReference Include="TesseractCSharp" Version="1.0.5" />
</ItemGroup> </ItemGroup>

View File

@@ -9,6 +9,7 @@ namespace CompatBot.Ocr.Backend;
internal class Tesseract: BackendBase internal class Tesseract: BackendBase
{ {
private TesseractEngine engine; private TesseractEngine engine;
private static readonly SemaphoreSlim limiter = new(1, 1);
public override string Name => "tesseract"; public override string Name => "tesseract";
@@ -68,9 +69,37 @@ internal class Tesseract: BackendBase
/// <summary>
/// Downloads the image at <paramref name="imgUrl"/> and runs OCR on it in four orientations
/// (as loaded, then after each of three successive 90-degree clockwise rotations).
/// </summary>
/// <param name="imgUrl">URL of the image to download and recognize.</param>
/// <param name="cancellationToken">
/// Cancels the download and the wait for the OCR slot; the global <c>Config.Cts</c> token is honored as well.
/// </param>
/// <returns>
/// The longest non-empty text among the passes whose mean confidence is above 0.5;
/// when no pass qualifies, the pass with the highest mean confidence.
/// </returns>
public override async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
{
    const int orientationCount = 4;
    const float minConfidence = 0.5f;

    var imgData = await HttpClient.GetByteArrayAsync(imgUrl, cancellationToken).ConfigureAwait(false);

    // Observe both the caller's token and the global one while queued behind other requests.
    using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(Config.Cts.Token, cancellationToken);
    // The static limiter admits a single caller at a time to the OCR passes below.
    await limiter.WaitAsync(linkedCts.Token).ConfigureAwait(false);
    try
    {
        var results = new (string text, float confidence)[orientationCount];
        var img = Pix.LoadFromMemory(imgData);
        try
        {
            for (var pass = 0; pass < orientationCount; pass++)
            {
                if (pass > 0)
                {
                    // Swap in the rotated copy before releasing the previous image, so that
                    // `img` always refers to a live instance for the finally block below.
                    var previous = img;
                    img = previous.Rotate90((int)RotationDirection.Clockwise);
                    previous.Dispose();
                }

                // The page is disposed before the next pass asks the engine to process again.
                using (var page = engine.Process(img))
                {
                    results[pass] = (page.GetText() ?? "", page.GetMeanConfidence());
                }
            }
        }
        finally
        {
            // Releases whichever image is current: the last rotation on success,
            // or the image in use when a pass threw.
            img.Dispose();
        }

        // OrderByDescending is stable, so on equal length the earliest orientation wins.
        var longestText = results
            .Where(i => i.confidence > minConfidence)
            .OrderByDescending(i => i.text.Length)
            .FirstOrDefault();
        // FirstOrDefault yields the default tuple (null text, zero confidence) when nothing
        // passed the filter; the pattern below rejects it.
        if (longestText is { confidence: > minConfidence, text.Length: > 0 })
        {
            return longestText;
        }

        return results.MaxBy(i => i.confidence);
    }
    finally
    {
        limiter.Release();
    }
}
public override void Dispose() public override void Dispose()
@@ -78,4 +107,10 @@ internal class Tesseract: BackendBase
base.Dispose(); base.Dispose();
engine.Dispose(); engine.Dispose();
} }
/// <summary>
/// Direction of a 90-degree image rotation. Members are cast to <c>int</c> and passed to
/// <c>Pix.Rotate90</c> by this class.
/// </summary>
// NOTE(review): the 1 / -1 values presumably mirror the direction convention of the underlying
// Leptonica rotate call (1 = clockwise, -1 = counter-clockwise) - confirm against the wrapper docs.
private enum RotationDirection
{
// Used by GetTextAsync for each successive orientation pass.
Clockwise = 1,
// Not referenced in the visible part of this class.
CounterClockwise = -1,
}
} }