Add OCR confidence logging and a confidence threshold for content filtering

This commit is contained in:
13xforever
2025-07-08 23:07:13 +05:00
parent 2a1dac6ce2
commit 935e8585cb
7 changed files with 18 additions and 16 deletions

View File

@@ -76,17 +76,18 @@ internal sealed class MediaScreenshotMonitor
try
{
if (await OcrProvider.GetTextAsync(item.imgUrl, Config.Cts.Token).ConfigureAwait(false) is {Length: >0} result
if (await OcrProvider.GetTextAsync(item.imgUrl, Config.Cts.Token).ConfigureAwait(false) is ({Length: >0} result, var confidence)
&& !Config.Cts.Token.IsCancellationRequested)
{
var cnt = true;
var prefix = $"[{item.msg.Id % 100:00}]";
var ocrTextBuf = new StringBuilder($"OCR result of message <{item.msg.JumpLink}>:").AppendLine();
Config.Log.Debug($"{prefix} OCR result of message {item.msg.JumpLink}:");
var ocrTextBuf = new StringBuilder($"OCR result of message <{item.msg.JumpLink}> ({confidence*100:0.00}%):").AppendLine();
Config.Log.Debug($"{prefix} OCR result of message {item.msg.JumpLink} ({confidence*100:0.00}%):");
var duplicates = new HashSet<string>();
ocrTextBuf.AppendLine(result.Sanitize());
Config.Log.Debug($"{prefix} {result}");
if (cnt
&& confidence > 0.65
&& await ContentFilter.FindTriggerAsync(FilterContext.Chat, result).ConfigureAwait(false) is Piracystring hit
&& duplicates.Add(hit.String))
{

View File

@@ -22,7 +22,7 @@ public class AzureVision: IOcrBackend
return Task.FromResult(true);
}
public async Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
public async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
{
var headers = await cvClient.ReadAsync(imgUrl, cancellationToken: cancellationToken).ConfigureAwait(false);
var operationId = new Guid(new Uri(headers.OperationLocation).Segments.Last());
@@ -43,10 +43,10 @@ public class AzureVision: IOcrBackend
foreach (var r in result.AnalyzeResult.ReadResults)
foreach (var l in r.Lines)
ocrTextBuf.AppendLine(l.Text);
return ocrTextBuf.ToString();
return (ocrTextBuf.ToString(), 1);
}
}
Config.Log.Warn($"Failed to OCR image {imgUrl}: {result.Status}");
return "";
return ("", 0);
}
}

View File

@@ -25,7 +25,7 @@ public abstract class BackendBase: IOcrBackend, IDisposable
return Task.FromResult(true);
}
public abstract Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
public abstract Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
public virtual void Dispose() => HttpClient.Dispose();

View File

@@ -53,13 +53,13 @@ public class Florence2: BackendBase
return true;
}
// Diff view: the two signature lines and the two return lines below appear to be the
// pre-change line followed by its post-change replacement.
//
// Downloads the image at imgUrl as a stream, runs the model's OCR_WITH_REGION task on it,
// and concatenates the Text of every returned OCR box, one per line, trimming trailing whitespace.
// The post-change version returns that text together with a confidence value.
public override async Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
public override async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
{
await using var imgStream = await HttpClient.GetStreamAsync(imgUrl, cancellationToken).ConfigureAwait(false);
// NOTE(review): CancellationToken.None is passed to model.Run, so cancellationToken only
// governs the download above — confirm this is intentional.
var results = model.Run(TaskTypes.OCR_WITH_REGION, [imgStream], "", CancellationToken.None);
var result = new StringBuilder();
// Only the first element of results is read; a single image stream was submitted above.
foreach (var box in results[0].OCRBBox)
result.AppendLine(box.Text);
return result.ToString().TrimEnd();
// The confidence returned here is the constant 1; this method does not read any
// per-result score from the model output.
return (result.ToString().TrimEnd(), 1);
}
}

View File

@@ -4,5 +4,5 @@ public interface IOcrBackend
{
string Name { get; }
Task<bool> InitializeAsync(CancellationToken cancellationToken);
Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
}

View File

@@ -6,7 +6,7 @@ using TesseractCSharp.Interop;
namespace CompatBot.Ocr.Backend;
internal class Tesseract: BackendBase, IDisposable
internal class Tesseract: BackendBase
{
private TesseractEngine engine;
@@ -65,12 +65,12 @@ internal class Tesseract: BackendBase, IDisposable
return true;
}
// Diff view: the two signature lines and the two return lines below appear to be the
// pre-change line followed by its post-change replacement.
//
// Downloads the image at imgUrl into memory, loads it as a Pix, runs the engine on it,
// and returns the recognized text (an empty string when GetText() yields null).
// The post-change version also returns page.GetMeanConfidence() as the confidence.
public override async Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
public override async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
{
// cancellationToken is only passed to the download; the Pix load and engine.Process calls
// below are made without it.
var imgData = await HttpClient.GetByteArrayAsync(imgUrl, cancellationToken).ConfigureAwait(false);
using var img = Pix.LoadFromMemory(imgData);
using var page = engine.Process(img);
return page.GetText() ?? "";
// NOTE(review): the MediaScreenshotMonitor hunk in this commit compares confidence against
// 0.65 and prints confidence*100 as a percentage, so GetMeanConfidence() is presumably on a
// 0..1 scale — confirm against the TesseractCSharp API.
return (page.GetText() ?? "", page.GetMeanConfidence());
}
public override void Dispose()

View File

@@ -33,10 +33,11 @@ public static class OcrProvider
}
}
public static async Task<string> GetTextAsync(string imageUrl, CancellationToken cancellationToken)
public static async Task<(string result, double confidence)> GetTextAsync(string imageUrl, CancellationToken cancellationToken)
{
if (backend is null)
return "";
return ("", -1);
try
{
return await backend.GetTextAsync(imageUrl, cancellationToken).ConfigureAwait(false);
@@ -44,7 +45,7 @@ public static class OcrProvider
catch (Exception e)
{
Config.Log.Warn(e, $"Failed to OCR image {imageUrl}");
return "";
return ("", 0);
}
}