From 935e8585cba33f90109aeb48f92973a7886db7b5 Mon Sep 17 00:00:00 2001 From: 13xforever Date: Tue, 8 Jul 2025 23:07:13 +0500 Subject: [PATCH] add ocr confidence logging and add filter threshold --- CompatBot/EventHandlers/MediaScreenshotMonitor.cs | 7 ++++--- CompatBot/Ocr/Backend/AzureVision.cs | 6 +++--- CompatBot/Ocr/Backend/BackendBase.cs | 2 +- CompatBot/Ocr/Backend/Florence2.cs | 4 ++-- CompatBot/Ocr/Backend/IOcrBackend.cs | 2 +- CompatBot/Ocr/Backend/Tesseract.cs | 6 +++--- CompatBot/Ocr/OcrProvider.cs | 7 ++++--- 7 files changed, 18 insertions(+), 16 deletions(-) diff --git a/CompatBot/EventHandlers/MediaScreenshotMonitor.cs b/CompatBot/EventHandlers/MediaScreenshotMonitor.cs index 8c128d5e..e627471c 100644 --- a/CompatBot/EventHandlers/MediaScreenshotMonitor.cs +++ b/CompatBot/EventHandlers/MediaScreenshotMonitor.cs @@ -76,17 +76,18 @@ internal sealed class MediaScreenshotMonitor try { - if (await OcrProvider.GetTextAsync(item.imgUrl, Config.Cts.Token).ConfigureAwait(false) is {Length: >0} result + if (await OcrProvider.GetTextAsync(item.imgUrl, Config.Cts.Token).ConfigureAwait(false) is ({Length: >0} result, var confidence) && !Config.Cts.Token.IsCancellationRequested) { var cnt = true; var prefix = $"[{item.msg.Id % 100:00}]"; - var ocrTextBuf = new StringBuilder($"OCR result of message <{item.msg.JumpLink}>:").AppendLine(); - Config.Log.Debug($"{prefix} OCR result of message {item.msg.JumpLink}:"); + var ocrTextBuf = new StringBuilder($"OCR result of message <{item.msg.JumpLink}> ({confidence*100:0.00}%):").AppendLine(); + Config.Log.Debug($"{prefix} OCR result of message {item.msg.JumpLink} ({confidence*100:0.00}%):"); var duplicates = new HashSet(); ocrTextBuf.AppendLine(result.Sanitize()); Config.Log.Debug($"{prefix} {result}"); if (cnt + && confidence > 0.65 && await ContentFilter.FindTriggerAsync(FilterContext.Chat, result).ConfigureAwait(false) is Piracystring hit && duplicates.Add(hit.String)) { diff --git a/CompatBot/Ocr/Backend/AzureVision.cs b/CompatBot/Ocr/Backend/AzureVision.cs index 6db97e9f..5b3ab4c1 100644 --- a/CompatBot/Ocr/Backend/AzureVision.cs +++ b/CompatBot/Ocr/Backend/AzureVision.cs @@ -22,7 +22,7 @@ public class AzureVision: IOcrBackend return Task.FromResult(true); } - public async Task GetTextAsync(string imgUrl, CancellationToken cancellationToken) + public async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken) { var headers = await cvClient.ReadAsync(imgUrl, cancellationToken: cancellationToken).ConfigureAwait(false); var operationId = new Guid(new Uri(headers.OperationLocation).Segments.Last()); @@ -43,10 +43,10 @@ public class AzureVision: IOcrBackend foreach (var r in result.AnalyzeResult.ReadResults) foreach (var l in r.Lines) ocrTextBuf.AppendLine(l.Text); - return ocrTextBuf.ToString(); + return (ocrTextBuf.ToString(), 1); } } Config.Log.Warn($"Failed to OCR image {imgUrl}: {result.Status}"); - return ""; + return ("", 0); } } \ No newline at end of file diff --git a/CompatBot/Ocr/Backend/BackendBase.cs b/CompatBot/Ocr/Backend/BackendBase.cs index c27a40ad..7851049b 100644 --- a/CompatBot/Ocr/Backend/BackendBase.cs +++ b/CompatBot/Ocr/Backend/BackendBase.cs @@ -25,7 +25,7 @@ public abstract class BackendBase: IOcrBackend, IDisposable return Task.FromResult(true); } - public abstract Task GetTextAsync(string imgUrl, CancellationToken cancellationToken); + public abstract Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken); public virtual void Dispose() => HttpClient.Dispose(); diff --git a/CompatBot/Ocr/Backend/Florence2.cs b/CompatBot/Ocr/Backend/Florence2.cs index 9898a450..2b844a16 100644 --- a/CompatBot/Ocr/Backend/Florence2.cs +++ b/CompatBot/Ocr/Backend/Florence2.cs @@ -53,13 +53,13 @@ public class Florence2: BackendBase return true; } - public override async Task GetTextAsync(string imgUrl, CancellationToken cancellationToken) + public override async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken) { await using var imgStream = await HttpClient.GetStreamAsync(imgUrl, cancellationToken).ConfigureAwait(false); var results = model.Run(TaskTypes.OCR_WITH_REGION, [imgStream], "", CancellationToken.None); var result = new StringBuilder(); foreach (var box in results[0].OCRBBox) result.AppendLine(box.Text); - return result.ToString().TrimEnd(); + return (result.ToString().TrimEnd(), 1); } } \ No newline at end of file diff --git a/CompatBot/Ocr/Backend/IOcrBackend.cs b/CompatBot/Ocr/Backend/IOcrBackend.cs index 4cd3ad12..707c7a02 100644 --- a/CompatBot/Ocr/Backend/IOcrBackend.cs +++ b/CompatBot/Ocr/Backend/IOcrBackend.cs @@ -4,5 +4,5 @@ public interface IOcrBackend { string Name { get; } Task InitializeAsync(CancellationToken cancellationToken); - Task GetTextAsync(string imgUrl, CancellationToken cancellationToken); + Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken); } \ No newline at end of file diff --git a/CompatBot/Ocr/Backend/Tesseract.cs b/CompatBot/Ocr/Backend/Tesseract.cs index d33b563e..d76af7ec 100644 --- a/CompatBot/Ocr/Backend/Tesseract.cs +++ b/CompatBot/Ocr/Backend/Tesseract.cs @@ -6,7 +6,7 @@ using TesseractCSharp.Interop; namespace CompatBot.Ocr.Backend; -internal class Tesseract: BackendBase, IDisposable +internal class Tesseract: BackendBase { private TesseractEngine engine; @@ -65,12 +65,12 @@ internal class Tesseract: BackendBase, IDisposable return true; } - public override async Task GetTextAsync(string imgUrl, CancellationToken cancellationToken) + public override async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken) { var imgData = await HttpClient.GetByteArrayAsync(imgUrl, cancellationToken).ConfigureAwait(false); using var img = Pix.LoadFromMemory(imgData); using var page = engine.Process(img); - return page.GetText() ?? ""; + return (page.GetText() ?? "", page.GetMeanConfidence()); } public override void Dispose() diff --git a/CompatBot/Ocr/OcrProvider.cs b/CompatBot/Ocr/OcrProvider.cs index 0552a3ba..a0f9052f 100644 --- a/CompatBot/Ocr/OcrProvider.cs +++ b/CompatBot/Ocr/OcrProvider.cs @@ -33,10 +33,11 @@ public static class OcrProvider } } - public static async Task GetTextAsync(string imageUrl, CancellationToken cancellationToken) + public static async Task<(string result, double confidence)> GetTextAsync(string imageUrl, CancellationToken cancellationToken) { if (backend is null) - return ""; + return ("", -1); + try { return await backend.GetTextAsync(imageUrl, cancellationToken).ConfigureAwait(false); @@ -44,7 +45,7 @@ public static class OcrProvider catch (Exception e) { Config.Log.Warn(e, $"Failed to OCR image {imageUrl}"); - return ""; + return ("", 0); } }