mirror of
https://github.com/RPCS3/discord-bot.git
synced 2026-01-31 01:25:22 +01:00
add ocr confidence logging and add filter threshold
This commit is contained in:
@@ -76,17 +76,18 @@ internal sealed class MediaScreenshotMonitor
|
||||
|
||||
try
|
||||
{
|
||||
if (await OcrProvider.GetTextAsync(item.imgUrl, Config.Cts.Token).ConfigureAwait(false) is {Length: >0} result
|
||||
if (await OcrProvider.GetTextAsync(item.imgUrl, Config.Cts.Token).ConfigureAwait(false) is ({Length: >0} result, var confidence)
|
||||
&& !Config.Cts.Token.IsCancellationRequested)
|
||||
{
|
||||
var cnt = true;
|
||||
var prefix = $"[{item.msg.Id % 100:00}]";
|
||||
var ocrTextBuf = new StringBuilder($"OCR result of message <{item.msg.JumpLink}>:").AppendLine();
|
||||
Config.Log.Debug($"{prefix} OCR result of message {item.msg.JumpLink}:");
|
||||
var ocrTextBuf = new StringBuilder($"OCR result of message <{item.msg.JumpLink}> ({confidence*100:0.00}%):").AppendLine();
|
||||
Config.Log.Debug($"{prefix} OCR result of message {item.msg.JumpLink} ({confidence*100:0.00}%):");
|
||||
var duplicates = new HashSet<string>();
|
||||
ocrTextBuf.AppendLine(result.Sanitize());
|
||||
Config.Log.Debug($"{prefix} {result}");
|
||||
if (cnt
|
||||
&& confidence > 0.65
|
||||
&& await ContentFilter.FindTriggerAsync(FilterContext.Chat, result).ConfigureAwait(false) is Piracystring hit
|
||||
&& duplicates.Add(hit.String))
|
||||
{
|
||||
|
||||
@@ -22,7 +22,7 @@ public class AzureVision: IOcrBackend
|
||||
return Task.FromResult(true);
|
||||
}
|
||||
|
||||
public async Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
|
||||
public async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
|
||||
{
|
||||
var headers = await cvClient.ReadAsync(imgUrl, cancellationToken: cancellationToken).ConfigureAwait(false);
|
||||
var operationId = new Guid(new Uri(headers.OperationLocation).Segments.Last());
|
||||
@@ -43,10 +43,10 @@ public class AzureVision: IOcrBackend
|
||||
foreach (var r in result.AnalyzeResult.ReadResults)
|
||||
foreach (var l in r.Lines)
|
||||
ocrTextBuf.AppendLine(l.Text);
|
||||
return ocrTextBuf.ToString();
|
||||
return (ocrTextBuf.ToString(), 1);
|
||||
}
|
||||
}
|
||||
Config.Log.Warn($"Failed to OCR image {imgUrl}: {result.Status}");
|
||||
return "";
|
||||
return ("", 0);
|
||||
}
|
||||
}
|
||||
@@ -25,7 +25,7 @@ public abstract class BackendBase: IOcrBackend, IDisposable
|
||||
return Task.FromResult(true);
|
||||
}
|
||||
|
||||
public abstract Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
|
||||
public abstract Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
|
||||
|
||||
public virtual void Dispose() => HttpClient.Dispose();
|
||||
|
||||
|
||||
@@ -53,13 +53,13 @@ public class Florence2: BackendBase
|
||||
return true;
|
||||
}
|
||||
|
||||
public override async Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
|
||||
public override async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
|
||||
{
|
||||
await using var imgStream = await HttpClient.GetStreamAsync(imgUrl, cancellationToken).ConfigureAwait(false);
|
||||
var results = model.Run(TaskTypes.OCR_WITH_REGION, [imgStream], "", CancellationToken.None);
|
||||
var result = new StringBuilder();
|
||||
foreach (var box in results[0].OCRBBox)
|
||||
result.AppendLine(box.Text);
|
||||
return result.ToString().TrimEnd();
|
||||
return (result.ToString().TrimEnd(), 1);
|
||||
}
|
||||
}
|
||||
@@ -4,5 +4,5 @@ public interface IOcrBackend
|
||||
{
|
||||
string Name { get; }
|
||||
Task<bool> InitializeAsync(CancellationToken cancellationToken);
|
||||
Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
|
||||
Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -6,7 +6,7 @@ using TesseractCSharp.Interop;
|
||||
|
||||
namespace CompatBot.Ocr.Backend;
|
||||
|
||||
internal class Tesseract: BackendBase, IDisposable
|
||||
internal class Tesseract: BackendBase
|
||||
{
|
||||
private TesseractEngine engine;
|
||||
|
||||
@@ -65,12 +65,12 @@ internal class Tesseract: BackendBase, IDisposable
|
||||
return true;
|
||||
}
|
||||
|
||||
public override async Task<string> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
|
||||
public override async Task<(string result, double confidence)> GetTextAsync(string imgUrl, CancellationToken cancellationToken)
|
||||
{
|
||||
var imgData = await HttpClient.GetByteArrayAsync(imgUrl, cancellationToken).ConfigureAwait(false);
|
||||
using var img = Pix.LoadFromMemory(imgData);
|
||||
using var page = engine.Process(img);
|
||||
return page.GetText() ?? "";
|
||||
return (page.GetText() ?? "", page.GetMeanConfidence());
|
||||
}
|
||||
|
||||
public override void Dispose()
|
||||
|
||||
@@ -33,10 +33,11 @@ public static class OcrProvider
|
||||
}
|
||||
}
|
||||
|
||||
public static async Task<string> GetTextAsync(string imageUrl, CancellationToken cancellationToken)
|
||||
public static async Task<(string result, double confidence)> GetTextAsync(string imageUrl, CancellationToken cancellationToken)
|
||||
{
|
||||
if (backend is null)
|
||||
return "";
|
||||
return ("", -1);
|
||||
|
||||
try
|
||||
{
|
||||
return await backend.GetTextAsync(imageUrl, cancellationToken).ConfigureAwait(false);
|
||||
@@ -44,7 +45,7 @@ public static class OcrProvider
|
||||
catch (Exception e)
|
||||
{
|
||||
Config.Log.Warn(e, $"Failed to OCR image {imageUrl}");
|
||||
return "";
|
||||
return ("", 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user