implement pluggable ocr provider and add tesseract and florence2 impl

This commit is contained in:
13xforever
2025-07-05 18:16:25 +05:00
parent e15464d4c0
commit 2259febe87
15 changed files with 401 additions and 107 deletions

View File

@@ -24,6 +24,8 @@ internal static class ContentFilterMonitor
if (message?.Author is null)
message = await e.Channel.GetMessageAsync(e.Message.Id).ConfigureAwait(false);
}
if (message.Attachments.Any())
MediaScreenshotMonitor.EnqueueOcrTask(message);
await ContentFilter.IsClean(c, message).ConfigureAwait(false);
}
}

View File

@@ -3,56 +3,54 @@ using CompatApiClient.Utils;
using CompatBot.Commands;
using CompatBot.Database;
using CompatBot.Database.Providers;
using CompatBot.Ocr;
using CompatBot.Utils.Extensions;
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision;
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision.Models;
namespace CompatBot.EventHandlers;
internal sealed class MediaScreenshotMonitor
{
private readonly ComputerVisionClient cvClient = new(new ApiKeyServiceClientCredentials(Config.AzureComputerVisionKey)) {Endpoint = Config.AzureComputerVisionEndpoint};
private readonly SemaphoreSlim workSemaphore = new(0);
private readonly ConcurrentQueue<(MessageCreatedEventArgs evt, Guid readOperationId)> workQueue = new ConcurrentQueue<(MessageCreatedEventArgs args, Guid readOperationId)>();
private static readonly SemaphoreSlim WorkSemaphore = new(0);
private static readonly ConcurrentQueue<(DiscordMessage msg, string imgUrl)> WorkQueue = new();
public DiscordClient Client { get; internal set; } = null!;
public static int MaxQueueLength { get; private set; }
public async Task OnMessageCreated(DiscordClient client, MessageCreatedEventArgs evt)
{
if (string.IsNullOrEmpty(Config.AzureComputerVisionKey))
if (!OcrProvider.IsAvailable)
return;
var message = evt.Message;
if (message == null)
if (evt.Message is not {} message)
return;
if (!Config.Moderation.OcrChannels.Contains(evt.Channel.Id))
return;
// if (!Config.Moderation.OcrChannels.Contains(evt.Channel.Id))
// return;
#if !DEBUG
if (message.Author.IsBotSafeCheck())
return;
#if !DEBUG
if (await message.Author.IsSmartlistedAsync(client).ConfigureAwait(false))
return;
#endif
EnqueueOcrTask(evt.Message);
}
public static void EnqueueOcrTask(DiscordMessage message)
{
if (!message.Attachments.Any())
return;
var images = Vision.GetImageAttachments(message).Select(att => att.Url)
var images = Vision.GetImageAttachments(message)
.Concat(Vision.GetImagesFromEmbeds(message))
.ToList();
var tasks = new List<Task<ReadHeaders>>(images.Count);
foreach (var url in images)
tasks.Add(cvClient.ReadAsync(url, cancellationToken: Config.Cts.Token));
foreach (var t in tasks)
{
try
{
var headers = await t.ConfigureAwait(false);
workQueue.Enqueue((evt, new(new Uri(headers.OperationLocation).Segments.Last())));
workSemaphore.Release();
WorkQueue.Enqueue((message, url));
WorkSemaphore.Release();
}
catch (Exception ex)
{
@@ -63,84 +61,66 @@ internal sealed class MediaScreenshotMonitor
public async Task ProcessWorkQueue()
{
if (string.IsNullOrEmpty(Config.AzureComputerVisionKey))
if (!OcrProvider.IsAvailable)
return;
Guid? reEnqueueId = null;
do
{
await workSemaphore.WaitAsync(Config.Cts.Token).ConfigureAwait(false);
await WorkSemaphore.WaitAsync(Config.Cts.Token).ConfigureAwait(false);
if (Config.Cts.IsCancellationRequested)
return;
MaxQueueLength = Math.Max(MaxQueueLength, workQueue.Count);
if (!workQueue.TryDequeue(out var item))
MaxQueueLength = Math.Max(MaxQueueLength, WorkQueue.Count);
if (!WorkQueue.TryDequeue(out var item))
continue;
if (item.readOperationId == reEnqueueId)
{
await Task.Delay(100).ConfigureAwait(false);
reEnqueueId = null;
if (Config.Cts.IsCancellationRequested)
return;
}
try
{
var result = await cvClient.GetReadResultAsync(item.readOperationId, Config.Cts.Token).ConfigureAwait(false);
if (result.Status == OperationStatusCodes.Succeeded)
if (await OcrProvider.GetTextAsync(item.imgUrl, Config.Cts.Token).ConfigureAwait(false) is {Length: >0} result
&& !Config.Cts.Token.IsCancellationRequested)
{
if (result.AnalyzeResult?.ReadResults?.SelectMany(r => r.Lines).Any() ?? false)
var cnt = true;
var prefix = $"[{item.msg.Id % 100:00}]";
var ocrTextBuf = new StringBuilder($"OCR result of message <{item.msg.JumpLink}>:").AppendLine();
Config.Log.Debug($"{prefix} OCR result of message {item.msg.JumpLink}:");
var duplicates = new HashSet<string>();
ocrTextBuf.AppendLine(result.Sanitize());
Config.Log.Debug($"{prefix} {result}");
if (cnt
&& await ContentFilter.FindTriggerAsync(FilterContext.Chat, result).ConfigureAwait(false) is Piracystring hit
&& duplicates.Add(hit.String))
{
var cnt = true;
var prefix = $"[{item.evt.Message.Id % 100:00}]";
var ocrTextBuf = new StringBuilder($"OCR result of message <{item.evt.Message.JumpLink}>:").AppendLine();
Config.Log.Debug($"{prefix} OCR result of message {item.evt.Message.JumpLink}:");
var duplicates = new HashSet<string>();
foreach (var r in result.AnalyzeResult.ReadResults)
foreach (var l in r.Lines)
FilterAction suppressFlags = 0;
if ("media".Equals(item.msg.Channel?.Name))
{
ocrTextBuf.AppendLine(l.Text.Sanitize());
Config.Log.Debug($"{prefix} {l.Text}");
if (cnt
&& await ContentFilter.FindTriggerAsync(FilterContext.Chat, l.Text).ConfigureAwait(false) is Piracystring hit
&& duplicates.Add(hit.String))
{
FilterAction suppressFlags = 0;
if ("media".Equals(item.evt.Channel.Name))
suppressFlags = FilterAction.SendMessage | FilterAction.ShowExplain;
await ContentFilter.PerformFilterActions(
Client,
item.evt.Message,
hit,
suppressFlags,
l.Text,
"🖼 Screenshot of an undesirable content",
"Screenshot of an undesirable content"
).ConfigureAwait(false);
cnt &= !hit.Actions.HasFlag(FilterAction.RemoveContent) && !hit.Actions.HasFlag(FilterAction.IssueWarning);
}
suppressFlags = FilterAction.SendMessage | FilterAction.ShowExplain;
}
await ContentFilter.PerformFilterActions(
Client,
item.msg,
hit,
suppressFlags,
result,
"🖼 Screenshot of an undesirable content",
"Screenshot of an undesirable content"
).ConfigureAwait(false);
cnt &= !hit.Actions.HasFlag(FilterAction.RemoveContent) && !hit.Actions.HasFlag(FilterAction.IssueWarning);
}
var ocrText = ocrTextBuf.ToString();
var hasVkDiagInfo = ocrText.Contains("Vulkan Diagnostics Tool v")
|| ocrText.Contains("VkDiag Version:");
if (!cnt || hasVkDiagInfo)
{
try
{
var botSpamCh = await Client.GetChannelAsync(Config.ThumbnailSpamId).ConfigureAwait(false);
await botSpamCh.SendAutosplitMessageAsync(ocrTextBuf, blockStart: "", blockEnd: "").ConfigureAwait(false);
}
catch (Exception ex)
{
Config.Log.Warn(ex);
}
var ocrText = ocrTextBuf.ToString();
var hasVkDiagInfo = ocrText.Contains("Vulkan Diagnostics Tool v")
|| ocrText.Contains("VkDiag Version:");
if (!cnt || hasVkDiagInfo)
try
{
var botSpamCh = await Client.GetChannelAsync(Config.ThumbnailSpamId).ConfigureAwait(false);
await botSpamCh.SendAutosplitMessageAsync(ocrTextBuf, blockStart: "", blockEnd: "").ConfigureAwait(false);
}
catch (Exception ex)
{
Config.Log.Warn(ex);
}
}
}
else if (result.Status is OperationStatusCodes.NotStarted or OperationStatusCodes.Running)
{
workQueue.Enqueue(item);
reEnqueueId ??= item.readOperationId;
workSemaphore.Release();
}
}
catch (Exception e)