From a5a3ff9d43059dfd23d7a5737cd615fcc5a50eb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20F=C3=BCrni=C3=9F?= Date: Sat, 12 Mar 2022 22:07:44 +0100 Subject: [PATCH] Improve AniDB ID searching and matching --- CONTRIBUTORS.md | 1 + .../Configuration/PluginConfiguration.cs | 3 + .../Configuration/configPage.html | 7 + .../AniDB/Metadata/AniDbSeriesProvider.cs | 166 +++-- .../Providers/equals_check.cs | 583 ++++-------------- 5 files changed, 225 insertions(+), 535 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index a63f8b3..1d9fcef 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -5,6 +5,7 @@ - [hawken93](https://github.com/hawken93) - [dkanada](https://github.com/dkanada) - [StillLoading](https://github.com/StillLoading) + - [Nalsai](https://github.com/Nalsai) # Emby Contributors diff --git a/Jellyfin.Plugin.AniDB/Configuration/PluginConfiguration.cs b/Jellyfin.Plugin.AniDB/Configuration/PluginConfiguration.cs index d7dc8a7..a6fb8fe 100644 --- a/Jellyfin.Plugin.AniDB/Configuration/PluginConfiguration.cs +++ b/Jellyfin.Plugin.AniDB/Configuration/PluginConfiguration.cs @@ -30,6 +30,7 @@ namespace Jellyfin.Plugin.AniDB.Configuration public PluginConfiguration() { TitlePreference = TitlePreferenceType.Localized; + TitleSimilarityThreshold = 50; MaxGenres = 5; TidyGenreList = true; TitleCaseGenres = false; @@ -40,6 +41,8 @@ namespace Jellyfin.Plugin.AniDB.Configuration public TitlePreferenceType TitlePreference { get; set; } + public int TitleSimilarityThreshold { get; set; } + public int MaxGenres { get; set; } public bool TidyGenreList { get; set; } diff --git a/Jellyfin.Plugin.AniDB/Configuration/configPage.html b/Jellyfin.Plugin.AniDB/Configuration/configPage.html index a514a23..133d249 100644 --- a/Jellyfin.Plugin.AniDB/Configuration/configPage.html +++ b/Jellyfin.Plugin.AniDB/Configuration/configPage.html @@ -18,6 +18,11 @@ +
+ + +
Set this to zero to only automatically match if the title of the show is exactly the same. A value of one means that one character can be inserted or deleted.
+
@@ -72,6 +77,7 @@ ApiClient.getPluginConfiguration(AniDBConfigurationPage.pluginUniqueId).then(function (config) { document.getElementById('titleLanguage').value = config.TitlePreference; + document.getElementById('chkTitleSimilarityThreshold').value = config.TitleSimilarityThreshold; document.getElementById('chkMaxGenres').value = config.MaxGenres; document.getElementById('chkTitleCaseGenres').checked = config.TitleCaseGenres; document.getElementById('chkTidyGenres').checked = config.TidyGenreList; @@ -88,6 +94,7 @@ ApiClient.getPluginConfiguration(AniDBConfigurationPage.pluginUniqueId).then(function (config) { config.TitlePreference = document.getElementById('titleLanguage').value; + config.TitleSimilarityThreshold = document.getElementById('chkTitleSimilarityThreshold').value; config.MaxGenres = document.getElementById('chkMaxGenres').value; config.TitleCaseGenres = document.getElementById('chkTitleCaseGenres').checked; config.TidyGenreList = document.getElementById('chkTidyGenres').checked; diff --git a/Jellyfin.Plugin.AniDB/Providers/AniDB/Metadata/AniDbSeriesProvider.cs b/Jellyfin.Plugin.AniDB/Providers/AniDB/Metadata/AniDbSeriesProvider.cs index eb0ad3e..c95e1df 100644 --- a/Jellyfin.Plugin.AniDB/Providers/AniDB/Metadata/AniDbSeriesProvider.cs +++ b/Jellyfin.Plugin.AniDB/Providers/AniDB/Metadata/AniDbSeriesProvider.cs @@ -2,7 +2,6 @@ using System.Collections.Generic; using System.Globalization; using System.IO; -using System.IO.Compression; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -12,11 +11,9 @@ using System.Xml; using System.Xml.Linq; using System.Xml.Serialization; using System.Net.Http; -using System.Net.Http.Headers; using Jellyfin.Plugin.AniDB.Configuration; using Jellyfin.Plugin.AniDB.Providers.AniDB.Identity; using MediaBrowser.Common.Configuration; -using MediaBrowser.Common.Net; using MediaBrowser.Controller.Entities; using MediaBrowser.Controller.Entities.TV; using MediaBrowser.Controller.Providers; @@ -47,71 +44,109 @@ namespace Jellyfin.Plugin.AniDB.Providers.AniDB.Metadata public AniDbSeriesProvider(IApplicationPaths appPaths) { _appPaths = appPaths; - TitleMatcher = AniDbTitleMatcher.DefaultInstance; - Current = this; } private static AniDbSeriesProvider Current { get; set; } private IAniDbTitleMatcher TitleMatcher { get; set; } public int Order => -1; + public string Name => "AniDB"; public async Task> GetMetadata(SeriesInfo info, CancellationToken cancellationToken) { var result = new MetadataResult(); + var animeId = info.ProviderIds.GetOrDefault(ProviderNames.AniDb); - var aid = info.ProviderIds.GetOrDefault(ProviderNames.AniDb); - if (string.IsNullOrEmpty(aid) && !string.IsNullOrEmpty(info.Name)) + if (string.IsNullOrEmpty(animeId) && !string.IsNullOrEmpty(info.Name)) { - aid = await Equals_check.Fast_xml_search(info.Name, info.Name, cancellationToken, true); - if (string.IsNullOrEmpty(aid)) - { - aid = await Equals_check.Fast_xml_search(await Equals_check.Clear_name(info.Name, cancellationToken), await Equals_check.Clear_name(info.Name, cancellationToken), cancellationToken, true); - } + animeId = await Equals_check.XmlFindId(info.Name, cancellationToken); } - if (!string.IsNullOrEmpty(aid)) + if (!string.IsNullOrEmpty(animeId)) { - result.Item = new Series(); - result.HasMetadata = true; - - result.Item.ProviderIds.Add(ProviderNames.AniDb, aid); - - var seriesDataPath = await GetSeriesData(_appPaths, aid, cancellationToken); - await FetchSeriesInfo(result, seriesDataPath, info.MetadataLanguage ?? "en").ConfigureAwait(false); + result = await GetMetadataForId(animeId, info, cancellationToken); } return result; } - public string Name => "AniDB"; + private async Task> GetMetadataForId(string animeId, SeriesInfo info, CancellationToken cancellationToken) + { + var result = new MetadataResult(); + + result.Item = new Series(); + result.HasMetadata = true; + + result.Item.ProviderIds.Add(ProviderNames.AniDb, animeId); + + var seriesDataPath = await GetSeriesData(_appPaths, animeId, cancellationToken); + await FetchSeriesInfo(result, seriesDataPath, info.MetadataLanguage ?? "en").ConfigureAwait(false); + + return result; + } public async Task> GetSearchResults(SeriesInfo searchInfo, CancellationToken cancellationToken) { - var metadata = await GetMetadata(searchInfo, cancellationToken).ConfigureAwait(false); + var results = new List(); + var animeId = searchInfo.ProviderIds.GetOrDefault(ProviderNames.AniDb); - var list = new List(); - - if (metadata.HasMetadata) + if (!string.IsNullOrEmpty(animeId)) { - var seriesId = metadata.Item.ProviderIds.GetOrDefault(ProviderNames.AniDb); - var imageProvider = new AniDbImageProvider(_appPaths); - var images = await imageProvider.GetImages(seriesId, cancellationToken); - var res = new RemoteSearchResult - { - Name = metadata.Item.Name, - PremiereDate = metadata.Item.PremiereDate, - ProductionYear = metadata.Item.ProductionYear, - ImageUrl = images.Any() ? images.First().Url : null, - ProviderIds = metadata.Item.ProviderIds, - SearchProviderName = Name - }; + var resultMetadata = await GetMetadataForId(animeId, searchInfo, cancellationToken); - list.Add(res); + if (resultMetadata.HasMetadata) + { + var imageProvider = new AniDbImageProvider(_appPaths); + var images = await imageProvider.GetImages(animeId, cancellationToken); + results.Add(MetadataToRemoteSearchResult(resultMetadata, images)); + } } - return list; + if (!string.IsNullOrEmpty(searchInfo.Name)) + { + List name_results = await GetSearchResultsByName(searchInfo.Name, searchInfo, cancellationToken).ConfigureAwait(false); + + foreach (var media in name_results) + { + results.Add(media); + } + } + + return results; + } + + public async Task> GetSearchResultsByName(string name, SeriesInfo searchInfo, CancellationToken cancellationToken) + { + var imageProvider = new AniDbImageProvider(_appPaths); + var results = new List(); + + List ids = await Equals_check.XmlSearch(name, cancellationToken); + + foreach (string id in ids) + { + var resultMetadata = await GetMetadataForId(id, searchInfo, cancellationToken); + + if (resultMetadata.HasMetadata) + { + var images = await imageProvider.GetImages(id, cancellationToken); + results.Add(MetadataToRemoteSearchResult(resultMetadata, images)); + } + } + return results; + } + + public RemoteSearchResult MetadataToRemoteSearchResult(MetadataResult metadata, IEnumerable images) + { + return new RemoteSearchResult + { + Name = metadata.Item.Name, + ProductionYear = metadata.Item.PremiereDate?.Year, + PremiereDate = metadata.Item.PremiereDate, + ImageUrl = images.Any() ? images.First().Url : null, + ProviderIds = metadata.Item.ProviderIds, + SearchProviderName = ProviderNames.AniDb + }; } public async Task GetImageResponse(string url, CancellationToken cancellationToken) @@ -123,7 +158,7 @@ namespace Jellyfin.Plugin.AniDB.Providers.AniDB.Metadata public static async Task GetSeriesData(IApplicationPaths appPaths, string seriesId, CancellationToken cancellationToken) { - var dataPath = CalculateSeriesDataPath(appPaths, seriesId); + var dataPath = GetSeriesDataPath(appPaths, seriesId); var seriesDataPath = Path.Combine(dataPath, SeriesDataFile); var fileInfo = new FileInfo(seriesDataPath); @@ -136,11 +171,6 @@ namespace Jellyfin.Plugin.AniDB.Providers.AniDB.Metadata return seriesDataPath; } - public static string CalculateSeriesDataPath(IApplicationPaths paths, string seriesId) - { - return Path.Combine(paths.CachePath, "anidb", "series", seriesId); - } - private async Task FetchSeriesInfo(MetadataResult result, string seriesDataPath, string preferredMetadataLangauge) { var series = result.Item; @@ -489,7 +519,8 @@ namespace Jellyfin.Plugin.AniDB.Providers.AniDB.Metadata } } - private PersonInfo CreatePerson(string name, string type, string role = null) { + private PersonInfo CreatePerson(string name, string type, string role = null) + { // todo find nationality of person and conditionally reverse name order if (!_typeMappings.TryGetValue(type, out string mappedType)) @@ -829,23 +860,9 @@ namespace Jellyfin.Plugin.AniDB.Providers.AniDB.Metadata /// The app paths. /// The series id. /// System.String. - internal static string GetSeriesDataPath(IApplicationPaths appPaths, string seriesId) + public static string GetSeriesDataPath(IApplicationPaths appPaths, string seriesId) { - var seriesDataPath = Path.Combine(GetSeriesDataPath(appPaths), seriesId); - - return seriesDataPath; - } - - /// - /// Gets the series data path. - /// - /// The app paths. - /// System.String. - internal static string GetSeriesDataPath(IApplicationPaths appPaths) - { - var dataPath = Path.Combine(appPaths.CachePath, "anidb\\series"); - - return dataPath; + return Path.Combine(appPaths.CachePath, "anidb", "series", seriesId); } private struct GenreInfo @@ -899,30 +916,5 @@ namespace Jellyfin.Plugin.AniDB.Providers.AniDB.Metadata titlesList.FirstOrDefault(t => t.Type == "main") ?? titlesList.FirstOrDefault(); } - - /// - /// Gets the series data path. - /// - /// The app paths. - /// The series id. - /// System.String. - internal static string GetSeriesDataPath(IApplicationPaths appPaths, string seriesId) - { - var seriesDataPath = Path.Combine(GetSeriesDataPath(appPaths), seriesId); - - return seriesDataPath; - } - - /// - /// Gets the series data path. - /// - /// The app paths. - /// System.String. - internal static string GetSeriesDataPath(IApplicationPaths appPaths) - { - var dataPath = Path.Combine(appPaths.CachePath, "tvdb"); - - return dataPath; - } } } diff --git a/Jellyfin.Plugin.AniDB/Providers/equals_check.cs b/Jellyfin.Plugin.AniDB/Providers/equals_check.cs index 64fd972..31e43b0 100644 --- a/Jellyfin.Plugin.AniDB/Providers/equals_check.cs +++ b/Jellyfin.Plugin.AniDB/Providers/equals_check.cs @@ -1,11 +1,9 @@ using System; using System.Collections.Generic; using System.IO; -using System.Linq; using System.Text.RegularExpressions; using System.Threading; using System.Threading.Tasks; -using System.Xml.Linq; using Jellyfin.Plugin.AniDB.Providers.AniDB.Identity; using Microsoft.Extensions.Logging; @@ -20,132 +18,96 @@ namespace Jellyfin.Plugin.AniDB.Providers _logger = logger; } - /// - /// Clear name - /// - /// - /// - public async static Task Clear_name(string a, CancellationToken cancellationToken) - { - try - { - a = a.Trim().Replace(await One_line_regex(new Regex(@"(?s) \(.*?\)"), a.Trim(), cancellationToken, 0), ""); - } - catch (Exception) - { } - a = a.Replace(".", " "); - a = a.Replace("-", " "); - a = a.Replace("`", ""); - a = a.Replace("'", ""); - a = a.Replace("&", "and"); - a = a.Replace("(", ""); - a = a.Replace(")", ""); - try - { - a = a.Replace(await One_line_regex(new Regex(@"(?s)(S[0-9]+)"), a.Trim(), cancellationToken), await One_line_regex(new Regex(@"(?s)S([0-9]+)"), a.Trim(), cancellationToken)); - } - catch (Exception) - { - } - return a; - } - - /// - /// Clear name heavy. - /// Example: Text & Text to Text and Text - /// - /// - /// - public async static Task Clear_name_step2(string a, CancellationToken cancellationToken) - { - if(a.Contains("Gekijyouban")) - a= (a.Replace("Gekijyouban", "") + " Movie").Trim(); - if (a.Contains("gekijyouban")) - a = (a.Replace("gekijyouban", "") + " Movie").Trim(); - try - { - a = a.Trim().Replace(await One_line_regex(new Regex(@"(?s) \(.*?\)"), a.Trim(), cancellationToken, 0), ""); - } - catch (Exception) - { } - a = a.Replace(".", " "); - a = a.Replace("-", " "); - a = a.Replace("`", ""); - a = a.Replace("'", ""); - a = a.Replace("&", "and"); - a = a.Replace(":", ""); - a = a.Replace("␣", ""); - a = a.Replace("2wei", "zwei"); - a = a.Replace("3rei", "drei"); - a = a.Replace("4ier", "vier"); - return a; - } - - /// - /// If a and b match it return true - /// - /// - /// - /// - public async static Task Compare_strings(string a, string b, CancellationToken cancellationToken) - { - if (!string.IsNullOrEmpty(a) && !string.IsNullOrEmpty(b)) - { - if (await Simple_compare(a, b, cancellationToken)) - return true; - if (await Fast_xml_search(a, b, cancellationToken)) - return true; - - return false; - } - return false; - } - /// /// Cut p(%) away from the string /// - /// - /// + /// + /// /// /// - public async static Task Half_string(string string_, CancellationToken cancellationToken, int min_lenght = 0, int p = 50) + public static string ShortenString(string input, int minLenght = 0, int p = 50) { decimal length = 0; - if (await Task.Run(() => ((int)((decimal)string_.Length - (((decimal)string_.Length / 100m) * (decimal)p)) > min_lenght), cancellationToken)) + if ((int)((decimal)input.Length - (((decimal)input.Length / 100m) * (decimal)p)) > minLenght) { - length = (decimal)string_.Length - (((decimal)string_.Length / 100m) * (decimal)p); + length = (decimal)input.Length - (((decimal)input.Length / 100m) * (decimal)p); } else { - if (string_.Length < min_lenght) + if (input.Length < minLenght) { - length = string_.Length; + length = input.Length; } else { - length = min_lenght; + length = minLenght; } } - return string_.Substring(0, (int)length); + return input.Substring(0, (int)length); + } + + /// + /// Escape string for regex, but fuzzy + /// + /// + /// + public static string FuzzyRegexEscape(string a) + { + a = Regex.Escape(a); + + // make characters that were escaped fuzzy + a = a.Replace(@"\\", ".?"); + a = a.Replace(@"\*", ".?"); + a = a.Replace(@"\+", ".?"); + a = a.Replace(@"\?", ".?"); + a = a.Replace(@"\|", ".?"); + a = a.Replace(@"\{", ".?"); + a = a.Replace(@"\[", ".?"); + a = a.Replace(@"\(", ".?"); + a = a.Replace(@"\)", ".?"); + a = a.Replace(@"\^", ".?"); + a = a.Replace(@"\$", ".?"); + a = a.Replace(@"\.", ".?"); + a = a.Replace(@"\#", ".?"); + + // whitespace + a = a.Replace(@"\ ", ".?.?.?"); + a = Regex.Replace(a, @"\s", ".?.?.?"); + + // other characters + a = Regex.Replace(a, @"[  !,–—_=~'`‚‘’„“”:;␣#@<>}\]\/\-]", ".?"); + + // "words" + a = Regex.Replace(a, @"s\b", ".?s"); + a = a.Replace("c", "(c|k)", StringComparison.OrdinalIgnoreCase); + a = a.Replace("k", "(c|k)", StringComparison.OrdinalIgnoreCase); + a = a.Replace("&", "(&|(and))", StringComparison.OrdinalIgnoreCase); + a = a.Replace("and", "(&|(and))", StringComparison.OrdinalIgnoreCase); + a = a.Replace("OVA", "((OVA)|(OAD))", StringComparison.OrdinalIgnoreCase); + a = a.Replace("OAD", "((OVA)|(OAD))", StringComparison.OrdinalIgnoreCase); + a = a.Replace("re", "re.?", StringComparison.OrdinalIgnoreCase); + a = a.Replace("Gekijyouban", "Gekijouban", StringComparison.OrdinalIgnoreCase); + a = a.Replace("to aru", "to.?aru", StringComparison.OrdinalIgnoreCase); + + return a; } /// /// simple regex /// - /// - /// + /// + /// /// - /// + /// /// - public async static Task One_line_regex(Regex regex, string match, CancellationToken cancellationToken, int group = 1, int match_int = 0) + public async static Task OneLineRegex(string pattern, string input, CancellationToken cancellationToken, int group = 1, int matchInt = 0) { - Regex _regex = regex; int x = 0; - foreach (Match _match in regex.Matches(match)) + foreach (Match match in Regex.Matches(input, pattern, RegexOptions.IgnoreCase)) { - if (x == match_int) + if (x == matchInt) { - return await Task.Run(() => _match.Groups[group].Value.ToString(), cancellationToken); + return await Task.Run(() => match.Groups[group].Value, cancellationToken); } x++; } @@ -153,395 +115,120 @@ namespace Jellyfin.Plugin.AniDB.Providers } /// - ///Return true if a and b match return false if not - ///It loads the titles.xml on exceptions + /// Searches for possible AniDB IDs for name /// - private async static Task Fast_xml_search(string a, string b, CancellationToken cancellationToken, bool return_AniDBid = false, bool retry = false) + public async static Task> XmlSearch(string name, CancellationToken cancellationToken, int x_ = 0) { - //Get AID aid=\"([s\S].*)\"> + var results = new List(); + try { - List pre_aid = new List(); - string xml = File.ReadAllText(Get_anidb_xml_file()); + string xml = File.ReadAllText(GetAnidbXml()); + string s = "-"; int x = 0; - string s1 = "-"; - string s2 = "-"; - while (!string.IsNullOrEmpty(s1) && !string.IsNullOrEmpty(s2)) + while (!string.IsNullOrEmpty(s)) { - s1 = await One_line_regex(new Regex("(?>[^<>]+|<(?!\/anime>)[^<>]*>)*?" + Regex.Escape(await Half_string(a, cancellationToken,4))), xml, cancellationToken,1, x); - if (s1 != "") + s = await OneLineRegex(@"(?>[^<>]+|<(?!\/anime>)[^<>]*>)*?.*" + await Task.Run(() => FuzzyRegexEscape(ShortenString(name, 6, 20)), cancellationToken), xml, cancellationToken, 1, x); + if (s != "") { - pre_aid.Add(s1); - } - s2 = await One_line_regex(new Regex("(?>[^<>]+|<(?!\/anime>)[^<>]*>)*?" + Regex.Escape(await Half_string(b, cancellationToken,4))), xml, cancellationToken, 1, x); - if (s1 != "") - { - if (s1 != s2) - { - pre_aid.Add(s2); - } + results.Add(s); } x++; } - foreach (string _aid in pre_aid) - { - XElement doc = await Task.Run(async () => XElement.Parse("" + "" + await One_line_regex(await Task.Run(() => new Regex("" + @"(?s)(.*?)<\/anime>"), cancellationToken), xml, cancellationToken, 0) + ""), cancellationToken); - var a_ = from page in doc.Elements("anime") - where _aid == page.Attribute("aid").Value - select page; - if (await Simple_compare( a_.Elements("title"), b, cancellationToken) && await Simple_compare(a_.Elements("title"), a, cancellationToken)) - { - return true; - } - } - return false; } catch (Exception) { - if (retry) - { - return false; - } - else + if (x_ == 0) { await Task.Run(() => AniDbTitleDownloader.Load_static(cancellationToken), cancellationToken); - return await Fast_xml_search(a, b, cancellationToken, false, true); + return await XmlSearch(name, cancellationToken, 1); } } + + return results; } /// - /// Return the AniDB ID if a and b match + /// Finds an AniDB ID for name /// - public async static Task Fast_xml_search(string a, string b, CancellationToken cancellationToken, bool return_AniDBid, int x_ = 0) + public async static Task XmlFindId(string name, CancellationToken cancellationToken, int x_ = 0) { - //Get AID aid=\"([s\S].*)\"> - try + var results = await XmlSearch(name, cancellationToken); + + if (results.Count == 1) { - List pre_aid = new List(); - string xml = File.ReadAllText(Get_anidb_xml_file()); - int x = 0; - string s1 = "-"; - string s2 = "-"; - while (!string.IsNullOrEmpty(s1) && !string.IsNullOrEmpty(s2)) + return results[0]; + } + + string xml = File.ReadAllText(GetAnidbXml()); + int lowestDistance = Plugin.Instance.Configuration.TitleSimilarityThreshold; + string currentId = ""; + foreach (string id in results) + { + string nameXmlFromId = await OneLineRegex(@"", xml, cancellationToken); + + string[] lines = nameXmlFromId.Split( + new string[] { "\r\n", "\r", "\n" }, + StringSplitOptions.None + ); + + foreach (string line in lines) { - s1 = await One_line_regex(new Regex("(?>[^<>]+|<(?!\/anime>)[^<>]*>)*?" + Regex.Escape(await Half_string(a, cancellationToken, 4))), xml, cancellationToken, 1, x); - if (s1 != "") + string nameFromId = await OneLineRegex(@"([^<]+)", line, cancellationToken); + + if (!String.IsNullOrEmpty(nameFromId)) { - pre_aid.Add(s1); - } - s2 = await One_line_regex(new Regex("(?>[^<>]+|<(?!\/anime>)[^<>]*>)*?" + Regex.Escape(await Half_string(b, cancellationToken, 4))), xml, cancellationToken, 1, x); - if (s1 != "") - { - if (s1 != s2) + int stringDistance = LevenshteinDistance(name, nameFromId); + if (lowestDistance > stringDistance) { - pre_aid.Add(s2); + lowestDistance = stringDistance; + currentId = id; } } - x++; } - if (pre_aid.Count == 1) - { - if (!string.IsNullOrEmpty(pre_aid[0])) - { - return pre_aid[0]; - } - } - int biggestcount = 0; - string cache_aid=""; - if (a == b) - { - foreach (string _aid in pre_aid) - { - string result= await One_line_regex(new Regex(@""), xml, cancellationToken); - int count = (result.Length - result.Replace(a, "").Length)/a.Length; - if(biggestcount< count) - { - biggestcount = count; - cache_aid =_aid; - } - } - if (!string.IsNullOrEmpty(cache_aid)) - { - return cache_aid; - } - } - foreach (string _aid in pre_aid) - { - XElement doc = XElement.Parse("" + "" +await One_line_regex(new Regex("" + @"(?s)(.*?)<\/anime>"), xml, cancellationToken,0, 0) + ""); - var a_ = from page in doc.Elements("anime") - where _aid == page.Attribute("aid").Value - select page; + } + return currentId; + } - if (await Simple_compare(a_.Elements("title"), b, cancellationToken) && await Simple_compare(a_.Elements("title"), a, cancellationToken)) - { - return _aid; - } - } - return ""; - } - catch (Exception) + /// + /// Calculates the Levenshtein distance - a metric for measuring the difference between two strings. + /// The higher the number, the more different the two strings are. + /// + private static int LevenshteinDistance(string str1, string str2) + { + var str1Length = str1.Length; + var str2Length = str2.Length; + + if (str1Length == 0) + return str2Length; + + if (str2Length == 0) + return str1Length; + + var matrix = new int[str1Length + 1, str2Length + 1]; + + for (var i = 0; i <= str1Length; matrix[i, 0] = i++) { } + for (var j = 0; j <= str2Length; matrix[0, j] = j++) { } + for (var i = 1; i <= str1Length; i++) { - if (x_ == 1) + for (var j = 1; j <= str2Length; j++) { - return ""; - } - else - { - await Task.Run(() => AniDbTitleDownloader.Load_static(cancellationToken), cancellationToken); - return await Fast_xml_search(a, b, cancellationToken, true, 1); + var cost = (str2[j - 1] == str1[i - 1]) ? 0 : 1; + matrix[i, j] = Math.Min( + Math.Min(matrix[i - 1, j] + 1, matrix[i, j - 1] + 1), + matrix[i - 1, j - 1] + cost); } } + return matrix[str1Length, str2Length]; } /// /// get file Path from anidb xml file /// /// - private static string Get_anidb_xml_file() + private static string GetAnidbXml() { return AniDbTitleDownloader.TitlesFilePath_; } - - /// - /// Compare 2 Strings, and it just works - /// SeriesA S2 == SeriesA Second Season | True; - /// - private async static Task Simple_compare(string a, string b, CancellationToken cancellationToken, bool fastmode = false) - { - if (fastmode) - { - if (a[0] == b[0]) - { - } - else - { - return false; - } - } - - if (await Core_compare(a, b, cancellationToken)) - return true; - if (await Core_compare(b, a, cancellationToken)) - return true; - - return false; - } - - /// - /// Compare 2 Strings, and it just works - /// - private async static Task Core_compare(string a, string b, CancellationToken cancellationToken) - { - if (a == b) - return true; - - a = a.ToLower().Replace(" ", "").Trim().Replace(".", ""); - b = b.ToLower().Replace(" ", "").Trim().Replace(".", ""); - - if (await Clear_name(a, cancellationToken) == await Clear_name(b, cancellationToken)) - return true; - if (await Clear_name_step2(a, cancellationToken) == await Clear_name_step2(b, cancellationToken)) - return true; - if (a.Replace("-", " ") == b.Replace("-", " ")) - return true; - if (a.Replace(" 2", ":secondseason") == b.Replace(" 2", ":secondseason")) - return true; - if (a.Replace("2", "secondseason") == b.Replace("2", "secondseason")) - return true; - if (await Convert_symbols_too_numbers(a, "I", cancellationToken) == await Convert_symbols_too_numbers(b, "I", cancellationToken)) - return true; - if (await Convert_symbols_too_numbers(a, "!", cancellationToken) == await Convert_symbols_too_numbers(b, "!", cancellationToken)) - return true; - if (a.Replace("ndseason", "") == b.Replace("ndseason", "")) - return true; - if (a.Replace("ndseason", "") == b) - return true; - if (await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 2) + await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 3) == await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), b, cancellationToken, 2) + await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), b, cancellationToken, 3)) - if (!string.IsNullOrEmpty(await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 2) + await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 3))) - return true; - if (await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 2) + await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 3) == b) - if (!string.IsNullOrEmpty(await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 2) + await One_line_regex(new Regex(@"((.*)s([0 - 9]))"), a, cancellationToken, 3))) - return true; - if (a.Replace("rdseason", "") == b.Replace("rdseason", "")) - return true; - if (a.Replace("rdseason", "") == b) - return true; - try - { - if (a.Replace("2", "secondseason").Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), a, cancellationToken, 0), "") == b.Replace("2", "secondseason").Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), b, cancellationToken, 0), "")) - return true; - } - catch (Exception) - { - } - try - { - if (a.Replace("2", "secondseason").Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), a, cancellationToken, 0), "") == b) - return true; - } - catch (Exception) - { - } - try - { - if (a.Replace(" 2", ":secondseason").Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), a, cancellationToken, 0), "") == b.Replace(" 2", ":secondseason").Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), b, cancellationToken, 0), "")) - return true; - } - catch (Exception) - { - } - try - { - if (a.Replace(" 2", ":secondseason").Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), a, cancellationToken, 0), "") == b) - return true; - } - catch (Exception) - { - } - try - { - if (a.Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), a, cancellationToken, 0), "") == b.Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), b, cancellationToken, 0), "")) - return true; - } - catch (Exception) - { - } - try - { - if (a.Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), a, cancellationToken, 0), "") == b) - return true; - } - catch (Exception) - { - } - try - { - if (b.Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), b, cancellationToken, 0), "").Replace(" 2", ": second Season") == a) - return true; - } - catch (Exception) - { - } - try - { - if (a.Replace(" 2ndseason", ":secondseason") + " vs " + b == a) - return true; - } - catch (Exception) - { - } - try - { - if (a.Replace(await One_line_regex(new Regex(@"(?s)\(.*?\)"), a, cancellationToken, 0), "").Replace(" 2", ":secondseason") == b) - return true; - } - catch (Exception) - { - } - return false; - } - - /// - /// Example: Convert II to 2 - /// - /// - /// - /// - private async static Task Convert_symbols_too_numbers(string input, string symbol, CancellationToken cancellationToken) - { - try - { - string regex_c = "_"; - int x = 0; - int highest_number = 0; - while (!string.IsNullOrEmpty(regex_c)) - { - regex_c = (await One_line_regex(new Regex(@"(" + symbol + @"+)"), input.ToLower().Trim(), cancellationToken, 1, x)).Trim(); - if (highest_number < regex_c.Count()) - highest_number = regex_c.Count(); - x++; - } - x = 0; - string output = ""; - while (x != highest_number) - { - output = output + symbol; - x++; - } - output = input.Replace(output, highest_number.ToString()); - if (string.IsNullOrEmpty(output)) - { - output = input; - } - return output; - } - catch (Exception) - { - return input; - } - } - - /// - /// Simple Compare a XElemtent with a string - /// - /// - /// - /// - private async static Task Simple_compare(IEnumerable a_, string b, CancellationToken cancellationToken) - { - bool ignore_date = true; - string a_date = ""; - string b_date = ""; - - string b_date_ = await One_line_regex(new Regex(@"([0-9][0-9][0-9][0-9])"), b, cancellationToken); - if (!string.IsNullOrEmpty(b_date_)) - { - b_date = b_date_; - } - if (!string.IsNullOrEmpty(b_date)) - { - foreach (XElement a in a_) - { - if (ignore_date) - { - string a_date_ = await One_line_regex(new Regex(@"([0-9][0-9][0-9][0-9])"), a.Value, cancellationToken); - if (!string.IsNullOrEmpty(a_date_)) - { - a_date = a_date_; - ignore_date = false; - } - } - } - } - if (!ignore_date) - { - if (a_date.Trim()==b_date.Trim()) - { - foreach (XElement a in a_) - { - if (await Simple_compare(a.Value, b, cancellationToken, true)) - return true; - } - } - else - { - return false; - } - return false; - } - else - { - foreach (XElement a in a_) - { - if (ignore_date) - { - if (await Simple_compare(a.Value, b, cancellationToken, true)) - return true; - } - } - return false; - } - } } -} +} \ No newline at end of file