diff --git a/CompatBot/Utils/Extensions/StringUtils.cs b/CompatBot/Utils/Extensions/StringUtils.cs
index 433bfb4e..ee4eff2d 100644
--- a/CompatBot/Utils/Extensions/StringUtils.cs
+++ b/CompatBot/Utils/Extensions/StringUtils.cs
@@ -372,6 +372,18 @@ namespace CompatBot.Utils
 
         private static double GetScoreWithAcronym(this string strA, string strB)
         {
+            // Keep the shorter string as the receiver of DiceCoefficient; DiceCoefficientRangeTest
+            // pins the [0, 1] range for that operand order.
+            if (strA.Length > strB.Length) //workaround for the library bug
+            {
+                var tmp = strA;
+                strA = strB;
+                strB = tmp;
+            }
             var fullMatch = strA.DiceCoefficient(strB);
-            var acronymMatch = strA.DiceCoefficient(strB.GetAcronym().ToLowerInvariant());
+            // The acronym may be shorter than strA, so this call needs the same ordering workaround.
+            var acronym = strB.GetAcronym().ToLowerInvariant();
+            var acronymMatch = strA.Length > acronym.Length
+                ? acronym.DiceCoefficient(strA)
+                : strA.DiceCoefficient(acronym);
             return Math.Max(fullMatch, acronymMatch);
diff --git a/Tests/StringUtilTests.cs b/Tests/StringUtilTests.cs
index 33387877..3d434275 100644
--- a/Tests/StringUtilTests.cs
+++ b/Tests/StringUtilTests.cs
@@ -1,5 +1,8 @@
-using System.Text;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
 using CompatBot.Utils;
+using DuoVia.FuzzyStrings;
 using HomoglyphConverter;
 using NUnit.Framework;
 
@@ -55,5 +58,41 @@ namespace Tests
         [TestCase("a grey and white cat sitting in front of a window", ExpectedResult = "a grey and white kot sitting in front of a window")]
         public string FixKotTest(string input)
             => input.FixKot();
+
+        // Checks that the reference coefficient stays within [0, 1] and that the library call
+        // agrees with it once the shorter string is used as the receiver.
+        [TestCase("minesweeeper", "minesweeper")]
+        [TestCase("minesweeeeeeeeeeeeeeeeeeper", "minesweeper")]
+        [TestCase("ee", "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee")]
+        [TestCase("eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee", "ee")]
+        public void DiceCoefficientRangeTest(string strA, string strB)
+        {
+            var coef = DiceCoefficient(strA, strB);
+            if (strA.Length > strB.Length)
+            {
+                var tmp = strA;
+                strA = strB;
+                strB = tmp;
+            }
+            Assert.That(coef, Is.GreaterThanOrEqualTo(0.0).And.LessThanOrEqualTo(1.0));
+            Assert.That(coef, Is.EqualTo(strA.DiceCoefficient(strB)));
+        }
+
+        // Reference implementation: Dice coefficient over the bigrams of both strings.
+        public static double DiceCoefficient(string input, string comparedTo)
+        {
+            var ngrams = input.ToBiGrams();
+            var compareToNgrams = comparedTo.ToBiGrams();
+            return DiceCoefficient(ngrams, compareToNgrams);
+        }
+
+        // Uses a set intersection, so an n-gram shared by both inputs is counted once.
+        public static double DiceCoefficient(string[] nGrams, string[] compareToNGrams)
+        {
+            var matches = nGrams.Intersect(compareToNGrams).Count();
+            if (matches == 0) return 0.0d;
+            double totalBigrams = nGrams.Length + compareToNGrams.Length;
+            return (2 * matches) / totalBigrams;
+        }
     }
 }