mirror of
https://gitee.com/openharmony/third_party_rust_strsim-rs
synced 2024-11-23 07:39:51 +00:00
Limit Jaro-Winkler to max of 1.0, and fix float comparisons in tests
This commit is contained in:
parent
1a57f1a4a3
commit
089f3cc083
@ -19,6 +19,7 @@ strsim = "0.1.0"
|
||||
extern crate strsim;
|
||||
|
||||
use strsim::{hamming, levenshtein, jaro, jaro_winkler};
|
||||
use std::num::Float;
|
||||
|
||||
fn main() {
|
||||
match hamming("hamming", "hammers") {
|
||||
@ -28,9 +29,11 @@ fn main() {
|
||||
|
||||
assert_eq!(3, levenshtein("kitten", "sitting"));
|
||||
|
||||
assert!(0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre") < 0.001);
|
||||
assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
|
||||
0.001);
|
||||
|
||||
assert!(0.911 - jaro_winkler("cheeseburger", "cheese fries") < 0.001);
|
||||
assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
||||
0.001);
|
||||
}
|
||||
```
|
||||
|
||||
|
50
src/lib.rs
50
src/lib.rs
@ -70,16 +70,23 @@ pub fn jaro(a: &str, b: &str) -> f64 {
|
||||
}
|
||||
}
|
||||
|
||||
// Does not limit the length of the common prefix
|
||||
pub fn jaro_winkler(a: &str, b: &str) -> f64 {
|
||||
let jaro_distance = jaro(a, b);
|
||||
|
||||
let prefix = a.chars()
|
||||
.zip(b.chars())
|
||||
.take_while(|&(a_char, b_char)| a_char == b_char)
|
||||
.count();
|
||||
// Don't limit the length of the common prefix
|
||||
let prefix_length = a.chars()
|
||||
.zip(b.chars())
|
||||
.take_while(|&(a_char, b_char)| a_char == b_char)
|
||||
.count();
|
||||
|
||||
jaro_distance + (0.1 * prefix as f64 * (1.0 - jaro_distance))
|
||||
let jaro_winkler_distance =
|
||||
jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance));
|
||||
|
||||
if jaro_winkler_distance <= 1.0 {
|
||||
jaro_winkler_distance
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
}
|
||||
|
||||
pub fn levenshtein(a: &str, b: &str) -> usize {
|
||||
@ -115,6 +122,7 @@ pub fn levenshtein(a: &str, b: &str) -> usize {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use test::Bencher;
|
||||
use std::num::Float;
|
||||
|
||||
#[test]
|
||||
fn hamming_empty() {
|
||||
@ -178,23 +186,23 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_short() {
|
||||
assert!(0.767 - jaro("dixon", "dicksonx") < 0.001);
|
||||
assert!((0.767 - jaro("dixon", "dicksonx")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_no_transposition() {
|
||||
assert!(0.822 - jaro("dwayne", "duane") < 0.001);
|
||||
assert!((0.822 - jaro("dwayne", "duane")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_with_transposition() {
|
||||
assert!(0.944 - jaro("martha", "marhta") < 0.001);
|
||||
assert!((0.944 - jaro("martha", "marhta")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_names() {
|
||||
assert!((0.392 - jaro("Friedrich Nietzsche",
|
||||
"Jean-Paul Sartre")) < 0.001);
|
||||
"Jean-Paul Sartre")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -219,45 +227,47 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_diff_short() {
|
||||
assert!(0.813 - jaro_winkler("dixon", "dicksonx") < 0.001);
|
||||
assert!(0.813 - jaro_winkler("dicksonx", "dixon") < 0.001);
|
||||
assert!((0.813 - jaro_winkler("dixon", "dicksonx")).abs() < 0.001);
|
||||
assert!((0.813 - jaro_winkler("dicksonx", "dixon")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_diff_no_transposition() {
|
||||
assert!(0.840 - jaro_winkler("dwayne", "duane") < 0.001);
|
||||
assert!((0.840 - jaro_winkler("dwayne", "duane")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_diff_with_transposition() {
|
||||
assert!(0.961 - jaro_winkler("martha", "marhta") < 0.001);
|
||||
assert!((0.961 - jaro_winkler("martha", "marhta")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_names() {
|
||||
assert!((0.562 - jaro_winkler("Friedrich Nietzsche",
|
||||
"Fran-Paul Sartre")) < 0.001);
|
||||
"Fran-Paul Sartre")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_long_prefix() {
|
||||
assert!(0.911 - jaro_winkler("cheeseburger", "cheese fries") < 0.001);
|
||||
assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_more_names() {
|
||||
assert!(0.868 - jaro_winkler("Thorkel", "Thorgier") < 0.001);
|
||||
assert!((0.868 - jaro_winkler("Thorkel", "Thorgier")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_length_of_one() {
|
||||
assert!(0.738 - jaro_winkler("Dinsdale", "D") < 0.001);
|
||||
assert!((0.738 - jaro_winkler("Dinsdale", "D")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_very_long_prefix() {
|
||||
assert!(1.0 - jaro_winkler("thequickbrownfoxjumpedoverx",
|
||||
"thequickbrownfoxjumpedovery") < 0.001);
|
||||
assert!((1.0 - jaro_winkler("thequickbrownfoxjumpedoverx",
|
||||
"thequickbrownfoxjumpedovery")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1,6 +1,7 @@
|
||||
extern crate strsim;
|
||||
|
||||
use strsim::{hamming, levenshtein, jaro, jaro_winkler};
|
||||
use std::num::Float;
|
||||
|
||||
#[test]
|
||||
fn hamming_works() {
|
||||
@ -17,10 +18,12 @@ fn levenshtein_works() {
|
||||
|
||||
#[test]
|
||||
fn jaro_works() {
|
||||
assert!(0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre") < 0.001);
|
||||
assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_works() {
|
||||
assert!(0.911 - jaro_winkler("cheeseburger", "cheese fries") < 0.001);
|
||||
assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user