2016-08-24 01:50:27 +00:00
|
|
|
# strsim-rs [![Crates.io](https://img.shields.io/crates/v/strsim.svg)](https://crates.io/crates/strsim) [![Crates.io](https://img.shields.io/crates/l/strsim.svg?maxAge=2592000)](https://github.com/dguo/strsim-rs/blob/master/LICENSE) [![Linux build status](https://travis-ci.org/dguo/strsim-rs.svg?branch=master)](https://travis-ci.org/dguo/strsim-rs) [![Windows build status](https://ci.appveyor.com/api/projects/status/ggue6i785618a39w?svg=true)](https://ci.appveyor.com/project/dguo/strsim-rs)
|
2015-02-10 02:02:54 +00:00
|
|
|
|
2016-10-06 12:05:10 +00:00
|
|
|
[Rust](https://www.rust-lang.org) implementations of [string similarity metrics]:
|
2015-02-10 02:02:54 +00:00
|
|
|
- [Hamming]
|
2015-02-19 05:15:07 +00:00
|
|
|
- [Levenshtein] and [Damerau-Levenshtein]
|
|
|
|
- [Jaro and Jaro-Winkler] - this implementation of Jaro-Winkler does not limit the common prefix length
|
2015-02-10 02:02:54 +00:00
|
|
|
|
|
|
|
### Installation
|
|
|
|
|
|
|
|
```toml
|
|
|
|
# Cargo.toml
|
|
|
|
[dependencies]
|
2016-08-24 03:06:30 +00:00
|
|
|
strsim = "0.5.1"
|
2015-02-10 02:02:54 +00:00
|
|
|
```
|
|
|
|
|
2016-09-16 12:54:18 +00:00
|
|
|
### [Documentation](https://docs.rs/strsim/)
|
|
|
|
|
|
|
|
You can change the version in the URL to see the documentation for an older
|
|
|
|
version listed in the
|
|
|
|
[changelog](https://github.com/dguo/strsim-rs/blob/master/CHANGELOG.md).
|
2016-08-24 01:55:39 +00:00
|
|
|
|
2015-02-10 02:02:54 +00:00
|
|
|
### Usage
|
|
|
|
|
|
|
|
```rust
|
|
|
|
extern crate strsim;
|
|
|
|
|
2015-06-10 04:26:44 +00:00
|
|
|
use strsim::{hamming, levenshtein, damerau_levenshtein, jaro, jaro_winkler,
|
|
|
|
levenshtein_against_vec, damerau_levenshtein_against_vec,
|
|
|
|
jaro_against_vec, jaro_winkler_against_vec};
|
2015-02-10 02:02:54 +00:00
|
|
|
|
|
|
|
fn main() {
|
|
|
|
match hamming("hamming", "hammers") {
|
|
|
|
Ok(distance) => assert_eq!(3, distance),
|
|
|
|
Err(why) => panic!("{:?}", why)
|
|
|
|
}
|
2015-03-29 04:17:25 +00:00
|
|
|
|
2015-02-10 02:02:54 +00:00
|
|
|
assert_eq!(3, levenshtein("kitten", "sitting"));
|
|
|
|
|
2015-02-19 05:15:07 +00:00
|
|
|
assert_eq!(1, damerau_levenshtein("specter", "spectre"));
|
|
|
|
|
2015-03-29 04:17:25 +00:00
|
|
|
assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
|
2015-02-11 00:07:11 +00:00
|
|
|
0.001);
|
2015-03-29 04:17:25 +00:00
|
|
|
|
|
|
|
assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
2015-02-11 00:07:11 +00:00
|
|
|
0.001);
|
2015-06-09 14:54:20 +00:00
|
|
|
|
2015-06-10 04:26:44 +00:00
|
|
|
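// the *_against_vec functions compare one string against every element of a
// vector and return a vector with one result per element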
|
|
|
|
let v = vec!["test", "test1", "test12", "test123", "", "tset"];
|
|
|
|
|
|
|
|
assert_eq!(levenshtein_against_vec("test", &v),
|
|
|
|
vec![0, 1, 2, 3, 4, 2]);
|
|
|
|
|
|
|
|
assert_eq!(damerau_levenshtein_against_vec("test", &v),
|
|
|
|
vec![0, 1, 2, 3, 4, 1]);
|
|
|
|
|
|
|
|
let jaro_distances = jaro_against_vec("test", &v);
|
|
|
|
let jaro_expected = vec![1.0, 0.933333, 0.888889, 0.857143, 0.0, 0.916667];
|
|
|
|
let jaro_delta: f64 = jaro_distances.iter()
|
|
|
|
.zip(jaro_expected.iter())
|
|
|
|
.map(|(x, y)| (x - y).abs() as f64)
|
|
|
|
.fold(0.0, |x, y| x + y as f64);
|
|
|
|
assert!(jaro_delta < 0.0001);
|
|
|
|
|
|
|
|
let jaro_winkler_distances = jaro_winkler_against_vec("test", &v);
|
|
|
|
let jaro_winkler_expected = vec![1.0, 0.96, 0.933333, 0.914286, 0.0, 0.925];
|
|
|
|
let jaro_winkler_delta = jaro_winkler_distances.iter()
|
|
|
|
.zip(jaro_winkler_expected.iter())
|
|
|
|
.map(|(x, y)| (x - y).abs() as f64)
|
|
|
|
.fold(0.0, |x, y| x + y as f64);
|
|
|
|
assert!(jaro_winkler_delta < 0.0001);
|
2015-02-10 02:02:54 +00:00
|
|
|
}
|
|
|
|
```
|
|
|
|
|
2016-01-30 04:46:17 +00:00
|
|
|
### Development
|
|
|
|
|
|
|
|
Install [Vagrant](https://www.vagrantup.com), and run `vagrant up`.
|
|
|
|
|
2015-02-10 02:02:54 +00:00
|
|
|
### License
|
|
|
|
|
2016-08-24 01:50:27 +00:00
|
|
|
[MIT](https://github.com/dguo/strsim-rs/blob/master/LICENSE)
|
2015-02-10 02:02:54 +00:00
|
|
|
|
|
|
|
[string similarity metrics]:http://en.wikipedia.org/wiki/String_metric
|
|
|
|
[Damerau-Levenshtein]:http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
|
|
|
|
[Jaro and Jaro-Winkler]:http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
|
|
|
|
[Levenshtein]:http://en.wikipedia.org/wiki/Levenshtein_distance
|
|
|
|
[Hamming]:http://en.wikipedia.org/wiki/Hamming_distance
|
2016-08-24 03:06:30 +00:00
|
|
|
|