Go to file
moritz9422 14cf91e0ef Fix comparing strings with multibyte chars (#3)
Using `x.len()` on a str returns the number of bytes, which is not
always the same as the number of characters. For example, the characters
'ö' or '香' are represented using more than one byte. This leads
to strange behaviour (e.g. `levenshtein("a", "ä")` would return 0).
Replace calls to `str.len()` with `str.chars().count()`, which returns
the correct number of characters in a string.
2016-04-18 08:46:11 -04:00
src Fix comparing strings with multibyte chars (#3) 2016-04-18 08:46:11 -04:00
tests Add more tests for vector functions 2015-06-10 00:26:44 -04:00
.gitignore Add Vagrant for development 2016-01-29 23:46:17 -05:00
.travis.yml Test against the full Rust suite 2015-10-01 00:01:05 -04:00
appveyor.yml Add configuration for AppVeyor 2016-02-20 12:31:03 -05:00
bootstrap.sh Add Vagrant for development 2016-01-29 23:46:17 -05:00
Cargo.toml Add more tests for vector functions 2015-06-10 00:26:44 -04:00
LICENSE Initial commit 2015-02-09 20:04:10 -05:00
README.md Add an AppVeyor badge 2016-02-20 12:45:58 -05:00
Vagrantfile Add Vagrant for development 2016-01-29 23:46:17 -05:00

strsim-rs Crates.io Linux build status Windows build status

Rust implementations of string similarity metrics: Hamming, Levenshtein, Damerau-Levenshtein, Jaro, and Jaro-Winkler.

Installation

# Cargo.toml
[dependencies]
strsim = "0.4.0"

Usage

extern crate strsim;

use strsim::{hamming, levenshtein, damerau_levenshtein, jaro, jaro_winkler,
             levenshtein_against_vec, damerau_levenshtein_against_vec,
             jaro_against_vec, jaro_winkler_against_vec};

/// Exercises each metric imported from `strsim` on sample inputs and asserts
/// the expected result, first for single string pairs and then for the
/// `*_against_vec` variants that score one string against a list.
fn main() {
    // `hamming` returns a `Result`; any error is reported by panicking with
    // its `Debug` representation.
    let hamming_distance = hamming("hamming", "hammers")
        .unwrap_or_else(|why| panic!("{:?}", why));
    assert_eq!(3, hamming_distance);

    let levenshtein_distance = levenshtein("kitten", "sitting");
    assert_eq!(3, levenshtein_distance);

    let damerau_distance = damerau_levenshtein("specter", "spectre");
    assert_eq!(1, damerau_distance);

    // The floating-point scores are compared against a reference value with
    // a tolerance of 0.001 rather than tested for exact equality.
    let jaro_error = (0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs();
    assert!(jaro_error < 0.001);

    let jaro_winkler_error = (0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs();
    assert!(jaro_winkler_error < 0.001);

    // The `*_against_vec` functions return one value per candidate, in order.
    let candidates = vec!["test", "test1", "test12", "test123", "", "tset"];

    let levenshtein_results = levenshtein_against_vec("test", &candidates);
    assert_eq!(levenshtein_results, vec![0, 1, 2, 3, 4, 2]);

    let damerau_results = damerau_levenshtein_against_vec("test", &candidates);
    assert_eq!(damerau_results, vec![0, 1, 2, 3, 4, 1]);

    // For the float-valued metrics, sum the absolute deviation from the
    // expected scores and require the total to stay below 0.0001.
    let jaro_expected = vec![1.0, 0.933333, 0.888889, 0.857143, 0.0, 0.916667];
    let jaro_results = jaro_against_vec("test", &candidates);
    let mut jaro_delta: f64 = 0.0;
    for (actual, expected) in jaro_results.iter().zip(jaro_expected.iter()) {
        jaro_delta += (actual - expected).abs();
    }
    assert!(jaro_delta < 0.0001);

    let jaro_winkler_expected = vec![1.0, 0.96, 0.933333, 0.914286, 0.0, 0.925];
    let jaro_winkler_results = jaro_winkler_against_vec("test", &candidates);
    let mut jaro_winkler_delta: f64 = 0.0;
    for (actual, expected) in jaro_winkler_results.iter().zip(jaro_winkler_expected.iter()) {
        jaro_winkler_delta += (actual - expected).abs();
    }
    assert!(jaro_winkler_delta < 0.0001);
}

Development

Install Vagrant, then run `vagrant up`.

License

MIT