Update to a new version with better boundaries.

XMLHttpRequest now XML|Http|Request instead of XMLH|ttp|Request.
This commit is contained in:
Without Boats 2017-03-27 16:32:18 -07:00
parent 8af174b8bb
commit f9b413746f
9 changed files with 128 additions and 81 deletions

View File

@ -1,7 +1,7 @@
[package]
authors = ["Without Boats <woboats@gmail.com>"]
name = "heck"
version = "0.1.0"
version = "0.2.0"
license = "MIT OR Apache-2.0"
description = "heck is a case conversion library."
homepage = "https://github.com/withoutboats/heck"

View File

@ -12,13 +12,14 @@ Word boundaries are defined as the "unicode words" defined in the
`unicode_segmentation` library, as well as within those words in this manner:
1. All underscore characters are considered word boundaries.
2. A single uppercase letter (followed by no letters or by lowercase letters)
is considered to be just after a word boundary.
3. Multiple consecutive uppercase letters are considered to be between two
word boundaries.
2. If an uppercase character is followed by lowercase letters, a word boundary
is considered to be just prior to that uppercase character.
3. If multiple uppercase characters are consecutive, they are considered to be
within a single word, except that the last will be part of the next word if it
is followed by lowercase characters (see rule 2).
That is, "HelloWorld" is segmented "Hello World" whereas "HELLOworld" is
segmented "HELLO world."
That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
segmented `XML|Http|Request`.
Characters not within words (such as spaces, punctuation, and underscores)
are not included in the output string except as they are a part of the case

View File

@ -22,13 +22,7 @@ pub trait CamelCase: ToOwned {
impl CamelCase for str {
fn to_camel_case(&self) -> String {
::transform(self, |c, s| s.extend(c.to_uppercase()), |c, s| {
if s.len() == 0 {
s.extend(c.to_uppercase())
} else {
s.extend(c.to_lowercase())
}
})
::transform(self, ::capitalize, |_| {})
}
}
@ -52,6 +46,7 @@ mod tests {
t!(test5: "kebab-case" => "KebabCase");
t!(test6: "SHOUTY_SNAKE_CASE" => "ShoutySnakeCase");
t!(test7: "snake_case" => "SnakeCase");
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "ThisContainsAllKindsOfWordBoundaries");
// TODO unicode tests
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "ThisContainsAllKindsOfWordBoundaries");
t!(test9: "XΣXΣ baffle" => "XσxςBaffle");
t!(test10: "XMLHttpRequest" => "XmlHttpRequest");
}

View File

@ -21,10 +21,7 @@ pub trait KebabCase: ToOwned {
impl KebabCase for str {
fn to_kebab_case(&self) -> Self::Owned {
::transform(self, |c, s| {
s.push('-');
s.extend(c.to_lowercase())
}, |c, s| s.extend(c.to_lowercase()))
::transform(self, ::lowercase, |s| s.push('-'))
}
}
@ -48,5 +45,7 @@ mod tests {
t!(test5: "kebab-case" => "kebab-case");
t!(test6: "SHOUTY_SNAKE_CASE" => "shouty-snake-case");
t!(test7: "snake_case" => "snake-case");
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "this-contains-all-kinds-of-word-boundaries");
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "this-contains-all-kinds-of-word-boundaries");
t!(test9: "XΣXΣ baffle" => "xσxς-baffle");
t!(test10: "XMLHttpRequest" => "xml-http-request");
}

View File

@ -10,13 +10,21 @@
//! `unicode_segmentation` library, as well as within those words in this manner:
//!
//! 1. All underscore characters are considered word boundaries.
//! 2. A single uppercase letter (followed by no letters or by lowercase letters)
//! is considered to be just after a word boundary.
//! 3. Multiple consecutive uppercase letters are considered to be between two
//! word boundaries.
//! 2. If an uppercase character is followed by lowercase letters, a word boundary
//! is considered to be just prior to that uppercase character.
//! 3. If multiple uppercase characters are consecutive, they are considered to be
//! within a single word, except that the last will be part of the next word if it
//! is followed by lowercase characters (see rule 2).
//!
//! That is, "HelloWorld" is segmented "Hello World" whereas "HELLOworld" is
//! segmented "HELLO world."
//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
//! segmented `XML|Http|Request`.
//!
//! Characters not within words (such as spaces, punctuation, and underscores)
//! are not included in the output string except as they are a part of the case
//! being converted to. Multiple adjacent word boundaries (such as a series of
//! underscores) are folded into one. ("hello__world" in snake case is therefore
//! "hello_world", not the exact same string). Leading or trailing word boundary
//! indicators are dropped, except insofar as CamelCase capitalizes the first word.
//!
//! ### Cases contained in this library:
//!
@ -45,50 +53,101 @@ pub use title::TitleCase;
use unicode_segmentation::UnicodeSegmentation;
fn transform<F, G>(s: &str, word_boundary: F, not_word_boundary: G) -> String
fn transform<F, G>(s: &str, with_word: F, boundary: G) -> String
where
F: Fn(char, &mut String),
G: Fn(char, &mut String),
F: Fn(&str, &mut String),
G: Fn(&mut String)
{
// Emits one word: `$s[$init..$next]` is handed to `$with_word`, preceded by a
// call to `$boundary` unless `$first_word` is true, and `$init` is then advanced
// to `$next` so the following word starts where this one ended.
//
// Every argument is an identifier naming a binding at the call site:
//   $s          - the string being sliced
//   $init/$next - byte offsets delimiting the word (`$init` must be mutable)
//   $out        - the output `String`
//   $boundary   - callable taking `&mut String`
//   $with_word  - callable taking `(&str, &mut String)`
//   $first_word - bool; when true no boundary is emitted
//
// Note: the macro reads `$first_word` but does not clear it; the caller has to
// set it to `false` after the first word has been written.
macro_rules! apply {
    ($s:ident [ $init:ident .. $next:ident ], $out:ident, $boundary:ident, $with_word:ident, $first_word:ident) => {
        if !$first_word {
            $boundary(&mut $out);
        }
        $with_word(&$s[$init..$next], &mut $out);
        // Was `$next_i`, which is not bound by the matcher above and made every
        // expansion of this macro fail to compile.
        $init = $next;
    };
}
let mut out = String::new();
let mut after_word_boundary = false;
let mut first_word = true;
for word in s.unicode_words() {
if out.len() != 0 { after_word_boundary = true; }
let mut last_c_was_uppercase = false;
let mut multiple_uppercase = false;
let mut char_indices = word.char_indices().peekable();
let mut init = 0;
let mut previous_is_uppercase = false;
for c in word.chars() {
while let Some((i, c)) = char_indices.next() {
// Skip underscore characters
if c == '_' {
after_word_boundary = true;
if init == i { init += 1; }
continue
}
if c.is_uppercase() {
if out.len() != 0 && !last_c_was_uppercase {
after_word_boundary = true;
match char_indices.peek() {
Some(&(next_i, next)) if next == '_' => {
if !first_word { boundary(&mut out); }
with_word(&word[init..next_i], &mut out);
first_word = false;
init = next_i;
previous_is_uppercase = c.is_uppercase();
}
if last_c_was_uppercase {
multiple_uppercase = true;
}
last_c_was_uppercase = true;
} else {
if multiple_uppercase && !after_word_boundary {
after_word_boundary = true;
Some(&(_, next)) if c.is_uppercase() => {
if next.is_lowercase() && previous_is_uppercase {
if !first_word { boundary(&mut out); }
with_word(&word[init..i], &mut out);
first_word = false;
init = i;
}
previous_is_uppercase = true;
}
multiple_uppercase = false;
last_c_was_uppercase = false;
Some(&(next_i, next)) => {
if next.is_uppercase() {
if !first_word { boundary(&mut out); }
with_word(&word[init..next_i], &mut out);
first_word = false;
init = next_i;
}
previous_is_uppercase = false;
}
None => {
if !first_word { boundary(&mut out); }
with_word(&word[init..], &mut out);
first_word = false;
break;
}
}
if after_word_boundary {
word_boundary(c, &mut out);
} else {
not_word_boundary(c, &mut out);
}
after_word_boundary = false;
}
}
out
}
/// Appends the lowercase form of `s` to `out`.
///
/// Every character is mapped through `char::to_lowercase`, with one
/// exception: a capital sigma (`Σ`) that is the very last character of `s`
/// is written as the final-form sigma `ς` rather than `σ`.
fn lowercase(s: &str, out: &mut String) {
    let total_len = s.len();
    for (offset, ch) in s.char_indices() {
        // A character is the last one when it ends exactly at the end of `s`.
        let is_final = offset + ch.len_utf8() == total_len;
        if is_final && ch == 'Σ' {
            out.push('ς');
        } else {
            out.extend(ch.to_lowercase());
        }
    }
}
/// Appends the uppercase form of `s` to `out`, mapping each character
/// through `char::to_uppercase` (which may yield more than one character).
fn uppercase(s: &str, out: &mut String) {
    out.extend(s.chars().flat_map(char::to_uppercase));
}
/// Appends `s` to `out` with its first character uppercased and the
/// remainder passed through `lowercase`. An empty `s` appends nothing.
fn capitalize(s: &str, out: &mut String) {
    let mut rest = s.chars();
    let head = match rest.next() {
        Some(c) => c,
        None => return,
    };
    out.extend(head.to_uppercase());

    // Whatever the iterator has not consumed yet is the tail after the head.
    let tail = rest.as_str();
    if !tail.is_empty() {
        lowercase(tail, out);
    }
}

View File

@ -22,7 +22,10 @@ pub trait MixedCase: ToOwned {
impl MixedCase for str {
fn to_mixed_case(&self) -> String {
::transform(self, |c, s| s.extend(c.to_uppercase()), |c, s| s.extend(c.to_lowercase()))
::transform(self, |s, out| {
if out.is_empty() { ::lowercase(s, out); }
else { ::capitalize(s, out) }
}, |_| {})
}
}
@ -46,6 +49,8 @@ mod tests {
t!(test5: "kebab-case" => "kebabCase");
t!(test6: "SHOUTY_SNAKE_CASE" => "shoutySnakeCase");
t!(test7: "snake_case" => "snakeCase");
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "thisContainsAllKindsOfWordBoundaries");
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "thisContainsAllKindsOfWordBoundaries");
t!(test9: "XΣXΣ baffle" => "xσxςBaffle");
t!(test10: "XMLHttpRequest" => "xmlHttpRequest");
// TODO more unicode tests
}

View File

@ -37,10 +37,7 @@ impl<T: ShoutySnakeCase> ShoutySnekCase for T {
impl ShoutySnakeCase for str {
fn to_shouty_snake_case(&self) -> Self::Owned {
::transform(self, |c, s| {
s.push('_');
s.extend(c.to_uppercase())
}, |c, s| s.extend(c.to_uppercase()))
::transform(self, ::uppercase, |s| s.push('_'))
}
}
@ -64,6 +61,7 @@ mod tests {
t!(test5: "kebab-case" => "KEBAB_CASE");
t!(test6: "SHOUTY_SNAKE_CASE" => "SHOUTY_SNAKE_CASE");
t!(test7: "snake_case" => "SNAKE_CASE");
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "THIS_CONTAINS_ALL_KINDS_OF_WORD_BOUNDARIES");
// TODO unicode tests
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "THIS_CONTAINS_ALL_KINDS_OF_WORD_BOUNDARIES");
t!(test9: "XΣXΣ baffle" => "XΣXΣ_BAFFLE");
t!(test10: "XMLHttpRequest" => "XML_HTTP_REQUEST");
}

View File

@ -34,10 +34,7 @@ impl<T: SnakeCase> SnekCase for T {
impl SnakeCase for str {
fn to_snake_case(&self) -> String {
::transform(self, |c, s| {
s.push('_');
s.extend(c.to_lowercase())
}, |c, s| s.extend(c.to_lowercase()))
::transform(self, ::lowercase, |s| s.push('_'))
}
}
@ -56,11 +53,12 @@ mod tests {
t!(test1: "CamelCase" => "camel_case");
t!(test2: "This is Human case." => "this_is_human_case");
t!(test3: "MixedUp CamelCase, with some Spaces" => "mixed_up_camel_case_with_some_spaces");
t!(test3: "MixedUP CamelCase, with some Spaces" => "mixed_up_camel_case_with_some_spaces");
t!(test4: "mixed_up snake_case with some _spaces" => "mixed_up_snake_case_with_some_spaces");
t!(test5: "kebab-case" => "kebab_case");
t!(test6: "SHOUTY_SNAKE_CASE" => "shouty_snake_case");
t!(test7: "snake_case" => "snake_case");
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "this_contains_all_kinds_of_word_boundaries");
// TODO unicode tests
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "this_contains_all_kinds_of_word_boundaries");
t!(test9: "XΣXΣ baffle" => "xσxς_baffle");
t!(test10: "XMLHttpRequest" => "xml_http_request");
}

View File

@ -22,16 +22,7 @@ pub trait TitleCase: ToOwned {
impl TitleCase for str {
fn to_title_case(&self) -> String {
::transform(self, |c, s| {
s.push(' ');
s.extend(c.to_uppercase())
}, |c, s| {
if s.len() == 0 {
s.extend(c.to_uppercase())
} else {
s.extend(c.to_lowercase())
}
})
::transform(self, ::capitalize, |s| s.push(' '))
}
}
@ -55,6 +46,7 @@ mod tests {
t!(test5: "kebab-case" => "Kebab Case");
t!(test6: "SHOUTY_SNAKE_CASE" => "Shouty Snake Case");
t!(test7: "snake_case" => "Snake Case");
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "This Contains All Kinds Of Word Boundaries");
// TODO unicode tests
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "This Contains All Kinds Of Word Boundaries");
t!(test9: "XΣXΣ baffle" => "Xσxς Baffle");
t!(test10: "XMLHttpRequest" => "Xml Http Request");
}