mirror of
https://gitee.com/openharmony/third_party_rust_heck
synced 2024-11-23 07:10:27 +00:00
Update to a new version with better boundaries.
XMLHttpRequest is now segmented as XML|Http|Request instead of XMLH|ttp|Request.
This commit is contained in:
parent
8af174b8bb
commit
f9b413746f
@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["Without Boats <woboats@gmail.com>"]
|
||||
name = "heck"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
license = "MIT OR Apache-2.0"
|
||||
description = "heck is a case conversion library."
|
||||
homepage = "https://github.com/withoutboats/heck"
|
||||
|
13
README.md
13
README.md
@ -12,13 +12,14 @@ Word boundaries are defined as the "unicode words" defined in the
|
||||
`unicode_segmentation` library, as well as within those words in this manner:
|
||||
|
||||
1. All underscore characters are considered word boundaries.
|
||||
2. A single uppercase letter (followed by no letters or by lowercase letters)
|
||||
is considered to be just after a word boundary.
|
||||
3. Multiple consecutive uppercase letters are considered to be between two
|
||||
word boundaries.
|
||||
2. If an uppercase character is followed by lowercase letters, a word boundary
|
||||
is considered to be just prior to that uppercase character.
|
||||
3. If multiple uppercase characters are consecutive, they are considered to be
|
||||
within a single word, except that the last will be part of the next word if it
|
||||
is followed by lowercase characters (see rule 2).
|
||||
|
||||
That is, "HelloWorld" is segmented "Hello World" whereas "HELLOworld" is
|
||||
segmented "HELLO world."
|
||||
That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
|
||||
segmented `XML|Http|Request`.
|
||||
|
||||
Characters not within words (such as spaces, punctuation, and underscores)
|
||||
are not included in the output string except as they are a part of the case
|
||||
|
13
src/camel.rs
13
src/camel.rs
@ -22,13 +22,7 @@ pub trait CamelCase: ToOwned {
|
||||
|
||||
impl CamelCase for str {
|
||||
fn to_camel_case(&self) -> String {
|
||||
::transform(self, |c, s| s.extend(c.to_uppercase()), |c, s| {
|
||||
if s.len() == 0 {
|
||||
s.extend(c.to_uppercase())
|
||||
} else {
|
||||
s.extend(c.to_lowercase())
|
||||
}
|
||||
})
|
||||
::transform(self, ::capitalize, |_| {})
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,6 +46,7 @@ mod tests {
|
||||
t!(test5: "kebab-case" => "KebabCase");
|
||||
t!(test6: "SHOUTY_SNAKE_CASE" => "ShoutySnakeCase");
|
||||
t!(test7: "snake_case" => "SnakeCase");
|
||||
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "ThisContainsAllKindsOfWordBoundaries");
|
||||
// TODO unicode tests
|
||||
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "ThisContainsAllKindsOfWordBoundaries");
|
||||
t!(test9: "XΣXΣ baffle" => "XσxςBaffle");
|
||||
t!(test10: "XMLHttpRequest" => "XmlHttpRequest");
|
||||
}
|
||||
|
@ -21,10 +21,7 @@ pub trait KebabCase: ToOwned {
|
||||
|
||||
impl KebabCase for str {
|
||||
fn to_kebab_case(&self) -> Self::Owned {
|
||||
::transform(self, |c, s| {
|
||||
s.push('-');
|
||||
s.extend(c.to_lowercase())
|
||||
}, |c, s| s.extend(c.to_lowercase()))
|
||||
::transform(self, ::lowercase, |s| s.push('-'))
|
||||
}
|
||||
}
|
||||
|
||||
@ -48,5 +45,7 @@ mod tests {
|
||||
t!(test5: "kebab-case" => "kebab-case");
|
||||
t!(test6: "SHOUTY_SNAKE_CASE" => "shouty-snake-case");
|
||||
t!(test7: "snake_case" => "snake-case");
|
||||
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "this-contains-all-kinds-of-word-boundaries");
|
||||
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "this-contains-all-kinds-of-word-boundaries");
|
||||
t!(test9: "XΣXΣ baffle" => "xσxς-baffle");
|
||||
t!(test10: "XMLHttpRequest" => "xml-http-request");
|
||||
}
|
||||
|
125
src/lib.rs
125
src/lib.rs
@ -10,13 +10,21 @@
|
||||
//! `unicode_segmentation` library, as well as within those words in this manner:
|
||||
//!
|
||||
//! 1. All underscore characters are considered word boundaries.
|
||||
//! 2. A single uppercase letter (followed by no letters or by lowercase letters)
|
||||
//! is considered to be just after a word boundary.
|
||||
//! 3. Multiple consecutive uppercase letters are considered to be between two
|
||||
//! word boundaries.
|
||||
//! 2. If an uppercase character is followed by lowercase letters, a word boundary
|
||||
//! is considered to be just prior to that uppercase character.
|
||||
//! 3. If multiple uppercase characters are consecutive, they are considered to be
|
||||
//! within a single word, except that the last will be part of the next word if it
|
||||
//! is followed by lowercase characters (see rule 2).
|
||||
//!
|
||||
//! That is, "HelloWorld" is segmented "Hello World" whereas "HELLOworld" is
|
||||
//! segmented "HELLO world."
|
||||
//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
|
||||
//! segmented `XML|Http|Request`.
|
||||
//!
|
||||
//! Characters not within words (such as spaces, punctuation, and underscores)
|
||||
//! are not included in the output string except as they are a part of the case
|
||||
//! being converted to. Multiple adjacent word boundaries (such as a series of
|
||||
//! underscores) are folded into one. ("hello__world" in snake case is therefore
|
||||
//! "hello_world", not the exact same string). Leading or trailing word boundary
|
||||
//! indicators are dropped, except insofar as CamelCase capitalizes the first word.
|
||||
//!
|
||||
//! ### Cases contained in this library:
|
||||
//!
|
||||
@ -45,50 +53,101 @@ pub use title::TitleCase;
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
fn transform<F, G>(s: &str, word_boundary: F, not_word_boundary: G) -> String
|
||||
fn transform<F, G>(s: &str, with_word: F, boundary: G) -> String
|
||||
where
|
||||
F: Fn(char, &mut String),
|
||||
G: Fn(char, &mut String),
|
||||
F: Fn(&str, &mut String),
|
||||
G: Fn(&mut String)
|
||||
{
|
||||
// Emits one word: writes the separator through `$boundary` unless this is the
// first word, hands `$s[$init..$next]` to `$with_word`, records that a word has
// been written, and advances `$init` to `$next` so the following word starts there.
macro_rules! apply {
    ($s:ident [ $init:ident .. $next:ident ], $out:ident, $boundary:ident, $with_word:ident, $first_word:ident) => {
        if !$first_word {
            $boundary(&mut $out);
        }
        $with_word(&$s[$init..$next], &mut $out);
        // Same bookkeeping the hand-written match arms perform after emitting a word.
        $first_word = false;
        // `$next` is the metavariable bound by the matcher; `$next_i` was never bound.
        $init = $next;
    };
}
|
||||
|
||||
let mut out = String::new();
|
||||
let mut after_word_boundary = false;
|
||||
let mut first_word = true;
|
||||
|
||||
for word in s.unicode_words() {
|
||||
if out.len() != 0 { after_word_boundary = true; }
|
||||
let mut last_c_was_uppercase = false;
|
||||
let mut multiple_uppercase = false;
|
||||
let mut char_indices = word.char_indices().peekable();
|
||||
let mut init = 0;
|
||||
let mut previous_is_uppercase = false;
|
||||
|
||||
for c in word.chars() {
|
||||
while let Some((i, c)) = char_indices.next() {
|
||||
// Skip underscore characters
|
||||
if c == '_' {
|
||||
after_word_boundary = true;
|
||||
if init == i { init += 1; }
|
||||
continue
|
||||
}
|
||||
|
||||
if c.is_uppercase() {
|
||||
if out.len() != 0 && !last_c_was_uppercase {
|
||||
after_word_boundary = true;
|
||||
match char_indices.peek() {
|
||||
Some(&(next_i, next)) if next == '_' => {
|
||||
if !first_word { boundary(&mut out); }
|
||||
with_word(&word[init..next_i], &mut out);
|
||||
first_word = false;
|
||||
init = next_i;
|
||||
previous_is_uppercase = c.is_uppercase();
|
||||
}
|
||||
|
||||
if last_c_was_uppercase {
|
||||
multiple_uppercase = true;
|
||||
}
|
||||
last_c_was_uppercase = true;
|
||||
} else {
|
||||
if multiple_uppercase && !after_word_boundary {
|
||||
after_word_boundary = true;
|
||||
Some(&(_, next)) if c.is_uppercase() => {
|
||||
if next.is_lowercase() && previous_is_uppercase {
|
||||
if !first_word { boundary(&mut out); }
|
||||
with_word(&word[init..i], &mut out);
|
||||
first_word = false;
|
||||
init = i;
|
||||
}
|
||||
previous_is_uppercase = true;
|
||||
}
|
||||
|
||||
multiple_uppercase = false;
|
||||
last_c_was_uppercase = false;
|
||||
Some(&(next_i, next)) => {
|
||||
if next.is_uppercase() {
|
||||
if !first_word { boundary(&mut out); }
|
||||
with_word(&word[init..next_i], &mut out);
|
||||
first_word = false;
|
||||
init = next_i;
|
||||
}
|
||||
previous_is_uppercase = false;
|
||||
}
|
||||
|
||||
None => {
|
||||
if !first_word { boundary(&mut out); }
|
||||
with_word(&word[init..], &mut out);
|
||||
first_word = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if after_word_boundary {
|
||||
word_boundary(c, &mut out);
|
||||
} else {
|
||||
not_word_boundary(c, &mut out);
|
||||
}
|
||||
after_word_boundary = false;
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Appends the lowercase form of `s` to `out`.
///
/// A capital sigma (`Σ`) that is the last character of `s` is written as the
/// final-form `ς`; every other character goes through `char::to_lowercase`.
fn lowercase(s: &str, out: &mut String) {
    // Byte offset at which the last character of `s` starts (`None` when empty).
    let last_start = s.char_indices().next_back().map(|(idx, _)| idx);

    for (idx, ch) in s.char_indices() {
        if ch == 'Σ' && Some(idx) == last_start {
            out.push('ς');
        } else {
            out.extend(ch.to_lowercase());
        }
    }
}
|
||||
|
||||
/// Appends the uppercase form of every character of `s` to `out`.
///
/// Each character is mapped with `char::to_uppercase`, so a single input
/// character may contribute more than one output character.
fn uppercase(s: &str, out: &mut String) {
    out.extend(s.chars().flat_map(|ch| ch.to_uppercase()));
}
|
||||
|
||||
fn capitalize(s: &str, out: &mut String) {
|
||||
let mut char_indices = s.char_indices();
|
||||
if let Some((_, c)) = char_indices.next() {
|
||||
out.extend(c.to_uppercase());
|
||||
if let Some((i, _)) = char_indices.next() {
|
||||
lowercase(&s[i..], out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,10 @@ pub trait MixedCase: ToOwned {
|
||||
|
||||
impl MixedCase for str {
|
||||
fn to_mixed_case(&self) -> String {
|
||||
::transform(self, |c, s| s.extend(c.to_uppercase()), |c, s| s.extend(c.to_lowercase()))
|
||||
::transform(self, |s, out| {
|
||||
if out.is_empty() { ::lowercase(s, out); }
|
||||
else { ::capitalize(s, out) }
|
||||
}, |_| {})
|
||||
}
|
||||
}
|
||||
|
||||
@ -46,6 +49,8 @@ mod tests {
|
||||
t!(test5: "kebab-case" => "kebabCase");
|
||||
t!(test6: "SHOUTY_SNAKE_CASE" => "shoutySnakeCase");
|
||||
t!(test7: "snake_case" => "snakeCase");
|
||||
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "thisContainsAllKindsOfWordBoundaries");
|
||||
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "thisContainsAllKindsOfWordBoundaries");
|
||||
t!(test9: "XΣXΣ baffle" => "xσxςBaffle");
|
||||
t!(test10: "XMLHttpRequest" => "xmlHttpRequest");
|
||||
// TODO unicode tests
|
||||
}
|
||||
|
@ -37,10 +37,7 @@ impl<T: ShoutySnakeCase> ShoutySnekCase for T {
|
||||
|
||||
impl ShoutySnakeCase for str {
|
||||
fn to_shouty_snake_case(&self) -> Self::Owned {
|
||||
::transform(self, |c, s| {
|
||||
s.push('_');
|
||||
s.extend(c.to_uppercase())
|
||||
}, |c, s| s.extend(c.to_uppercase()))
|
||||
::transform(self, ::uppercase, |s| s.push('_'))
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,6 +61,7 @@ mod tests {
|
||||
t!(test5: "kebab-case" => "KEBAB_CASE");
|
||||
t!(test6: "SHOUTY_SNAKE_CASE" => "SHOUTY_SNAKE_CASE");
|
||||
t!(test7: "snake_case" => "SNAKE_CASE");
|
||||
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "THIS_CONTAINS_ALL_KINDS_OF_WORD_BOUNDARIES");
|
||||
// TODO unicode tests
|
||||
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "THIS_CONTAINS_ALL_KINDS_OF_WORD_BOUNDARIES");
|
||||
t!(test9: "XΣXΣ baffle" => "XΣXΣ_BAFFLE");
|
||||
t!(test10: "XMLHttpRequest" => "XML_HTTP_REQUEST");
|
||||
}
|
||||
|
12
src/snake.rs
12
src/snake.rs
@ -34,10 +34,7 @@ impl<T: SnakeCase> SnekCase for T {
|
||||
|
||||
impl SnakeCase for str {
|
||||
fn to_snake_case(&self) -> String {
|
||||
::transform(self, |c, s| {
|
||||
s.push('_');
|
||||
s.extend(c.to_lowercase())
|
||||
}, |c, s| s.extend(c.to_lowercase()))
|
||||
::transform(self, ::lowercase, |s| s.push('_'))
|
||||
}
|
||||
}
|
||||
|
||||
@ -56,11 +53,12 @@ mod tests {
|
||||
|
||||
t!(test1: "CamelCase" => "camel_case");
|
||||
t!(test2: "This is Human case." => "this_is_human_case");
|
||||
t!(test3: "MixedUp CamelCase, with some Spaces" => "mixed_up_camel_case_with_some_spaces");
|
||||
t!(test3: "MixedUP CamelCase, with some Spaces" => "mixed_up_camel_case_with_some_spaces");
|
||||
t!(test4: "mixed_up snake_case with some _spaces" => "mixed_up_snake_case_with_some_spaces");
|
||||
t!(test5: "kebab-case" => "kebab_case");
|
||||
t!(test6: "SHOUTY_SNAKE_CASE" => "shouty_snake_case");
|
||||
t!(test7: "snake_case" => "snake_case");
|
||||
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "this_contains_all_kinds_of_word_boundaries");
|
||||
// TODO unicode tests
|
||||
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "this_contains_all_kinds_of_word_boundaries");
|
||||
t!(test9: "XΣXΣ baffle" => "xσxς_baffle");
|
||||
t!(test10: "XMLHttpRequest" => "xml_http_request");
|
||||
}
|
||||
|
16
src/title.rs
16
src/title.rs
@ -22,16 +22,7 @@ pub trait TitleCase: ToOwned {
|
||||
|
||||
impl TitleCase for str {
|
||||
fn to_title_case(&self) -> String {
|
||||
::transform(self, |c, s| {
|
||||
s.push(' ');
|
||||
s.extend(c.to_uppercase())
|
||||
}, |c, s| {
|
||||
if s.len() == 0 {
|
||||
s.extend(c.to_uppercase())
|
||||
} else {
|
||||
s.extend(c.to_lowercase())
|
||||
}
|
||||
})
|
||||
::transform(self, ::capitalize, |s| s.push(' '))
|
||||
}
|
||||
}
|
||||
|
||||
@ -55,6 +46,7 @@ mod tests {
|
||||
t!(test5: "kebab-case" => "Kebab Case");
|
||||
t!(test6: "SHOUTY_SNAKE_CASE" => "Shouty Snake Case");
|
||||
t!(test7: "snake_case" => "Snake Case");
|
||||
t!(test8: "this-contains_ ALLkinds OfWord_Boundaries" => "This Contains All Kinds Of Word Boundaries");
|
||||
// TODO unicode tests
|
||||
t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "This Contains All Kinds Of Word Boundaries");
|
||||
t!(test9: "XΣXΣ baffle" => "Xσxς Baffle");
|
||||
t!(test10: "XMLHttpRequest" => "Xml Http Request");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user