mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-25 20:01:50 +00:00
Bug 1857742 - patch 1 - Vendor the oxilangtag crate into third_party/rust. r=supply-chain-reviewers,dholbert
Differential Revision: https://phabricator.services.mozilla.com/D193891
This commit is contained in:
parent
819c082c31
commit
94a3c09204
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -2207,6 +2207,7 @@ dependencies = [
|
||||
"nsstring",
|
||||
"oblivious_http",
|
||||
"origin-trials-ffi",
|
||||
"oxilangtag",
|
||||
"prefs_parser",
|
||||
"processtools",
|
||||
"profiler_helper",
|
||||
@ -4132,6 +4133,12 @@ dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "oxilangtag"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"
|
||||
|
||||
[[package]]
|
||||
name = "packed_simd"
|
||||
version = "0.3.9"
|
||||
|
@ -2529,6 +2529,15 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "6.3.0 -> 6.4.1"
|
||||
|
||||
[[audits.oxilangtag]]
|
||||
who = "Jonathan Kew <jkew@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
version = "0.1.3"
|
||||
notes = """
|
||||
I have reviewed all the code in this (small) crate.
|
||||
There is no unsafe code present.
|
||||
"""
|
||||
|
||||
[[audits.packed_simd]]
|
||||
who = "Henri Sivonen <hsivonen@hsivonen.fi>"
|
||||
criteria = "safe-to-deploy"
|
||||
|
1
third_party/rust/oxilangtag/.cargo-checksum.json
vendored
Normal file
1
third_party/rust/oxilangtag/.cargo-checksum.json
vendored
Normal file
@ -0,0 +1 @@
|
||||
{"files":{"CHANGELOG.md":"3d0f3240ed450d19b894dd8715e20bbec50a14eb0d357df8c09a4af1f19fc831","Cargo.toml":"b8414a40b2cdeb5b34dc4b7e79a5e192b56b953d9db1a762dbf3e8728074dd6a","LICENSE":"3fe41c99abc306c2cd34a9365b1810035ae93335ebf4736c0240b469b3f410eb","README.md":"fc98b140225bc0521a136c2c1ed8146f7398349a36d52481f97d8ec2b7679619","benches/lib.rs":"61c94b95e005c0df25ff740ddc7801d65f68bd6e00c0b8aca7eeb66b103f9eea","deny.toml":"fce6beebdde75e3950abfd230b5110d485f2daf5a333cc77b447669593fa7c62","src/lib.rs":"92c85f535a42b8dde8c2f3078c61e4e1580d326ac621eba2f410bdee521be41d","tests/lib.rs":"9927c137f39094cfd8fbcf56069a047818112374148e8950fd73708e9ae0382a"},"package":"8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"}
|
16
third_party/rust/oxilangtag/CHANGELOG.md
vendored
Normal file
16
third_party/rust/oxilangtag/CHANGELOG.md
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
# Changelog
|
||||
|
||||
## [0.1.3] - 2022-03-26
|
||||
|
||||
### Added
|
||||
- `LanguageTag` now implements the Serde `Serialize` and `Deserialize` traits if the `serde` crate is present.
|
||||
The serialization is a plain string.
|
||||
|
||||
|
||||
## [0.1.2] - 2021-04-16
|
||||
|
||||
### Added
|
||||
- `LanguageTag` struct with a parser, case normalization and components accessors.
|
||||
|
||||
### Changed
|
||||
- Proper attribution from [`language-tags`](https://github.com/pyfisch/rust-language-tags/).
|
39
third_party/rust/oxilangtag/Cargo.toml
vendored
Normal file
39
third_party/rust/oxilangtag/Cargo.toml
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "oxilangtag"
|
||||
version = "0.1.3"
|
||||
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
|
||||
description = "Simple and fast implementation of language tag normalization and validation\n"
|
||||
readme = "README.md"
|
||||
keywords = ["language-tag", "BCP47"]
|
||||
license = "MIT"
|
||||
repository = "https://github.com/oxigraph/oxilangtag"
|
||||
[package.metadata.docs.rs]
|
||||
all-features = true
|
||||
|
||||
[[bench]]
|
||||
name = "lib"
|
||||
harness = false
|
||||
[dependencies.serde]
|
||||
version = "1"
|
||||
optional = true
|
||||
[dev-dependencies.criterion]
|
||||
version = "0.3"
|
||||
|
||||
[dev-dependencies.serde_test]
|
||||
version = "1"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
serialize = ["serde"]
|
19
third_party/rust/oxilangtag/LICENSE
vendored
Normal file
19
third_party/rust/oxilangtag/LICENSE
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
Copyright (c) 2015-2021 Pyfisch Tpt
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
46
third_party/rust/oxilangtag/README.md
vendored
Normal file
46
third_party/rust/oxilangtag/README.md
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
oxilangtag
|
||||
==========
|
||||
|
||||
[Build status](https://github.com/oxigraph/oxilangtag/actions)
|
||||
[Latest version on crates.io](https://crates.io/crates/oxilangtag)
|
||||
[Documentation on docs.rs](https://docs.rs/oxilangtag)
|
||||
|
||||
OxiLangTag is a Rust library for validating and normalizing language tags following [RFC 5646](https://tools.ietf.org/html/rfc5646)
|
||||
([BCP 47](https://tools.ietf.org/html/bcp47)).
|
||||
|
||||
It is a fork of the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) focusing on [RDF use cases](https://www.w3.org/TR/rdf11-primer/).
|
||||
You might find the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) crate more convenient.
|
||||
|
||||
It allows language tag validation without any heap allocation.
|
||||
Getters are also provided to easily retrieve the various language tag components.
|
||||
|
||||
If [`serde`](https://serde.rs/) is available, `LanguageTag` implements the `Serialize` and `Deserialize` traits and encodes the language tag as a string.
|
||||
|
||||
Example:
|
||||
```rust
|
||||
use oxilangtag::LanguageTag;
|
||||
|
||||
// Parsing and validation
|
||||
let language_tag = LanguageTag::parse("zh-cmn-Hans-CN-x-test").unwrap();
|
||||
assert_eq!(language_tag.as_str(), "zh-cmn-Hans-CN-x-test");
|
||||
|
||||
// Language tag components
|
||||
assert_eq!(language_tag.primary_language(), "zh");
|
||||
assert_eq!(language_tag.extended_language(), Some("cmn"));
|
||||
assert_eq!(language_tag.full_language(), "zh-cmn");
|
||||
assert_eq!(language_tag.script(), Some("Hans"));
|
||||
assert_eq!(language_tag.region(), Some("CN"));
|
||||
assert_eq!(language_tag.extension(), None);
|
||||
assert_eq!(language_tag.private_use_subtags().collect::<Vec<_>>(), vec!["test"]);
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT license ([LICENSE](LICENSE) or `<http://opensource.org/licenses/MIT>`).
|
||||
|
||||
It is based on the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) crate by [pyfisch](https://github.com/pyfisch) under MIT license.
|
||||
|
||||
|
||||
### Contribution
|
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxilangtag by you, as defined in the Apache-2.0 license, shall be licensed as above, without any additional terms or conditions.
|
77
third_party/rust/oxilangtag/benches/lib.rs
vendored
Normal file
77
third_party/rust/oxilangtag/benches/lib.rs
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use oxilangtag::LanguageTag;
|
||||
|
||||
fn bench_language_tag_parse(c: &mut Criterion) {
|
||||
let examples = [
|
||||
"fr",
|
||||
"fr-Latn",
|
||||
"fr-fra",
|
||||
"fr-Latn-FR",
|
||||
"fr-Latn-419",
|
||||
"fr-FR",
|
||||
"ax-TZ",
|
||||
"fr-shadok",
|
||||
"fr-y-myext-myext2",
|
||||
"fra-Latn",
|
||||
"fra",
|
||||
"fra-FX",
|
||||
"i-klingon",
|
||||
"I-kLINgon",
|
||||
"no-bok",
|
||||
"fr-Lat",
|
||||
"mn-Cyrl-MN",
|
||||
"mN-cYrL-Mn",
|
||||
"fr-Latn-CA",
|
||||
"en-US",
|
||||
"fr-Latn-CA",
|
||||
"i-enochian",
|
||||
"x-fr-CH",
|
||||
"sr-Latn-CS",
|
||||
"es-419",
|
||||
"sl-nedis",
|
||||
"de-CH-1996",
|
||||
"de-Latg-1996",
|
||||
"sl-IT-nedis",
|
||||
"en-a-bbb-x-a-ccc",
|
||||
"de-a-value",
|
||||
"en-Latn-GB-boont-r-extended-sequence-x-private",
|
||||
"en-x-US",
|
||||
"az-Arab-x-AZE-derbend",
|
||||
"es-Latn-CO-x-private",
|
||||
"en-US-boont",
|
||||
"ab-x-abc-x-abc",
|
||||
"ab-x-abc-a-a",
|
||||
"i-default",
|
||||
"i-klingon",
|
||||
"abcd-Latn",
|
||||
"AaBbCcDd-x-y-any-x",
|
||||
"en",
|
||||
"de-AT",
|
||||
"es-419",
|
||||
"de-CH-1901",
|
||||
"sr-Cyrl",
|
||||
"sr-Cyrl-CS",
|
||||
"sl-Latn-IT-rozaj",
|
||||
"en-US-x-twain",
|
||||
"zh-cmn",
|
||||
"zh-cmn-Hant",
|
||||
"zh-cmn-Hant-HK",
|
||||
"zh-gan",
|
||||
"zh-yue-Hant-HK",
|
||||
"xr-lxs-qut",
|
||||
"xr-lqt-qu",
|
||||
"xr-p-lze",
|
||||
];
|
||||
|
||||
c.bench_function("language tag parse tests", |b| {
|
||||
b.iter(|| {
|
||||
for tag in examples.iter() {
|
||||
LanguageTag::parse(*tag).unwrap();
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(language_tag, bench_language_tag_parse);
|
||||
|
||||
criterion_main!(language_tag);
|
11
third_party/rust/oxilangtag/deny.toml
vendored
Normal file
11
third_party/rust/oxilangtag/deny.toml
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
[licenses]
|
||||
unlicensed = "deny"
|
||||
allow = [
|
||||
"MIT",
|
||||
"Apache-2.0"
|
||||
]
|
||||
default = "deny"
|
||||
|
||||
[bans]
|
||||
multiple-versions = "warn"
|
||||
wildcards = "deny"
|
923
third_party/rust/oxilangtag/src/lib.rs
vendored
Normal file
923
third_party/rust/oxilangtag/src/lib.rs
vendored
Normal file
@ -0,0 +1,923 @@
|
||||
#![doc = include_str!("../README.md")]
|
||||
#![deny(unsafe_code)]
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::borrow::{Borrow, Cow};
|
||||
use std::cmp::Ordering;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::iter::once;
|
||||
use std::ops::Deref;
|
||||
use std::str::{FromStr, Split};
|
||||
|
||||
/// A [RFC 5646](https://tools.ietf.org/html/rfc5646) language tag.
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("en-us").unwrap();
|
||||
/// assert_eq!(language_tag.into_inner(), "en-us")
|
||||
/// ```
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct LanguageTag<T> {
|
||||
tag: T,
|
||||
positions: TagElementsPositions,
|
||||
}
|
||||
|
||||
impl<T: Deref<Target = str>> LanguageTag<T> {
|
||||
/// Parses a language tag acccording to [RFC 5646](https://tools.ietf.org/html/rfc5646).
|
||||
/// and checks if the tag is ["well-formed"](https://tools.ietf.org/html/rfc5646#section-2.2.9).
|
||||
///
|
||||
/// This operation keeps internally the `tag` parameter and does not allocate on the heap.
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("en-us").unwrap();
|
||||
/// assert_eq!(language_tag.into_inner(), "en-us")
|
||||
/// ```
|
||||
pub fn parse(tag: T) -> Result<Self, LanguageTagParseError> {
|
||||
let positions = parse_language_tag(&tag, &mut VoidOutputBuffer::default())?;
|
||||
Ok(Self { tag, positions })
|
||||
}
|
||||
|
||||
/// Returns the underlying language tag representation.
|
||||
#[inline]
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.tag
|
||||
}
|
||||
|
||||
/// Returns the underlying language tag representation.
|
||||
#[inline]
|
||||
pub fn into_inner(self) -> T {
|
||||
self.tag
|
||||
}
|
||||
|
||||
/// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
|
||||
/// assert_eq!(language_tag.primary_language(), "zh");
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn primary_language(&self) -> &str {
|
||||
&self.tag[..self.positions.language_end]
|
||||
}
|
||||
|
||||
/// Returns the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
|
||||
///
|
||||
/// Valid language tags have at most one extended language.
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
|
||||
/// assert_eq!(language_tag.extended_language(), Some("cmn"));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn extended_language(&self) -> Option<&str> {
|
||||
if self.positions.language_end == self.positions.extlang_end {
|
||||
None
|
||||
} else {
|
||||
Some(&self.tag[self.positions.language_end + 1..self.positions.extlang_end])
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterates on the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
|
||||
///
|
||||
/// Valid language tags have at most one extended language.
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
|
||||
/// assert_eq!(language_tag.extended_language_subtags().collect::<Vec<_>>(), vec!["cmn"]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn extended_language_subtags(&self) -> impl Iterator<Item = &str> {
|
||||
self.extended_language().unwrap_or("").split_terminator('-')
|
||||
}
|
||||
|
||||
/// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1)
|
||||
/// and its [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
|
||||
/// assert_eq!(language_tag.full_language(), "zh-cmn");
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn full_language(&self) -> &str {
|
||||
&self.tag[..self.positions.extlang_end]
|
||||
}
|
||||
|
||||
/// Returns the [script subtag](https://tools.ietf.org/html/rfc5646#section-2.2.3).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
|
||||
/// assert_eq!(language_tag.script(), Some("Hans"));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn script(&self) -> Option<&str> {
|
||||
if self.positions.extlang_end == self.positions.script_end {
|
||||
None
|
||||
} else {
|
||||
Some(&self.tag[self.positions.extlang_end + 1..self.positions.script_end])
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [region subtag](https://tools.ietf.org/html/rfc5646#section-2.2.4).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
|
||||
/// assert_eq!(language_tag.region(), Some("CN"));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn region(&self) -> Option<&str> {
|
||||
if self.positions.script_end == self.positions.region_end {
|
||||
None
|
||||
} else {
|
||||
Some(&self.tag[self.positions.script_end + 1..self.positions.region_end])
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap();
|
||||
/// assert_eq!(language_tag.variant(), Some("pinyin"));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn variant(&self) -> Option<&str> {
|
||||
if self.positions.region_end == self.positions.variant_end {
|
||||
None
|
||||
} else {
|
||||
Some(&self.tag[self.positions.region_end + 1..self.positions.variant_end])
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterates on the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap();
|
||||
/// assert_eq!(language_tag.variant_subtags().collect::<Vec<_>>(), vec!["pinyin"]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn variant_subtags(&self) -> impl Iterator<Item = &str> {
|
||||
self.variant().unwrap_or("").split_terminator('-')
|
||||
}
|
||||
|
||||
/// Returns the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap();
|
||||
/// assert_eq!(language_tag.extension(), Some("u-co-phonebk"));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn extension(&self) -> Option<&str> {
|
||||
if self.positions.variant_end == self.positions.extension_end {
|
||||
None
|
||||
} else {
|
||||
Some(&self.tag[self.positions.variant_end + 1..self.positions.extension_end])
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterates on the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap();
|
||||
/// assert_eq!(language_tag.extension_subtags().collect::<Vec<_>>(), vec![('u', "co-phonebk")]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn extension_subtags(&self) -> impl Iterator<Item = (char, &str)> {
|
||||
match self.extension() {
|
||||
Some(parts) => ExtensionsIterator::new(parts),
|
||||
None => ExtensionsIterator::new(""),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap();
|
||||
/// assert_eq!(language_tag.private_use(), Some("x-foo-bar"));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn private_use(&self) -> Option<&str> {
|
||||
if self.tag.starts_with("x-") {
|
||||
Some(&self.tag)
|
||||
} else if self.positions.extension_end == self.tag.len() {
|
||||
None
|
||||
} else {
|
||||
Some(&self.tag[self.positions.extension_end + 1..])
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterates on the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7).
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap();
|
||||
/// assert_eq!(language_tag.private_use_subtags().collect::<Vec<_>>(), vec!["foo", "bar"]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn private_use_subtags(&self) -> impl Iterator<Item = &str> {
|
||||
self.private_use()
|
||||
.map(|part| &part[2..])
|
||||
.unwrap_or("")
|
||||
.split_terminator('-')
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageTag<String> {
|
||||
/// Parses a language tag acccording to [RFC 5646](https://tools.ietf.org/html/rfc5646)
|
||||
/// and normalizes its case.
|
||||
///
|
||||
/// This parser accepts the language tags that are "well-formed" according to
|
||||
/// [RFC 5646](https://tools.ietf.org/html/rfc5646#section-2.2.9).
|
||||
///
|
||||
/// This operation does heap allocation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxilangtag::LanguageTag;
|
||||
///
|
||||
/// let language_tag = LanguageTag::parse_and_normalize("en-us").unwrap();
|
||||
/// assert_eq!(language_tag.into_inner(), "en-US")
|
||||
/// ```
|
||||
pub fn parse_and_normalize(tag: &str) -> Result<Self, LanguageTagParseError> {
|
||||
let mut output_buffer = String::with_capacity(tag.len());
|
||||
let positions = parse_language_tag(tag, &mut output_buffer)?;
|
||||
Ok(Self {
|
||||
tag: output_buffer,
|
||||
positions,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Two language tags are equal when their tag texts are equal; positions are not compared.
impl<Lft: PartialEq<Rhs>, Rhs> PartialEq<LanguageTag<Rhs>> for LanguageTag<Lft> {
    #[inline]
    fn eq(&self, other: &LanguageTag<Rhs>) -> bool {
        self.tag == other.tag
    }
}
|
||||
|
||||
impl<T: PartialEq<str>> PartialEq<str> for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &str) -> bool {
|
||||
self.tag.eq(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: PartialEq<&'a str>> PartialEq<&'a str> for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &&'a str) -> bool {
|
||||
self.tag.eq(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PartialEq<String>> PartialEq<String> for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &String) -> bool {
|
||||
self.tag.eq(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<Cow<'a, str>> for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Cow<'a, str>) -> bool {
|
||||
self.tag.eq(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PartialEq<str>> PartialEq<LanguageTag<T>> for str {
|
||||
#[inline]
|
||||
fn eq(&self, other: &LanguageTag<T>) -> bool {
|
||||
other.tag.eq(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: PartialEq<&'a str>> PartialEq<LanguageTag<T>> for &'a str {
|
||||
#[inline]
|
||||
fn eq(&self, other: &LanguageTag<T>) -> bool {
|
||||
other.tag.eq(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PartialEq<String>> PartialEq<LanguageTag<T>> for String {
|
||||
#[inline]
|
||||
fn eq(&self, other: &LanguageTag<T>) -> bool {
|
||||
other.tag.eq(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<LanguageTag<T>> for Cow<'a, str> {
|
||||
#[inline]
|
||||
fn eq(&self, other: &LanguageTag<T>) -> bool {
|
||||
other.tag.eq(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Eq> Eq for LanguageTag<T> {}
|
||||
|
||||
impl<T: Hash> Hash for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.tag.hash(state)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PartialOrd> PartialOrd for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
self.tag.partial_cmp(&other.tag)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Ord> Ord for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.tag.cmp(&other.tag)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Deref<Target = str>> Deref for LanguageTag<T> {
|
||||
type Target = str;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &str {
|
||||
self.tag.deref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<str>> AsRef<str> for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &str {
|
||||
self.tag.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Borrow<str>> Borrow<str> for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn borrow(&self) -> &str {
|
||||
self.tag.borrow()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Debug> fmt::Debug for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.tag.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Display> fmt::Display for LanguageTag<T> {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.tag.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for LanguageTag<String> {
|
||||
type Err = LanguageTagParseError;
|
||||
|
||||
#[inline]
|
||||
fn from_str(tag: &str) -> Result<Self, LanguageTagParseError> {
|
||||
Self::parse_and_normalize(tag)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<LanguageTag<&'a str>> for LanguageTag<String> {
|
||||
#[inline]
|
||||
fn from(tag: LanguageTag<&'a str>) -> Self {
|
||||
Self {
|
||||
tag: tag.tag.into(),
|
||||
positions: tag.positions,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<LanguageTag<Cow<'a, str>>> for LanguageTag<String> {
|
||||
#[inline]
|
||||
fn from(tag: LanguageTag<Cow<'a, str>>) -> Self {
|
||||
Self {
|
||||
tag: tag.tag.into(),
|
||||
positions: tag.positions,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LanguageTag<Box<str>>> for LanguageTag<String> {
|
||||
#[inline]
|
||||
fn from(tag: LanguageTag<Box<str>>) -> Self {
|
||||
Self {
|
||||
tag: tag.tag.into(),
|
||||
positions: tag.positions,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<LanguageTag<&'a str>> for LanguageTag<Cow<'a, str>> {
|
||||
#[inline]
|
||||
fn from(tag: LanguageTag<&'a str>) -> Self {
|
||||
Self {
|
||||
tag: tag.tag.into(),
|
||||
positions: tag.positions,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<LanguageTag<String>> for LanguageTag<Cow<'a, str>> {
|
||||
#[inline]
|
||||
fn from(tag: LanguageTag<String>) -> Self {
|
||||
Self {
|
||||
tag: tag.tag.into(),
|
||||
positions: tag.positions,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes the language tag exactly like its underlying tag representation.
#[cfg(feature = "serde")]
impl<T: Serialize> Serialize for LanguageTag<T> {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        Serialize::serialize(&self.tag, serializer)
    }
}

/// Deserializes the underlying representation, then validates it with `LanguageTag::parse`.
#[cfg(feature = "serde")]
impl<'de, T: Deref<Target = str> + Deserialize<'de>> Deserialize<'de> for LanguageTag<T> {
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<LanguageTag<T>, D::Error> {
        let raw = T::deserialize(deserializer)?;
        // A tag that is not well-formed is reported as a custom deserialization error.
        Self::parse(raw).map_err(<D::Error as serde::de::Error>::custom)
    }
}
|
||||
|
||||
/// An error raised during [`LanguageTag`] validation.
#[derive(Debug)]
pub struct LanguageTagParseError {
    /// The precise reason why the tag was rejected.
    kind: TagParseErrorKind,
}

impl fmt::Display for LanguageTagParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every kind maps to a fixed message, written out unchanged.
        let message = match &self.kind {
            TagParseErrorKind::EmptyExtension => {
                "If an extension subtag is present, it must not be empty"
            }
            TagParseErrorKind::EmptyPrivateUse => {
                "If the `x` subtag is present, it must not be empty"
            }
            TagParseErrorKind::ForbiddenChar => "The langtag contains a char not allowed",
            TagParseErrorKind::InvalidSubtag => {
                "A subtag fails to parse, it does not match any other subtags"
            }
            TagParseErrorKind::InvalidLanguage => "The given language subtag is invalid",
            TagParseErrorKind::SubtagTooLong => {
                "A subtag may be eight characters in length at maximum"
            }
            TagParseErrorKind::EmptySubtag => "A subtag should not be empty",
            TagParseErrorKind::TooManyExtlangs => "At maximum three extlangs are allowed",
        };
        f.write_str(message)
    }
}

impl Error for LanguageTagParseError {}

/// The reasons why a language tag can be rejected.
#[derive(Debug)]
enum TagParseErrorKind {
    /// An extension singleton is not followed by any extension subtag.
    EmptyExtension,
    /// The `x` singleton is not followed by any private use subtag.
    EmptyPrivateUse,
    /// The langtag contains a char that is not A-Z, a-z, 0-9 or the dash.
    ForbiddenChar,
    /// A subtag does not match any of the subtag kinds allowed at its position.
    InvalidSubtag,
    /// The primary language subtag is invalid.
    InvalidLanguage,
    /// A subtag is longer than eight characters.
    SubtagTooLong,
    /// A subtag is empty.
    EmptySubtag,
    /// More than three extlangs are present (zero to one extlangs are preferred).
    TooManyExtlangs,
}
|
||||
|
||||
/// Byte offsets of the end of each component inside a language tag.
///
/// A component is absent when its end offset equals the end offset of the previous one.
#[derive(Copy, Clone, Debug)]
struct TagElementsPositions {
    /// End of the primary language subtag.
    language_end: usize,
    /// End of the extended language subtags.
    extlang_end: usize,
    /// End of the script subtag.
    script_end: usize,
    /// End of the region subtag.
    region_end: usize,
    /// End of the variant subtags.
    variant_end: usize,
    /// End of the extension subtags.
    extension_end: usize,
}
|
||||
|
||||
/// A sink for the tag text emitted by the parser.
trait OutputBuffer: Extend<char> {
    /// Appends a single character.
    fn push(&mut self, c: char);

    /// Appends a string slice.
    fn push_str(&mut self, s: &str);
}

/// An output buffer that discards everything written to it.
#[derive(Default)]
struct VoidOutputBuffer {}

impl OutputBuffer for VoidOutputBuffer {
    #[inline]
    fn push(&mut self, _c: char) {}

    #[inline]
    fn push_str(&mut self, _s: &str) {}
}

impl Extend<char> for VoidOutputBuffer {
    #[inline]
    fn extend<T: IntoIterator<Item = char>>(&mut self, _chars: T) {}
}

/// A `String` keeps the emitted text.
impl OutputBuffer for String {
    #[inline]
    fn push(&mut self, c: char) {
        // Path syntax resolves to the inherent `String::push`, not to this trait method.
        String::push(self, c);
    }

    #[inline]
    fn push_str(&mut self, s: &str) {
        // Path syntax resolves to the inherent `String::push_str`, not to this trait method.
        String::push_str(self, s);
    }
}
|
||||
|
||||
/// Parses language tag following [the RFC5646 grammar](https://tools.ietf.org/html/rfc5646#section-2.1)
|
||||
fn parse_language_tag(
|
||||
input: &str,
|
||||
output: &mut impl OutputBuffer,
|
||||
) -> Result<TagElementsPositions, LanguageTagParseError> {
|
||||
//grandfathered tags
|
||||
if let Some(tag) = GRANDFATHEREDS
|
||||
.iter()
|
||||
.find(|record| record.eq_ignore_ascii_case(input))
|
||||
{
|
||||
output.push_str(tag);
|
||||
Ok(TagElementsPositions {
|
||||
language_end: tag.len(),
|
||||
extlang_end: tag.len(),
|
||||
script_end: tag.len(),
|
||||
region_end: tag.len(),
|
||||
variant_end: tag.len(),
|
||||
extension_end: tag.len(),
|
||||
})
|
||||
} else if input.starts_with("x-") || input.starts_with("X-") {
|
||||
// private use
|
||||
if !is_alphanumeric_or_dash(input) {
|
||||
Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::ForbiddenChar,
|
||||
})
|
||||
} else if input.len() == 2 {
|
||||
Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::EmptyPrivateUse,
|
||||
})
|
||||
} else {
|
||||
output.extend(input.chars().map(|c| c.to_ascii_lowercase()));
|
||||
Ok(TagElementsPositions {
|
||||
language_end: input.len(),
|
||||
extlang_end: input.len(),
|
||||
script_end: input.len(),
|
||||
region_end: input.len(),
|
||||
variant_end: input.len(),
|
||||
extension_end: input.len(),
|
||||
})
|
||||
}
|
||||
} else {
|
||||
parse_langtag(input, output)
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles normal tags.
|
||||
fn parse_langtag(
|
||||
input: &str,
|
||||
output: &mut impl OutputBuffer,
|
||||
) -> Result<TagElementsPositions, LanguageTagParseError> {
|
||||
#[derive(PartialEq, Eq)]
|
||||
enum State {
|
||||
Start,
|
||||
AfterLanguage,
|
||||
AfterExtLang,
|
||||
AfterScript,
|
||||
AfterRegion,
|
||||
InExtension { expected: bool },
|
||||
InPrivateUse { expected: bool },
|
||||
}
|
||||
|
||||
let mut state = State::Start;
|
||||
let mut language_end = 0;
|
||||
let mut extlang_end = 0;
|
||||
let mut script_end = 0;
|
||||
let mut region_end = 0;
|
||||
let mut variant_end = 0;
|
||||
let mut extension_end = 0;
|
||||
let mut extlangs_count = 0;
|
||||
for (subtag, end) in SubTagIterator::new(input) {
|
||||
if subtag.is_empty() {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::EmptySubtag,
|
||||
});
|
||||
}
|
||||
if subtag.len() > 8 {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::SubtagTooLong,
|
||||
});
|
||||
}
|
||||
if state == State::Start {
|
||||
// Primary language
|
||||
if subtag.len() < 2 || !is_alphabetic(subtag) {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::InvalidLanguage,
|
||||
});
|
||||
}
|
||||
language_end = end;
|
||||
output.extend(to_lowercase(subtag));
|
||||
if subtag.len() < 4 {
|
||||
// extlangs are only allowed for short language tags
|
||||
state = State::AfterLanguage;
|
||||
} else {
|
||||
state = State::AfterExtLang;
|
||||
}
|
||||
} else if let State::InPrivateUse { .. } = state {
|
||||
if !is_alphanumeric(subtag) {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::InvalidSubtag,
|
||||
});
|
||||
}
|
||||
output.push('-');
|
||||
output.extend(to_lowercase(subtag));
|
||||
state = State::InPrivateUse { expected: false };
|
||||
} else if subtag == "x" || subtag == "X" {
|
||||
// We make sure extension is found
|
||||
if let State::InExtension { expected: true } = state {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::EmptyExtension,
|
||||
});
|
||||
}
|
||||
output.push('-');
|
||||
output.push('x');
|
||||
state = State::InPrivateUse { expected: true };
|
||||
} else if subtag.len() == 1 && is_alphanumeric(subtag) {
|
||||
// We make sure extension is found
|
||||
if let State::InExtension { expected: true } = state {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::EmptyExtension,
|
||||
});
|
||||
}
|
||||
let extension_tag = subtag.chars().next().unwrap().to_ascii_lowercase();
|
||||
output.push('-');
|
||||
output.push(extension_tag);
|
||||
state = State::InExtension { expected: true };
|
||||
} else if let State::InExtension { .. } = state {
|
||||
if !is_alphanumeric(subtag) {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::InvalidSubtag,
|
||||
});
|
||||
}
|
||||
extension_end = end;
|
||||
output.push('-');
|
||||
output.extend(to_lowercase(subtag));
|
||||
state = State::InExtension { expected: false };
|
||||
} else if state == State::AfterLanguage && subtag.len() == 3 && is_alphabetic(subtag) {
|
||||
extlangs_count += 1;
|
||||
if extlangs_count > 3 {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::TooManyExtlangs,
|
||||
});
|
||||
}
|
||||
// valid extlangs
|
||||
extlang_end = end;
|
||||
output.push('-');
|
||||
output.extend(to_lowercase(subtag));
|
||||
} else if (state == State::AfterLanguage || state == State::AfterExtLang)
|
||||
&& subtag.len() == 4
|
||||
&& is_alphabetic(subtag)
|
||||
{
|
||||
// Script
|
||||
script_end = end;
|
||||
output.push('-');
|
||||
output.extend(to_uppercase_first(subtag));
|
||||
state = State::AfterScript;
|
||||
} else if (state == State::AfterLanguage
|
||||
|| state == State::AfterExtLang
|
||||
|| state == State::AfterScript)
|
||||
&& (subtag.len() == 2 && is_alphabetic(subtag)
|
||||
|| subtag.len() == 3 && is_numeric(subtag))
|
||||
{
|
||||
// Region
|
||||
region_end = end;
|
||||
output.push('-');
|
||||
output.extend(to_uppercase(subtag));
|
||||
state = State::AfterRegion;
|
||||
} else if (state == State::AfterLanguage
|
||||
|| state == State::AfterExtLang
|
||||
|| state == State::AfterScript
|
||||
|| state == State::AfterRegion)
|
||||
&& is_alphanumeric(subtag)
|
||||
&& (subtag.len() >= 5 && is_alphabetic(&subtag[0..1])
|
||||
|| subtag.len() >= 4 && is_numeric(&subtag[0..1]))
|
||||
{
|
||||
// Variant
|
||||
variant_end = end;
|
||||
output.push('-');
|
||||
output.extend(to_lowercase(subtag));
|
||||
state = State::AfterRegion;
|
||||
} else {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::InvalidSubtag,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
//We make sure we are in a correct final state
|
||||
if let State::InExtension { expected: true } = state {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::EmptyExtension,
|
||||
});
|
||||
}
|
||||
if let State::InPrivateUse { expected: true } = state {
|
||||
return Err(LanguageTagParseError {
|
||||
kind: TagParseErrorKind::EmptyPrivateUse,
|
||||
});
|
||||
}
|
||||
|
||||
//We make sure we have not skipped anyone
|
||||
if extlang_end < language_end {
|
||||
extlang_end = language_end;
|
||||
}
|
||||
if script_end < extlang_end {
|
||||
script_end = extlang_end;
|
||||
}
|
||||
if region_end < script_end {
|
||||
region_end = script_end;
|
||||
}
|
||||
if variant_end < region_end {
|
||||
variant_end = region_end;
|
||||
}
|
||||
if extension_end < variant_end {
|
||||
extension_end = variant_end;
|
||||
}
|
||||
|
||||
Ok(TagElementsPositions {
|
||||
language_end,
|
||||
extlang_end,
|
||||
script_end,
|
||||
region_end,
|
||||
variant_end,
|
||||
extension_end,
|
||||
})
|
||||
}
|
||||
|
||||
/// Iterator over the extensions of a tag, yielding `(singleton, content)`
/// pairs.
///
/// `input` is the not-yet-consumed part of the extension section, e.g.
/// `a-babble-b-warble`, which yields `('a', "babble")` then `('b', "warble")`.
struct ExtensionsIterator<'a> {
    input: &'a str,
}

impl<'a> ExtensionsIterator<'a> {
    /// Creates an iterator over the extension section `input`.
    fn new(input: &'a str) -> Self {
        Self { input }
    }
}

impl<'a> Iterator for ExtensionsIterator<'a> {
    type Item = (char, &'a str);

    /// Returns the next `(singleton, content)` pair.
    ///
    /// This never panics: on malformed input (a leading dash, two adjacent
    /// singletons, a non-ASCII first subtag) it yields an empty content or
    /// stops instead of slicing out of range.
    fn next(&mut self) -> Option<(char, &'a str)> {
        let mut parts = self.input.split_terminator('-');
        // `?` on both steps: no part at all, or an empty first part.
        let singleton = parts.next()?.chars().next()?;
        // Offset just past the content read so far; starts after "<singleton>-".
        let mut consumed: usize = 2;
        for part in parts {
            if part.len() == 1 {
                // `part` is the next singleton. The content stops before the dash
                // preceding it; an empty extension gives the range 2..1, which
                // `get` rejects.
                let content = self.input.get(2..consumed - 1).unwrap_or("");
                self.input = self.input.get(consumed..).unwrap_or("");
                return Some((singleton, content));
            }
            consumed += part.len() + 1;
        }
        // Last extension: everything after "<singleton>-", if there is anything.
        let last = self.input.get(2..).map(|content| (singleton, content));
        self.input = "";
        last
    }
}
|
||||
|
||||
/// Iterator over the `-`-separated subtags of a tag.
///
/// Each item is the subtag together with the byte offset in the input at
/// which the subtag ends (exclusive).
struct SubTagIterator<'a> {
    split: Split<'a, char>,
    position: usize,
}

impl<'a> SubTagIterator<'a> {
    /// Starts iterating at the beginning of `input`.
    #[inline]
    fn new(input: &'a str) -> Self {
        let split = input.split('-');
        Self { split, position: 0 }
    }
}

impl<'a> Iterator for SubTagIterator<'a> {
    type Item = (&'a str, usize);

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.split.next().map(|subtag| {
            let end = self.position + subtag.len();
            // Skip the dash that follows this subtag.
            self.position = end + 1;
            (subtag, end)
        })
    }
}
|
||||
|
||||
/// Returns `true` if `s` consists only of ASCII letters (vacuously true for
/// the empty string).
#[inline]
fn is_alphabetic(s: &str) -> bool {
    // Any non-ASCII character contributes bytes >= 0x80, none of which is a letter.
    s.bytes().all(|byte| byte.is_ascii_alphabetic())
}
|
||||
|
||||
/// Returns `true` if `s` consists only of ASCII digits (vacuously true for
/// the empty string).
#[inline]
fn is_numeric(s: &str) -> bool {
    // Any non-ASCII character contributes bytes >= 0x80, none of which is a digit.
    s.bytes().all(|byte| byte.is_ascii_digit())
}
|
||||
|
||||
/// Returns `true` if `s` consists only of ASCII letters and digits
/// (vacuously true for the empty string).
#[inline]
fn is_alphanumeric(s: &str) -> bool {
    // Any non-ASCII character contributes bytes >= 0x80, which never qualify.
    s.bytes().all(|byte| byte.is_ascii_alphanumeric())
}
|
||||
|
||||
/// Returns `true` if every character of `s` is an ASCII letter, an ASCII
/// digit or `-` (vacuously true for the empty string).
#[inline]
fn is_alphanumeric_or_dash(s: &str) -> bool {
    // Any non-ASCII character contributes bytes >= 0x80, which never qualify.
    s.bytes()
        .all(|byte| byte == b'-' || byte.is_ascii_alphanumeric())
}
|
||||
|
||||
/// Lazily yields the characters of `s` with ASCII letters upper-cased; every
/// other character is passed through unchanged.
#[inline]
fn to_uppercase(s: &str) -> impl Iterator<Item = char> + '_ {
    let characters = s.chars();
    characters.map(|ch| ch.to_ascii_uppercase())
}
|
||||
|
||||
/// Lazily yields `s` with its first character ASCII upper-cased and all the
/// following ones ASCII lower-cased.
///
/// Beware: panics if `s` is empty (should never happen in our code).
#[inline]
fn to_uppercase_first(s: &str) -> impl Iterator<Item = char> + '_ {
    let mut rest = s.chars();
    // The first character is taken eagerly, so the panic happens at call time.
    let head = rest.next().unwrap().to_ascii_uppercase();
    once(head).chain(rest.map(|ch| ch.to_ascii_lowercase()))
}
|
||||
|
||||
/// Lazily yields the characters of `s` with ASCII letters lower-cased; every
/// other character is passed through unchanged.
#[inline]
fn to_lowercase(s: &str) -> impl Iterator<Item = char> + '_ {
    let characters = s.chars();
    characters.map(|ch| ch.to_ascii_lowercase())
}
|
||||
|
||||
/// The 26 grandfathered tags, in their canonical casing.
///
/// The entries are kept in ascending ASCII-case-insensitive order; the order
/// is left untouched because the code consuming this table is not visible
/// from here and may rely on it.
#[rustfmt::skip]
const GRANDFATHEREDS: [&str; 26] = [
    // tags that look like ordinary language tags
    "art-lojban", "cel-gaulish", "en-GB-oed",
    // tags starting with the singleton `i`
    "i-ami", "i-bnn", "i-default", "i-enochian", "i-hak", "i-klingon", "i-lux",
    "i-mingo", "i-navajo", "i-pwn", "i-tao", "i-tay", "i-tsu",
    // remaining tags
    "no-bok", "no-nyn",
    "sgn-BE-FR", "sgn-BE-NL", "sgn-CH-DE",
    "zh-guoyu", "zh-hakka", "zh-min", "zh-min-nan", "zh-xiang",
];
|
722
third_party/rust/oxilangtag/tests/lib.rs
vendored
Normal file
722
third_party/rust/oxilangtag/tests/lib.rs
vendored
Normal file
@ -0,0 +1,722 @@
|
||||
use oxilangtag::LanguageTag;
|
||||
use serde_test::{assert_de_tokens, assert_de_tokens_error};
|
||||
#[cfg(feature = "serde")]
|
||||
use serde_test::{assert_tokens, Token};
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
// Tests from RFC 5646 2.1.1
#[test]
fn test_formatting() {
    // (input, expected normalized form)
    let cases: &[(&str, &str)] = &[
        ("mn-Cyrl-MN", "mn-Cyrl-MN"),
        ("MN-cYRL-mn", "mn-Cyrl-MN"),
        ("mN-cYrL-Mn", "mn-Cyrl-MN"),
        ("en-CA-x-ca", "en-CA-x-ca"),
        ("sgn-BE-FR", "sgn-BE-FR"),
        ("az-Latn-x-latn", "az-Latn-x-latn"),
        ("i-ami", "i-ami"),
        ("I-AMI", "i-ami"),
        ("SL-AFB-lATN-005-nEdis", "sl-afb-Latn-005-nedis"),
    ];
    for &(input, expected) in cases {
        let tag = LanguageTag::parse_and_normalize(input).unwrap();
        assert_eq!(expected, tag.as_str());
    }
}
|
||||
|
||||
// Tests from RFC 5646 2.2.1
#[test]
fn test_primary_language() {
    // (input, expected primary language of the normalized tag)
    let cases: &[(&str, &str)] = &[
        ("fr", "fr"),
        ("de", "de"),
        ("x-fr-CH", "x-fr-ch"),
        ("i-klingon", "i-klingon"),
        ("i-bnn", "i-bnn"),
        ("zh-hakka", "zh-hakka"),
    ];
    for &(input, expected) in cases {
        let tag = LanguageTag::parse_and_normalize(input).unwrap();
        assert_eq!(expected, tag.primary_language());
    }
}
|
||||
|
||||
// Tests from RFC 5646 2.2.2
#[test]
fn test_extended_language() {
    // input -> (full language, primary language, extended language, extlang subtags)
    let cases: Vec<(&str, (&str, &str, Option<&str>, Vec<&str>))> = vec![
        ("zh", ("zh", "zh", None, vec![])),
        ("zh-gan", ("zh-gan", "zh", Some("gan"), vec!["gan"])),
        (
            "zh-gan-foo",
            ("zh-gan-foo", "zh", Some("gan-foo"), vec!["gan", "foo"]),
        ),
        ("zh-min-nan", ("zh-min-nan", "zh-min-nan", None, vec![])),
        ("i-tsu", ("i-tsu", "i-tsu", None, vec![])),
        ("zh-CN", ("zh", "zh", None, vec![])),
        ("zh-gan-CN", ("zh-gan", "zh", Some("gan"), vec!["gan"])),
        ("ar-afb", ("ar-afb", "ar", Some("afb"), vec!["afb"])),
    ];
    for (input, expected) in cases {
        let tag: LanguageTag<String> = input.parse().unwrap();
        let actual = (
            tag.full_language(),
            tag.primary_language(),
            tag.extended_language(),
            tag.extended_language_subtags().collect::<Vec<_>>(),
        );
        assert_eq!(expected, actual);
    }
}
|
||||
|
||||
// Tests from RFC 5646 2.2.3
#[test]
fn test_script() {
    // input -> (primary language, script)
    let cases: Vec<(&str, (&str, Option<&str>))> = vec![
        ("sr-Latn", ("sr", Some("Latn"))),
        ("ar-afb-Latn", ("ar", Some("Latn"))),
    ];
    for (input, expected) in cases {
        let tag: LanguageTag<String> = input.parse().unwrap();
        assert_eq!(expected, (tag.primary_language(), tag.script()));
    }
}
|
||||
|
||||
// Tests from RFC 5646 2.2.4
#[test]
fn test_region() {
    // input -> (primary language, script, region)
    let cases: Vec<(&str, (&str, Option<&str>, Option<&str>))> = vec![
        ("de-AT", ("de", None, Some("AT"))),
        ("sr-Latn-RS", ("sr", Some("Latn"), Some("RS"))),
        ("es-419", ("es", None, Some("419"))),
        ("ar-DE", ("ar", None, Some("DE"))),
        ("ar-005", ("ar", None, Some("005"))),
    ];
    for (input, expected) in cases {
        let tag: LanguageTag<String> = input.parse().unwrap();
        let actual = (tag.primary_language(), tag.script(), tag.region());
        assert_eq!(expected, actual);
    }
}
|
||||
|
||||
// Tests from RFC 5646 2.2.5
#[test]
fn test_variant() {
    // input -> (primary language, variant, variant subtags)
    let cases: Vec<(&str, (&str, Option<&str>, Vec<&str>))> = vec![
        ("sl", ("sl", None, vec![])),
        ("sl-nedis", ("sl", Some("nedis"), vec!["nedis"])),
        ("de-CH-1996", ("de", Some("1996"), vec!["1996"])),
        ("art-lojban", ("art-lojban", None, vec![])),
    ];
    for (input, expected) in cases {
        let tag: LanguageTag<String> = input.parse().unwrap();
        let actual = (
            tag.primary_language(),
            tag.variant(),
            tag.variant_subtags().collect::<Vec<_>>(),
        );
        assert_eq!(expected, actual);
    }
}
|
||||
|
||||
// Tests from RFC 5646 2.2.6
#[test]
fn test_extension() {
    // input -> (primary language, extension, (singleton, content) pairs)
    let cases: Vec<(&str, (&str, Option<&str>, Vec<(char, &str)>))> = vec![
        ("en", ("en", None, vec![])),
        ("en-a-bbb-x-a-ccc", ("en", Some("a-bbb"), vec![('a', "bbb")])),
        (
            "en-a-babble-b-warble",
            (
                "en",
                Some("a-babble-b-warble"),
                vec![('a', "babble"), ('b', "warble")],
            ),
        ),
        ("fr-a-Latn", ("fr", Some("a-latn"), vec![('a', "latn")])),
        (
            "en-Latn-GB-boont-r-extended-sequence-x-private",
            (
                "en",
                Some("r-extended-sequence"),
                vec![('r', "extended-sequence")],
            ),
        ),
        (
            "en-r-az-r-qt",
            ("en", Some("r-az-r-qt"), vec![('r', "az"), ('r', "qt")]),
        ),
        ("i-tsu", ("i-tsu", None, vec![])),
    ];
    for (input, expected) in cases {
        let tag: LanguageTag<String> = input.parse().unwrap();
        let actual = (
            tag.primary_language(),
            tag.extension(),
            tag.extension_subtags().collect::<Vec<_>>(),
        );
        assert_eq!(expected, actual);
    }
}
|
||||
|
||||
// Tests from RFC 5646 2.2.7
#[test]
fn test_privateuse() {
    // input -> (primary language, private use, private use subtags)
    let cases: Vec<(&str, (&str, Option<&str>, Vec<&str>))> = vec![
        ("en", ("en", None, vec![])),
        ("en-x-US", ("en", Some("x-us"), vec!["us"])),
        ("el-x-koine", ("el", Some("x-koine"), vec!["koine"])),
        ("x-fr-ch", ("x-fr-ch", Some("x-fr-ch"), vec!["fr", "ch"])),
        (
            "es-x-foobar-AT-007",
            ("es", Some("x-foobar-at-007"), vec!["foobar", "at", "007"]),
        ),
    ];
    for (input, expected) in cases {
        let tag: LanguageTag<String> = input.parse().unwrap();
        let actual = (
            tag.primary_language(),
            tag.private_use(),
            tag.private_use_subtags().collect::<Vec<_>>(),
        );
        assert_eq!(expected, actual);
    }
}
|
||||
|
||||
#[test]
fn test_fmt() {
    // (input, expected normalized form), checked through `as_str`
    let via_as_str: &[(&str, &str)] = &[
        ("ar-arb-Latn-DE-nedis-foobar", "ar-arb-Latn-DE-nedis-foobar"),
        ("ar-arb-latn-de-nedis-foobar", "ar-arb-Latn-DE-nedis-foobar"),
        ("AR-ARB-LATN-DE-NEDIS-FOOBAR", "ar-arb-Latn-DE-nedis-foobar"),
        ("xx-z-foo-a-bar-F-spam-b-eggs", "xx-z-foo-a-bar-f-spam-b-eggs"),
    ];
    for &(input, expected) in via_as_str {
        let tag = LanguageTag::parse_and_normalize(input).unwrap();
        assert_eq!(expected, tag.as_str());
    }

    // This one goes through the `Display` implementation instead.
    let displayed = LanguageTag::parse_and_normalize("HkgnmerM-x-e5-zf-VdDjcpz-1V6")
        .unwrap()
        .to_string();
    assert_eq!("hkgnmerm-x-e5-zf-vddjcpz-1v6", displayed);

    let long_tag =
        LanguageTag::parse_and_normalize("MgxQa-ywEp-8lcW-7bvT-h-dP1Md-0h7-0Z3ir").unwrap();
    assert_eq!("mgxqa-Ywep-8lcw-7bvt-h-dp1md-0h7-0z3ir", long_tag.as_str());
}
|
||||
|
||||
#[test]
fn test_unicode() {
    // Non-ASCII characters are rejected, even in the private-use part.
    let outcome = LanguageTag::parse("zh-x-Üńìcødê");
    assert!(outcome.is_err());
}
|
||||
|
||||
#[test]
fn test_cmp() {
    // Tags differing only by case are equal once normalized.
    let mixed_case = LanguageTag::parse_and_normalize("dE-AraB-lY").unwrap();
    let other_case = LanguageTag::parse_and_normalize("DE-aRaB-LY").unwrap();
    assert_eq!(mixed_case, other_case);

    // A tag with a script differs from the bare language.
    let bare = LanguageTag::parse_and_normalize("zh").unwrap();
    let with_script = LanguageTag::parse_and_normalize("zh-Latn").unwrap();
    assert_ne!(bare, with_script);
}
|
||||
|
||||
// http://www.langtag.net/test-suites/well-formed-tags.txt
#[test]
fn test_wellformed_tags() {
    let tags: &[&str] = &[
        "fr",
        "fr-Latn",
        "fr-fra", // extended language
        "fr-Latn-FR",
        "fr-Latn-419",
        "fr-FR",
        "ax-TZ",     // not in the registry, but well-formed
        "fr-shadok", // variant
        "fr-y-myext-myext2",
        "fra-Latn", // ISO 639 codes can be 3 letters long
        "fra",
        "fra-FX",
        "i-klingon", // grandfathered with singleton
        "I-kLINgon", // tags are case-insensitive
        "no-bok",    // grandfathered without singleton
        "fr-Lat",    // extended language
        "mn-Cyrl-MN",
        "mN-cYrL-Mn",
        "fr-Latn-CA",
        "en-US",
        "fr-Latn-CA",
        "i-enochian", // grandfathered
        "x-fr-CH",
        "sr-Latn-CS",
        "es-419",
        "sl-nedis",
        "de-CH-1996",
        "de-Latg-1996",
        "sl-IT-nedis",
        "en-a-bbb-x-a-ccc",
        "de-a-value",
        "en-Latn-GB-boont-r-extended-sequence-x-private",
        "en-x-US",
        "az-Arab-x-AZE-derbend",
        "es-Latn-CO-x-private",
        "en-US-boont",
        "ab-x-abc-x-abc",     // anything goes after x
        "ab-x-abc-a-a",       // ditto
        "i-default",          // grandfathered
        "i-klingon",          // grandfathered
        "abcd-Latn",          // language of 4 chars, reserved for future use
        "AaBbCcDd-x-y-any-x", // language of 5-8 chars, registered
        "en",
        "de-AT",
        "es-419",
        "de-CH-1901",
        "sr-Cyrl",
        "sr-Cyrl-CS",
        "sl-Latn-IT-rozaj",
        "en-US-x-twain",
        "zh-cmn",
        "zh-cmn-Hant",
        "zh-cmn-Hant-HK",
        "zh-gan",
        "zh-yue-Hant-HK",
        "xr-lxs-qut", // several extlangs
        "xr-lqt-qu",  // extlang + region
        "xr-p-lze",   // extension
    ];
    for &tag in tags {
        if let Err(error) = LanguageTag::parse(tag) {
            panic!(
                "{} should be considered well-formed but returned error {}",
                tag, error
            );
        }
    }
}
|
||||
|
||||
// http://www.langtag.net/test-suites/broken-tags.txt
#[test]
fn test_broken_tags() {
    let tags: &[&str] = &[
        "",
        "f",
        "f-Latn",
        "fr-Latn-F",
        "a-value",
        "tlh-a-b-foo",
        "i-notexist", // grandfathered but not registered: always invalid
        "abcdefghi-012345678",
        "ab-abc-abc-abc-abc",
        "ab-abcd-abc",
        "ab-ab-abc",
        "ab-123-abc",
        "a-Hant-ZH",
        "a1-Hant-ZH",
        "ab-abcde-abc",
        "ab-1abc-abc",
        "ab-ab-abcd",
        "ab-123-abcd",
        "ab-abcde-abcd",
        "ab-1abc-abcd",
        "ab-a-b",
        "ab-a-x",
        "ab--ab",
        "ab-abc-",
        "-ab-abc",
        "abcd-efg",
        "aabbccddE",
    ];
    for &tag in tags {
        if let Ok(parsed) = LanguageTag::parse(tag) {
            panic!(
                "{} should be considered not well-formed but returned result {:?}",
                tag, parsed
            );
        }
    }
}
|
||||
|
||||
#[test]
fn test_random_good_tags() {
    // http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagTest.txt
    let tags: &[&str] = &[
        "zszLDm-sCVS-es-x-gn762vG-83-S-mlL",
        "IIJdFI-cfZv",
        "kbAxSgJ-685",
        "tbutP",
        "hDL-595",
        "dUf-iUjq-0hJ4P-5YkF-WD8fk",
        "FZAABA-FH",
        "xZ-lh-4QfM5z9J-1eG4-x-K-R6VPr2z",
        "Fyi",
        "SeI-DbaG",
        "ch-xwFn",
        "OeC-GPVI",
        "JLzvUSi",
        "Fxh-hLAs",
        "pKHzCP-sgaO-554",
        "eytqeW-hfgH-uQ",
        "ydn-zeOP-PR",
        "uoWmBM-yHCf-JE",
        "xwYem",
        "zie",
        "Re-wjSv-Ey-i-XE-E-JjWTEB8-f-DLSH-NVzLH-AtnFGWoH-SIDE",
        "Ri-063-c-u6v-ZfhkToTB-C-IFfmv-XT-j-rdyYFMhK-h-pY-D5-Oh6FqBhL-hcXt-v-WdpNx71-\
         K-c74m4-eBTT7-JdH7Q1Z",
        "ji",
        "IM-487",
        "EPZ-zwcB",
        "GauwEcwo",
        "kDEP",
        "FwDYt-TNvo",
        "ottqP-KLES-x-9-i9",
        "fcflR-grQQ",
        "TvFwdu-kYhs",
        "WE-336",
        "MgxQa-ywEp-8lcW-7bvT-h-dP1Md-0h7-0Z3ir-K-Srkm-kA-7LXM-Z-whb2MiO-2mNsvbLm-W3O\
         -4r-U-KceIxHdI-gvMVgUBV-2uRUni-J0-7C8yTK2",
        "Hyr-B-evMtVoB1-mtsVZf-vQMV-gM-I-rr-kvLzg-f-lAUK-Qb36Ne-Z-7eFzOD-mv6kKf-l-miZ\
         7U3-k-XDGtNQG",
        "ybrlCpzy",
        "PTow-w-cAQ51-8Xd6E-cumicgt-WpkZv3NY-q-ORYPRy-v-A4jL4A-iNEqQZZ-sjKn-W-N1F-pzy\
         c-xP5eWz-LmsCiCcZ",
        "ih-DlPR-PE",
        "Krf-362",
        "WzaD",
        "EPaOnB-gHHn",
        "XYta",
        "NZ-RgOO-tR",
        "at-FE",
        "Tpc-693",
        "YFp",
        "gRQrQULo",
        "pVomZ-585",
        "laSu-ZcAq-338",
        "gCW",
        "PydSwHRI-TYfF",
        "zKmWDD",
        "X-bCrL5RL",
        "HK",
        "YMKGcLY",
        "GDJ-nHYa-bw-X-ke-rohH5GfS-LdJKsGVe",
        "tfOxdau-yjge-489-a-oB-I8Csb-1ESaK1v-VFNz-N-FT-ZQyn-On2-I-hu-vaW3-jIQb-vg0U-h\
         Ul-h-dO6KuJqB-U-tde2L-P3gHUY-vnl5c-RyO-H-gK1-zDPu-VF1oeh8W-kGzzvBbW-yuAJZ",
        "LwDux",
        "Zl-072",
        "Ri-Ar",
        "vocMSwo-cJnr-288",
        "kUWq-gWfQ-794",
        "YyzqKL-273",
        "Xrw-ZHwH-841-9foT-ESSZF-6OqO-0knk-991U-9p3m-b-JhiV-0Kq7Y-h-cxphLb-cDlXUBOQ-X\
         -4Ti-jty94yPp",
        "en-GB-oed",
        "LEuZl-so",
        "HyvBvFi-cCAl-X-irMQA-Pzt-H",
        "uDbsrAA-304",
        "wTS",
        "IWXS",
        "XvDqNkSn-jRDR",
        "gX-Ycbb-iLphEks-AQ1aJ5",
        "FbSBz-VLcR-VL",
        "JYoVQOP-Iytp",
        "gDSoDGD-lq-v-7aFec-ag-k-Z4-0kgNxXC-7h",
        "Bjvoayy-029",
        "qSDJd",
        "qpbQov",
        "fYIll-516",
        "GfgLyfWE-EHtB",
        "Wc-ZMtk",
        "cgh-VEYK",
        "WRZs-AaFd-yQ",
        "eSb-CpsZ-788",
        "YVwFU",
        "JSsHiQhr-MpjT-381",
        "LuhtJIQi-JKYt",
        "vVTvS-RHcP",
        "SY",
        "fSf-EgvQfI-ktWoG-8X5z-63PW",
        "NOKcy",
        "OjJb-550",
        "KB",
        "qzKBv-zDKk-589",
        "Jr",
        "Acw-GPXf-088",
        "WAFSbos",
        "HkgnmerM-x-e5-zf-VdDjcpz-1V6",
        "UAfYflJU-uXDc-YV",
        "x-CHsHx-VDcOUAur-FqagDTx-H-V0e74R",
        "uZIAZ-Xmbh-pd",
    ];
    for &tag in tags {
        if let Err(error) = LanguageTag::parse(tag) {
            panic!(
                "{} should be considered well-formed but returned error {}",
                tag, error
            );
        }
    }
}
|
||||
|
||||
#[test]
fn test_random_bad_tags() {
    // http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagTest.txt
    let tags: &[&str] = &[
        "EdY-z_H791Xx6_m_kj",
        "qWt85_8S0-L_rbBDq0gl_m_O_zsAx_nRS",
        "VzyL2",
        "T_VFJq-L-0JWuH_u2_VW-hK-kbE",
        "u-t",
        "Q-f_ZVJXyc-doj_k-i",
        "JWB7gNa_K-5GB-25t_W-s-ZbGVwDu1-H3E",
        "b-2T-Qob_L-C9v_2CZxK86",
        "fQTpX_0_4Vg_L3L_g7VtALh2",
        "S-Z-E_J",
        "f6wsq-02_i-F",
        "9_GcUPq_G",
        "QjsIy_9-0-7_Dv2yPV09_D-JXWXM",
        "D_se-f-k",
        "ON47Wv1_2_W",
        "f-z-R_s-ha",
        "N3APeiw_195_Bx2-mM-pf-Z-Ip5lXWa-5r",
        "IRjxU-E_6kS_D_b1b_H",
        "NB-3-5-AyW_FQ-9hB-TrRJg3JV_3C",
        "yF-3a_V_FoJQAHeL_Z-Mc-u",
        "n_w_bbunOG_1-s-tJMT5je",
        "Q-AEWE_X",
        "57b1O_k_R6MU_sb",
        "hK_65J_i-o_SI-Y",
        "wB4B7u_5I2_I_NZPI",
        "J24Nb_q_d-zE",
        "v6-dHjJmvPS_IEb-x_A-O-i",
        "8_8_dl-ZgBr84u-P-E",
        "nIn-xD7EVhe_C",
        "5_N-6P_x7Of_Lo_6_YX_R",
        "0_46Oo0sZ-YNwiU8Wr_d-M-pg1OriV",
        "laiY-5",
        "K-8Mdd-j_ila0sSpo_aO8_J",
        "wNATtSL-Cp4_gPa_fD41_9z",
        "H_FGz5V8_n6rrcoz0_1O6d-kH-7-N",
        "wDOrnHU-odqJ_vWl",
        "gP_qO-I-jH",
        "h",
        "dJ0hX-o_csBykEhU-F",
        "L-Vf7_BV_eRJ5goSF_Kp",
        "y-oF-chnavU-H",
        "9FkG-8Q-8_v",
        "W_l_NDQqI-O_SFSAOVq",
        "kDG3fzXw",
        "t-nsSp-7-t-mUK2",
        "Yw-F",
        "1-S_3_l",
        "u-v_brn-Y",
        "4_ft_3ZPZC5lA_D",
        "n_dR-QodsqJnh_e",
        "Hwvt-bSwZwj_KL-hxg0m-3_hUG",
        "mQHzvcV-UL-o2O_1KhUJQo_G2_uryk3-a",
        "b-UTn33HF",
        "r-Ep-jY-aFM_N_H",
        "K-k-krEZ0gwD_k_ua-9dm3Oy-s_v",
        "XS_oS-p",
        "EIx_h-zf5",
        "p_z-0_i-omQCo3B",
        "1_q0N_jo_9",
        "0Ai-6-S",
        "L-LZEp_HtW",
        "Zj-A4JD_2A5Aj7_b-m3",
        "x",
        "p-qPuXQpp_d-jeKifB-c-7_G-X",
        "X94cvJ_A",
        "F2D25R_qk_W-w_Okf_kx",
        "rc-f",
        "D",
        "gD_WrDfxmF-wu-E-U4t",
        "Z_BN9O4_D9-D_0E_KnCwZF-84b-19",
        "T-8_g-u-0_E",
        "lXTtys9j_X_A_m-vtNiNMw_X_b-C6Nr",
        "V_Ps-4Y-S",
        "X5wGEA",
        "mIbHFf_ALu4_Jo1Z1",
        "ET-TacYx_c",
        "Z-Lm5cAP_ri88-d_q_fi8-x",
        "rTi2ah-4j_j_4AlxTs6m_8-g9zqncIf-N5",
        "FBaLB85_u-0NxhAy-ZU_9c",
        "x_j_l-5_aV95_s_tY_jp4",
        "PL768_D-m7jNWjfD-Nl_7qvb_bs_8_Vg",
        "9-yOc-gbh",
        "6DYxZ_SL-S_Ye",
        "ZCa-U-muib-6-d-f_oEh_O",
        "Qt-S-o8340F_f_aGax-c-jbV0gfK_p",
        "WE_SzOI_OGuoBDk-gDp",
        "cs-Y_9",
        "m1_uj",
        "Y-ob_PT",
        "li-B",
        "f-2-7-9m_f8den_J_T_d",
        "p-Os0dua-H_o-u",
        "L",
        "rby-w",
    ];
    for &tag in tags {
        if let Ok(parsed) = LanguageTag::parse(tag) {
            panic!(
                "{} should be considered not well-formed but returned result {:?}",
                tag, parsed
            );
        }
    }
}
|
||||
|
||||
#[test]
fn test_eq() {
    // `parse` keeps the input as written, so comparisons are case-sensitive.
    let parsed = LanguageTag::parse("en-fr").unwrap();

    // Equality with plain strings, in both directions.
    assert_eq!(parsed, "en-fr");
    assert_ne!(parsed, "en-FR");
    assert_eq!("en-fr", parsed);

    // Hashing agrees with hashing the underlying string.
    let parsed_hash = hash(&parsed);
    assert_eq!(parsed_hash, hash("en-fr"));
    assert_ne!(parsed_hash, hash("en-FR"));
}
|
||||
|
||||
/// Hashes `value` with a freshly created [`DefaultHasher`] and returns the
/// resulting 64-bit digest.
fn hash(value: impl Hash) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(&value, &mut state);
    state.finish()
}
|
||||
|
||||
#[test]
fn test_str() {
    // `str` methods can be called directly on a parsed tag.
    let parsed = LanguageTag::parse("en-fr").unwrap();
    let has_language_prefix = parsed.starts_with("en-");
    assert!(has_language_prefix);
}
|
||||
|
||||
#[cfg(feature = "serde")]
#[test]
fn test_serd_impl() {
    // A tag over a borrowed string round-trips as a borrowed string token.
    let borrowed = LanguageTag::parse("en-us").unwrap();
    assert_tokens(&borrowed, &[Token::BorrowedStr("en-us")]);

    // A tag over an owned string round-trips as an owned string token and can
    // also be deserialized from a borrowed one.
    let owned = LanguageTag::parse("en-US".to_string()).unwrap();
    assert_tokens(&owned, &[Token::String("en-US")]);
    assert_de_tokens(&owned, &[Token::BorrowedStr("en-US")]);

    // Deserializing an invalid tag reports the parser's error message.
    assert_de_tokens_error::<LanguageTag<String>>(
        &[Token::String("verybadvalue")],
        "A subtag may be eight characters in length at maximum",
    );
}
|
@ -56,6 +56,7 @@ unic-langid = { version = "0.9", features = ["likelysubtags"] }
|
||||
unic-langid-ffi = { path = "../../../../intl/locale/rust/unic-langid-ffi" }
|
||||
fluent-langneg = { version = "0.13", features = ["cldr"] }
|
||||
fluent-langneg-ffi = { path = "../../../../intl/locale/rust/fluent-langneg-ffi" }
|
||||
oxilangtag = "0.1.3"
|
||||
rure = "0.2.2"
|
||||
rust_minidump_writer_linux = { path = "../../../crashreporter/rust_minidump_writer_linux", optional = true }
|
||||
mozannotation_client = { path = "../../../crashreporter/mozannotation_client", optional = true }
|
||||
|
Loading…
x
Reference in New Issue
Block a user