Bug 1857742 - patch 1 - Vendor the oxilangtag crate into third_party/rust. r=supply-chain-reviewers,dholbert

Differential Revision: https://phabricator.services.mozilla.com/D193891
2025-02-25 20:01:50 +00:00 · 2023-11-18 10:36:00 +00:00 · 2023-11-18 10:36:00 +00:00 · 94a3c09204
commit 94a3c09204
parent 819c082c31
12 changed files with 1871 additions and 0 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2207,6 +2207,7 @@ dependencies = [
 "nsstring",
 "oblivious_http",
 "origin-trials-ffi",
+ "oxilangtag",
 "prefs_parser",
 "processtools",
 "profiler_helper",
@ -4132,6 +4133,12 @@ dependencies = [
 "stable_deref_trait",
 ]

+[[package]]
+name = "oxilangtag"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"
+
 [[package]]
 name = "packed_simd"
 version = "0.3.9"
--- a/supply-chain/audits.toml
+++ b/supply-chain/audits.toml
@ -2529,6 +2529,15 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
 criteria = "safe-to-deploy"
 delta = "6.3.0 -> 6.4.1"

+[[audits.oxilangtag]]
+who = "Jonathan Kew <jkew@mozilla.com>"
+criteria = "safe-to-deploy"
+version = "0.1.3"
+notes = """
+I have reviewed all the code in this (small) crate.
+There is no unsafe code present.
+"""
+
 [[audits.packed_simd]]
 who = "Henri Sivonen <hsivonen@hsivonen.fi>"
 criteria = "safe-to-deploy"
--- a/third_party/rust/oxilangtag/.cargo-checksum.json
+++ b/third_party/rust/oxilangtag/.cargo-checksum.json
@ -0,0 +1 @@
+{"files":{"CHANGELOG.md":"3d0f3240ed450d19b894dd8715e20bbec50a14eb0d357df8c09a4af1f19fc831","Cargo.toml":"b8414a40b2cdeb5b34dc4b7e79a5e192b56b953d9db1a762dbf3e8728074dd6a","LICENSE":"3fe41c99abc306c2cd34a9365b1810035ae93335ebf4736c0240b469b3f410eb","README.md":"fc98b140225bc0521a136c2c1ed8146f7398349a36d52481f97d8ec2b7679619","benches/lib.rs":"61c94b95e005c0df25ff740ddc7801d65f68bd6e00c0b8aca7eeb66b103f9eea","deny.toml":"fce6beebdde75e3950abfd230b5110d485f2daf5a333cc77b447669593fa7c62","src/lib.rs":"92c85f535a42b8dde8c2f3078c61e4e1580d326ac621eba2f410bdee521be41d","tests/lib.rs":"9927c137f39094cfd8fbcf56069a047818112374148e8950fd73708e9ae0382a"},"package":"8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"}
--- a/third_party/rust/oxilangtag/CHANGELOG.md
+++ b/third_party/rust/oxilangtag/CHANGELOG.md
@ -0,0 +1,16 @@
+# Changelog
+
+## [0.1.3] - 2022-03-26
+
+### Added
+- `LanguageTag` now implements the Serde `Serialize` and `Deserialize` traits if the `serde` feature is enabled.
+ The serialization is a plain string.
+
+
+## [0.1.2] - 2021-04-16
+
+### Added
+- `LanguageTag` struct with a parser, case normalization and components accessors.
+
+### Changed
+- Proper attribution from [`language-tags`](https://github.com/pyfisch/rust-language-tags/).
--- a/third_party/rust/oxilangtag/Cargo.toml
+++ b/third_party/rust/oxilangtag/Cargo.toml
@ -0,0 +1,39 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "oxilangtag"
+version = "0.1.3"
+authors = ["Tpt <thomas@pellissier-tanon.fr>"]
+description = "Simple and fast implementation of language tag normalization and validation\n"
+readme = "README.md"
+keywords = ["language-tag", "BCP47"]
+license = "MIT"
+repository = "https://github.com/oxigraph/oxilangtag"
+[package.metadata.docs.rs]
+all-features = true
+
+[[bench]]
+name = "lib"
+harness = false
+[dependencies.serde]
+version = "1"
+optional = true
+[dev-dependencies.criterion]
+version = "0.3"
+
+[dev-dependencies.serde_test]
+version = "1"
+
+[features]
+default = []
+serialize = ["serde"]
--- a/third_party/rust/oxilangtag/LICENSE
+++ b/third_party/rust/oxilangtag/LICENSE
@ -0,0 +1,19 @@
+Copyright (c) 2015-2021 Pyfisch Tpt
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/third_party/rust/oxilangtag/README.md
+++ b/third_party/rust/oxilangtag/README.md
@ -0,0 +1,46 @@
+oxilangtag
+==========
+
+[![actions status](https://github.com/oxigraph/oxilangtag/workflows/build/badge.svg)](https://github.com/oxigraph/oxilangtag/actions)
+[![Latest Version](https://img.shields.io/crates/v/oxilangtag.svg)](https://crates.io/crates/oxilangtag)
+[![Released API docs](https://docs.rs/oxilangtag/badge.svg)](https://docs.rs/oxilangtag)
+
+OxiLangTag is a Rust library for validating and normalizing language tags following [RFC 5646](https://tools.ietf.org/html/rfc5646)
+([BCP 47](https://tools.ietf.org/html/bcp47)).
+
+It is a fork of the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) focusing on [RDF use cases](https://www.w3.org/TR/rdf11-primer/).
+You might find the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) crate more convenient.
+
+It allows language tag validation with zero heap allocation.
+Getters are also provided to easily retrieve the various language tag components.
+
+If [`serde`](https://serde.rs/) is available, `LanguageTag` implements the `Serialize` and `Deserialize` traits and encodes the language tag as a string.
+
+Example:
+```rust
+use oxilangtag::LanguageTag;
+
+// Parsing and validation
+let language_tag = LanguageTag::parse("zh-cmn-Hans-CN-x-test").unwrap();
+assert_eq!(language_tag.as_str(), "zh-cmn-Hans-CN-x-test");
+
+// Language tag components
+assert_eq!(language_tag.primary_language(), "zh");
+assert_eq!(language_tag.extended_language(), Some("cmn"));
+assert_eq!(language_tag.full_language(), "zh-cmn");
+assert_eq!(language_tag.script(), Some("Hans"));
+assert_eq!(language_tag.region(), Some("CN"));
+assert_eq!(language_tag.extension(), None);
+assert_eq!(language_tag.private_use_subtags().collect::<Vec<_>>(), vec!["test"]);
+```
+
+## License
+
+This project is licensed under the MIT license ([LICENSE](LICENSE) or `<http://opensource.org/licenses/MIT>`).
+
+It is based on the [`language-tags`](https://github.com/pyfisch/rust-language-tags/) crate by [pyfisch](https://github.com/pyfisch) under MIT license.
+
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxilangtag by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
--- a/third_party/rust/oxilangtag/benches/lib.rs
+++ b/third_party/rust/oxilangtag/benches/lib.rs
@ -0,0 +1,77 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+use oxilangtag::LanguageTag;
+
+fn bench_language_tag_parse(c: &mut Criterion) {
+    let examples = [
+        "fr",
+        "fr-Latn",
+        "fr-fra",
+        "fr-Latn-FR",
+        "fr-Latn-419",
+        "fr-FR",
+        "ax-TZ",
+        "fr-shadok",
+        "fr-y-myext-myext2",
+        "fra-Latn",
+        "fra",
+        "fra-FX",
+        "i-klingon",
+        "I-kLINgon",
+        "no-bok",
+        "fr-Lat",
+        "mn-Cyrl-MN",
+        "mN-cYrL-Mn",
+        "fr-Latn-CA",
+        "en-US",
+        "fr-Latn-CA",
+        "i-enochian",
+        "x-fr-CH",
+        "sr-Latn-CS",
+        "es-419",
+        "sl-nedis",
+        "de-CH-1996",
+        "de-Latg-1996",
+        "sl-IT-nedis",
+        "en-a-bbb-x-a-ccc",
+        "de-a-value",
+        "en-Latn-GB-boont-r-extended-sequence-x-private",
+        "en-x-US",
+        "az-Arab-x-AZE-derbend",
+        "es-Latn-CO-x-private",
+        "en-US-boont",
+        "ab-x-abc-x-abc",
+        "ab-x-abc-a-a",
+        "i-default",
+        "i-klingon",
+        "abcd-Latn",
+        "AaBbCcDd-x-y-any-x",
+        "en",
+        "de-AT",
+        "es-419",
+        "de-CH-1901",
+        "sr-Cyrl",
+        "sr-Cyrl-CS",
+        "sl-Latn-IT-rozaj",
+        "en-US-x-twain",
+        "zh-cmn",
+        "zh-cmn-Hant",
+        "zh-cmn-Hant-HK",
+        "zh-gan",
+        "zh-yue-Hant-HK",
+        "xr-lxs-qut",
+        "xr-lqt-qu",
+        "xr-p-lze",
+    ];
+
+    c.bench_function("language tag parse tests", |b| {
+        b.iter(|| {
+            for tag in examples.iter() {
+                LanguageTag::parse(*tag).unwrap();
+            }
+        })
+    });
+}
+
+criterion_group!(language_tag, bench_language_tag_parse);
+
+criterion_main!(language_tag);
--- a/third_party/rust/oxilangtag/deny.toml
+++ b/third_party/rust/oxilangtag/deny.toml
@ -0,0 +1,11 @@
+[licenses]
+unlicensed = "deny"
+allow = [
+    "MIT",
+    "Apache-2.0"
+]
+default = "deny"
+
+[bans]
+multiple-versions = "warn"
+wildcards = "deny"
--- a/third_party/rust/oxilangtag/src/lib.rs
+++ b/third_party/rust/oxilangtag/src/lib.rs
@ -0,0 +1,923 @@
+#![doc = include_str!("../README.md")]
+#![deny(unsafe_code)]
+
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::borrow::{Borrow, Cow};
+use std::cmp::Ordering;
+use std::error::Error;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::iter::once;
+use std::ops::Deref;
+use std::str::{FromStr, Split};
+
+/// A [RFC 5646](https://tools.ietf.org/html/rfc5646) language tag.
+///
+/// ```
+/// use oxilangtag::LanguageTag;
+///
+/// let language_tag = LanguageTag::parse("en-us").unwrap();
+/// assert_eq!(language_tag.into_inner(), "en-us")
+/// ```
+#[derive(Copy, Clone)]
+pub struct LanguageTag<T> {
+    tag: T,
+    positions: TagElementsPositions,
+}
+
+impl<T: Deref<Target = str>> LanguageTag<T> {
+    /// Parses a language tag according to [RFC 5646](https://tools.ietf.org/html/rfc5646)
+    /// and checks if the tag is ["well-formed"](https://tools.ietf.org/html/rfc5646#section-2.2.9).
+    ///
+    /// This operation keeps internally the `tag` parameter and does not allocate on the heap.
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("en-us").unwrap();
+    /// assert_eq!(language_tag.into_inner(), "en-us")
+    /// ```
+    pub fn parse(tag: T) -> Result<Self, LanguageTagParseError> {
+        let positions = parse_language_tag(&tag, &mut VoidOutputBuffer::default())?;
+        Ok(Self { tag, positions })
+    }
+
+    /// Borrows the underlying language tag as a string slice.
+    #[inline]
+    pub fn as_str(&self) -> &str {
+        &self.tag
+    }
+
+    /// Consumes the `LanguageTag` and returns the underlying tag representation.
+    #[inline]
+    pub fn into_inner(self) -> T {
+        self.tag
+    }
+
+    /// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+    /// assert_eq!(language_tag.primary_language(), "zh");
+    /// ```
+    #[inline]
+    pub fn primary_language(&self) -> &str {
+        &self.tag[..self.positions.language_end]
+    }
+
+    /// Returns the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
+    ///
+    /// Valid language tags have at most one extended language.
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+    /// assert_eq!(language_tag.extended_language(), Some("cmn"));
+    /// ```
+    #[inline]
+    pub fn extended_language(&self) -> Option<&str> {
+        if self.positions.language_end == self.positions.extlang_end {
+            None
+        } else {
+            Some(&self.tag[self.positions.language_end + 1..self.positions.extlang_end])
+        }
+    }
+
+    /// Iterates on the [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
+    ///
+    /// Valid language tags have at most one extended language.
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+    /// assert_eq!(language_tag.extended_language_subtags().collect::<Vec<_>>(), vec!["cmn"]);
+    /// ```
+    #[inline]
+    pub fn extended_language_subtags(&self) -> impl Iterator<Item = &str> {
+        self.extended_language().unwrap_or("").split_terminator('-')
+    }
+
+    /// Returns the [primary language subtag](https://tools.ietf.org/html/rfc5646#section-2.2.1)
+    /// and its [extended language subtags](https://tools.ietf.org/html/rfc5646#section-2.2.2).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+    /// assert_eq!(language_tag.full_language(), "zh-cmn");
+    /// ```
+    #[inline]
+    pub fn full_language(&self) -> &str {
+        &self.tag[..self.positions.extlang_end]
+    }
+
+    /// Returns the [script subtag](https://tools.ietf.org/html/rfc5646#section-2.2.3).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+    /// assert_eq!(language_tag.script(), Some("Hans"));
+    /// ```
+    #[inline]
+    pub fn script(&self) -> Option<&str> {
+        if self.positions.extlang_end == self.positions.script_end {
+            None
+        } else {
+            Some(&self.tag[self.positions.extlang_end + 1..self.positions.script_end])
+        }
+    }
+
+    /// Returns the [region subtag](https://tools.ietf.org/html/rfc5646#section-2.2.4).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-cmn-Hans-CN").unwrap();
+    /// assert_eq!(language_tag.region(), Some("CN"));
+    /// ```
+    #[inline]
+    pub fn region(&self) -> Option<&str> {
+        if self.positions.script_end == self.positions.region_end {
+            None
+        } else {
+            Some(&self.tag[self.positions.script_end + 1..self.positions.region_end])
+        }
+    }
+
+    /// Returns the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap();
+    /// assert_eq!(language_tag.variant(), Some("pinyin"));
+    /// ```
+    #[inline]
+    pub fn variant(&self) -> Option<&str> {
+        if self.positions.region_end == self.positions.variant_end {
+            None
+        } else {
+            Some(&self.tag[self.positions.region_end + 1..self.positions.variant_end])
+        }
+    }
+
+    /// Iterates on the [variant subtags](https://tools.ietf.org/html/rfc5646#section-2.2.5).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("zh-Latn-TW-pinyin").unwrap();
+    /// assert_eq!(language_tag.variant_subtags().collect::<Vec<_>>(), vec!["pinyin"]);
+    /// ```
+    #[inline]
+    pub fn variant_subtags(&self) -> impl Iterator<Item = &str> {
+        self.variant().unwrap_or("").split_terminator('-')
+    }
+
+    /// Returns the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap();
+    /// assert_eq!(language_tag.extension(), Some("u-co-phonebk"));
+    /// ```
+    #[inline]
+    pub fn extension(&self) -> Option<&str> {
+        if self.positions.variant_end == self.positions.extension_end {
+            None
+        } else {
+            Some(&self.tag[self.positions.variant_end + 1..self.positions.extension_end])
+        }
+    }
+
+    /// Iterates on the [extension subtags](https://tools.ietf.org/html/rfc5646#section-2.2.6).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("de-DE-u-co-phonebk").unwrap();
+    /// assert_eq!(language_tag.extension_subtags().collect::<Vec<_>>(), vec![('u', "co-phonebk")]);
+    /// ```
+    #[inline]
+    pub fn extension_subtags(&self) -> impl Iterator<Item = (char, &str)> {
+        match self.extension() {
+            Some(parts) => ExtensionsIterator::new(parts),
+            None => ExtensionsIterator::new(""),
+        }
+    }
+
+    /// Returns the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap();
+    /// assert_eq!(language_tag.private_use(), Some("x-foo-bar"));
+    /// ```
+    #[inline]
+    pub fn private_use(&self) -> Option<&str> {
+        if self.tag.starts_with("x-") {
+            Some(&self.tag)
+        } else if self.positions.extension_end == self.tag.len() {
+            None
+        } else {
+            Some(&self.tag[self.positions.extension_end + 1..])
+        }
+    }
+
+    /// Iterates on the [private use subtags](https://tools.ietf.org/html/rfc5646#section-2.2.7).
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse("de-x-foo-bar").unwrap();
+    /// assert_eq!(language_tag.private_use_subtags().collect::<Vec<_>>(), vec!["foo", "bar"]);
+    /// ```
+    #[inline]
+    pub fn private_use_subtags(&self) -> impl Iterator<Item = &str> {
+        self.private_use()
+            .map(|part| &part[2..])
+            .unwrap_or("")
+            .split_terminator('-')
+    }
+}
+
+impl LanguageTag<String> {
+    /// Parses a language tag according to [RFC 5646](https://tools.ietf.org/html/rfc5646)
+    /// and normalizes its case.
+    ///
+    /// This parser accepts the language tags that are "well-formed" according to
+    /// [RFC 5646](https://tools.ietf.org/html/rfc5646#section-2.2.9).
+    ///
+    /// This operation does heap allocation.
+    ///
+    /// ```
+    /// use oxilangtag::LanguageTag;
+    ///
+    /// let language_tag = LanguageTag::parse_and_normalize("en-us").unwrap();
+    /// assert_eq!(language_tag.into_inner(), "en-US")
+    /// ```
+    pub fn parse_and_normalize(tag: &str) -> Result<Self, LanguageTagParseError> {
+        let mut output_buffer = String::with_capacity(tag.len());
+        let positions = parse_language_tag(tag, &mut output_buffer)?;
+        Ok(Self {
+            tag: output_buffer,
+            positions,
+        })
+    }
+}
+
+impl<Lft: PartialEq<Rhs>, Rhs> PartialEq<LanguageTag<Rhs>> for LanguageTag<Lft> {
+    #[inline]
+    fn eq(&self, other: &LanguageTag<Rhs>) -> bool {
+        self.tag.eq(&other.tag)
+    }
+}
+
+impl<T: PartialEq<str>> PartialEq<str> for LanguageTag<T> {
+    #[inline]
+    fn eq(&self, other: &str) -> bool {
+        self.tag.eq(other)
+    }
+}
+
+impl<'a, T: PartialEq<&'a str>> PartialEq<&'a str> for LanguageTag<T> {
+    #[inline]
+    fn eq(&self, other: &&'a str) -> bool {
+        self.tag.eq(other)
+    }
+}
+
+impl<T: PartialEq<String>> PartialEq<String> for LanguageTag<T> {
+    #[inline]
+    fn eq(&self, other: &String) -> bool {
+        self.tag.eq(other)
+    }
+}
+
+impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<Cow<'a, str>> for LanguageTag<T> {
+    #[inline]
+    fn eq(&self, other: &Cow<'a, str>) -> bool {
+        self.tag.eq(other)
+    }
+}
+
+impl<T: PartialEq<str>> PartialEq<LanguageTag<T>> for str {
+    #[inline]
+    fn eq(&self, other: &LanguageTag<T>) -> bool {
+        other.tag.eq(self)
+    }
+}
+
+impl<'a, T: PartialEq<&'a str>> PartialEq<LanguageTag<T>> for &'a str {
+    #[inline]
+    fn eq(&self, other: &LanguageTag<T>) -> bool {
+        other.tag.eq(self)
+    }
+}
+
+impl<T: PartialEq<String>> PartialEq<LanguageTag<T>> for String {
+    #[inline]
+    fn eq(&self, other: &LanguageTag<T>) -> bool {
+        other.tag.eq(self)
+    }
+}
+
+impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<LanguageTag<T>> for Cow<'a, str> {
+    #[inline]
+    fn eq(&self, other: &LanguageTag<T>) -> bool {
+        other.tag.eq(self)
+    }
+}
+
+impl<T: Eq> Eq for LanguageTag<T> {}
+
+impl<T: Hash> Hash for LanguageTag<T> {
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.tag.hash(state)
+    }
+}
+
+impl<T: PartialOrd> PartialOrd for LanguageTag<T> {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.tag.partial_cmp(&other.tag)
+    }
+}
+
+impl<T: Ord> Ord for LanguageTag<T> {
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.tag.cmp(&other.tag)
+    }
+}
+
+impl<T: Deref<Target = str>> Deref for LanguageTag<T> {
+    type Target = str;
+
+    #[inline]
+    fn deref(&self) -> &str {
+        self.tag.deref()
+    }
+}
+
+impl<T: AsRef<str>> AsRef<str> for LanguageTag<T> {
+    #[inline]
+    fn as_ref(&self) -> &str {
+        self.tag.as_ref()
+    }
+}
+
+impl<T: Borrow<str>> Borrow<str> for LanguageTag<T> {
+    #[inline]
+    fn borrow(&self) -> &str {
+        self.tag.borrow()
+    }
+}
+
+impl<T: fmt::Debug> fmt::Debug for LanguageTag<T> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.tag.fmt(f)
+    }
+}
+
+impl<T: fmt::Display> fmt::Display for LanguageTag<T> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.tag.fmt(f)
+    }
+}
+
+impl FromStr for LanguageTag<String> {
+    type Err = LanguageTagParseError;
+
+    #[inline]
+    fn from_str(tag: &str) -> Result<Self, LanguageTagParseError> {
+        Self::parse_and_normalize(tag)
+    }
+}
+
+impl<'a> From<LanguageTag<&'a str>> for LanguageTag<String> {
+    #[inline]
+    fn from(tag: LanguageTag<&'a str>) -> Self {
+        Self {
+            tag: tag.tag.into(),
+            positions: tag.positions,
+        }
+    }
+}
+
+impl<'a> From<LanguageTag<Cow<'a, str>>> for LanguageTag<String> {
+    #[inline]
+    fn from(tag: LanguageTag<Cow<'a, str>>) -> Self {
+        Self {
+            tag: tag.tag.into(),
+            positions: tag.positions,
+        }
+    }
+}
+
+impl From<LanguageTag<Box<str>>> for LanguageTag<String> {
+    #[inline]
+    fn from(tag: LanguageTag<Box<str>>) -> Self {
+        Self {
+            tag: tag.tag.into(),
+            positions: tag.positions,
+        }
+    }
+}
+
+impl<'a> From<LanguageTag<&'a str>> for LanguageTag<Cow<'a, str>> {
+    #[inline]
+    fn from(tag: LanguageTag<&'a str>) -> Self {
+        Self {
+            tag: tag.tag.into(),
+            positions: tag.positions,
+        }
+    }
+}
+
+impl<'a> From<LanguageTag<String>> for LanguageTag<Cow<'a, str>> {
+    #[inline]
+    fn from(tag: LanguageTag<String>) -> Self {
+        Self {
+            tag: tag.tag.into(),
+            positions: tag.positions,
+        }
+    }
+}
+
+#[cfg(feature = "serde")]
+impl<T: Serialize> Serialize for LanguageTag<T> {
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        self.tag.serialize(serializer)
+    }
+}
+
+#[cfg(feature = "serde")]
+impl<'de, T: Deref<Target = str> + Deserialize<'de>> Deserialize<'de> for LanguageTag<T> {
+    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<LanguageTag<T>, D::Error> {
+        use serde::de::Error;
+
+        Self::parse(T::deserialize(deserializer)?).map_err(D::Error::custom)
+    }
+}
+
+/// An error raised during [`LanguageTag`](struct.LanguageTag.html) validation.
+#[derive(Debug)]
+pub struct LanguageTagParseError {
+    kind: TagParseErrorKind,
+}
+
+impl fmt::Display for LanguageTagParseError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self.kind {
+            TagParseErrorKind::EmptyExtension => {
+                write!(f, "If an extension subtag is present, it must not be empty")
+            }
+            TagParseErrorKind::EmptyPrivateUse => {
+                write!(f, "If the `x` subtag is present, it must not be empty")
+            }
+            TagParseErrorKind::ForbiddenChar => {
+                write!(f, "The langtag contains a char not allowed")
+            }
+            TagParseErrorKind::InvalidSubtag => write!(
+                f,
+                "A subtag fails to parse, it does not match any other subtags"
+            ),
+            TagParseErrorKind::InvalidLanguage => write!(f, "The given language subtag is invalid"),
+            TagParseErrorKind::SubtagTooLong => {
+                write!(f, "A subtag may be eight characters in length at maximum")
+            }
+            TagParseErrorKind::EmptySubtag => write!(f, "A subtag should not be empty"),
+            TagParseErrorKind::TooManyExtlangs => {
+                write!(f, "At maximum three extlangs are allowed")
+            }
+        }
+    }
+}
+
+impl Error for LanguageTagParseError {}
+
+#[derive(Debug)]
+enum TagParseErrorKind {
+    /// If an extension subtag is present, it must not be empty.
+    EmptyExtension,
+    /// If the `x` subtag is present, it must not be empty.
+    EmptyPrivateUse,
+    /// The langtag contains a char that is not A-Z, a-z, 0-9 or the dash.
+    ForbiddenChar,
+    /// A subtag fails to parse, it does not match any other subtags.
+    InvalidSubtag,
+    /// The given language subtag is invalid.
+    InvalidLanguage,
+    /// A subtag may be eight characters in length at maximum.
+    SubtagTooLong,
+    /// A subtag should not be empty.
+    EmptySubtag,
+    /// At most three extlangs are allowed, but zero or one extlang is preferred.
+    TooManyExtlangs,
+}
+
+#[derive(Copy, Clone, Debug)]
+struct TagElementsPositions {
+    language_end: usize,
+    extlang_end: usize,
+    script_end: usize,
+    region_end: usize,
+    variant_end: usize,
+    extension_end: usize,
+}
+
+trait OutputBuffer: Extend<char> {
+    fn push(&mut self, c: char);
+
+    fn push_str(&mut self, s: &str);
+}
+
+#[derive(Default)]
+struct VoidOutputBuffer {}
+
+impl OutputBuffer for VoidOutputBuffer {
+    #[inline]
+    fn push(&mut self, _: char) {}
+
+    #[inline]
+    fn push_str(&mut self, _: &str) {}
+}
+
+impl Extend<char> for VoidOutputBuffer {
+    #[inline]
+    fn extend<T: IntoIterator<Item = char>>(&mut self, _: T) {}
+}
+
+impl OutputBuffer for String {
+    #[inline]
+    fn push(&mut self, c: char) {
+        self.push(c);
+    }
+
+    #[inline]
+    fn push_str(&mut self, s: &str) {
+        self.push_str(s);
+    }
+}
+
+/// Parses a language tag following [the RFC 5646 grammar](https://tools.ietf.org/html/rfc5646#section-2.1).
+fn parse_language_tag(
+    input: &str,
+    output: &mut impl OutputBuffer,
+) -> Result<TagElementsPositions, LanguageTagParseError> {
+    // Grandfathered tags, matched ASCII case-insensitively against the whole input
+    if let Some(tag) = GRANDFATHEREDS
+        .iter()
+        .find(|record| record.eq_ignore_ascii_case(input))
+    {
+        output.push_str(tag);
+        Ok(TagElementsPositions {
+            language_end: tag.len(),
+            extlang_end: tag.len(),
+            script_end: tag.len(),
+            region_end: tag.len(),
+            variant_end: tag.len(),
+            extension_end: tag.len(),
+        })
+    } else if input.starts_with("x-") || input.starts_with("X-") {
+        // private use
+        if !is_alphanumeric_or_dash(input) {
+            Err(LanguageTagParseError {
+                kind: TagParseErrorKind::ForbiddenChar,
+            })
+        } else if input.len() == 2 {
+            Err(LanguageTagParseError {
+                kind: TagParseErrorKind::EmptyPrivateUse,
+            })
+        } else {
+            output.extend(input.chars().map(|c| c.to_ascii_lowercase()));
+            Ok(TagElementsPositions {
+                language_end: input.len(),
+                extlang_end: input.len(),
+                script_end: input.len(),
+                region_end: input.len(),
+                variant_end: input.len(),
+                extension_end: input.len(),
+            })
+        }
+    } else {
+        parse_langtag(input, output)
+    }
+}
+
+/// Handles normal tags.
+fn parse_langtag(
+    input: &str,
+    output: &mut impl OutputBuffer,
+) -> Result<TagElementsPositions, LanguageTagParseError> {
+    #[derive(PartialEq, Eq)]
+    enum State {
+        Start,
+        AfterLanguage,
+        AfterExtLang,
+        AfterScript,
+        AfterRegion,
+        InExtension { expected: bool },
+        InPrivateUse { expected: bool },
+    }
+
+    let mut state = State::Start;
+    let mut language_end = 0;
+    let mut extlang_end = 0;
+    let mut script_end = 0;
+    let mut region_end = 0;
+    let mut variant_end = 0;
+    let mut extension_end = 0;
+    let mut extlangs_count = 0;
+    for (subtag, end) in SubTagIterator::new(input) {
+        if subtag.is_empty() {
+            return Err(LanguageTagParseError {
+                kind: TagParseErrorKind::EmptySubtag,
+            });
+        }
+        if subtag.len() > 8 {
+            return Err(LanguageTagParseError {
+                kind: TagParseErrorKind::SubtagTooLong,
+            });
+        }
+        if state == State::Start {
+            // Primary language
+            if subtag.len() < 2 || !is_alphabetic(subtag) {
+                return Err(LanguageTagParseError {
+                    kind: TagParseErrorKind::InvalidLanguage,
+                });
+            }
+            language_end = end;
+            output.extend(to_lowercase(subtag));
+            if subtag.len() < 4 {
+                // extlangs are only allowed for short language tags
+                state = State::AfterLanguage;
+            } else {
+                state = State::AfterExtLang;
+            }
+        } else if let State::InPrivateUse { .. } = state {
+            if !is_alphanumeric(subtag) {
+                return Err(LanguageTagParseError {
+                    kind: TagParseErrorKind::InvalidSubtag,
+                });
+            }
+            output.push('-');
+            output.extend(to_lowercase(subtag));
+            state = State::InPrivateUse { expected: false };
+        } else if subtag == "x" || subtag == "X" {
+            // We make sure extension is found
+            if let State::InExtension { expected: true } = state {
+                return Err(LanguageTagParseError {
+                    kind: TagParseErrorKind::EmptyExtension,
+                });
+            }
+            output.push('-');
+            output.push('x');
+            state = State::InPrivateUse { expected: true };
+        } else if subtag.len() == 1 && is_alphanumeric(subtag) {
+            // We make sure extension is found
+            if let State::InExtension { expected: true } = state {
+                return Err(LanguageTagParseError {
+                    kind: TagParseErrorKind::EmptyExtension,
+                });
+            }
+            let extension_tag = subtag.chars().next().unwrap().to_ascii_lowercase();
+            output.push('-');
+            output.push(extension_tag);
+            state = State::InExtension { expected: true };
+        } else if let State::InExtension { .. } = state {
+            if !is_alphanumeric(subtag) {
+                return Err(LanguageTagParseError {
+                    kind: TagParseErrorKind::InvalidSubtag,
+                });
+            }
+            extension_end = end;
+            output.push('-');
+            output.extend(to_lowercase(subtag));
+            state = State::InExtension { expected: false };
+        } else if state == State::AfterLanguage && subtag.len() == 3 && is_alphabetic(subtag) {
+            extlangs_count += 1;
+            if extlangs_count > 3 {
+                return Err(LanguageTagParseError {
+                    kind: TagParseErrorKind::TooManyExtlangs,
+                });
+            }
+            // valid extlangs
+            extlang_end = end;
+            output.push('-');
+            output.extend(to_lowercase(subtag));
+        } else if (state == State::AfterLanguage || state == State::AfterExtLang)
+            && subtag.len() == 4
+            && is_alphabetic(subtag)
+        {
+            // Script
+            script_end = end;
+            output.push('-');
+            output.extend(to_uppercase_first(subtag));
+            state = State::AfterScript;
+        } else if (state == State::AfterLanguage
+            || state == State::AfterExtLang
+            || state == State::AfterScript)
+            && (subtag.len() == 2 && is_alphabetic(subtag)
+                || subtag.len() == 3 && is_numeric(subtag))
+        {
+            // Region
+            region_end = end;
+            output.push('-');
+            output.extend(to_uppercase(subtag));
+            state = State::AfterRegion;
+        } else if (state == State::AfterLanguage
+            || state == State::AfterExtLang
+            || state == State::AfterScript
+            || state == State::AfterRegion)
+            && is_alphanumeric(subtag)
+            && (subtag.len() >= 5 && is_alphabetic(&subtag[0..1])
+                || subtag.len() >= 4 && is_numeric(&subtag[0..1]))
+        {
+            // Variant
+            variant_end = end;
+            output.push('-');
+            output.extend(to_lowercase(subtag));
+            state = State::AfterRegion;
+        } else {
+            return Err(LanguageTagParseError {
+                kind: TagParseErrorKind::InvalidSubtag,
+            });
+        }
+    }
+
+    //We make sure we are in a correct final state
+    if let State::InExtension { expected: true } = state {
+        return Err(LanguageTagParseError {
+            kind: TagParseErrorKind::EmptyExtension,
+        });
+    }
+    if let State::InPrivateUse { expected: true } = state {
+        return Err(LanguageTagParseError {
+            kind: TagParseErrorKind::EmptyPrivateUse,
+        });
+    }
+
+    //We make sure we have not skipped anyone
+    if extlang_end < language_end {
+        extlang_end = language_end;
+    }
+    if script_end < extlang_end {
+        script_end = extlang_end;
+    }
+    if region_end < script_end {
+        region_end = script_end;
+    }
+    if variant_end < region_end {
+        variant_end = region_end;
+    }
+    if extension_end < variant_end {
+        extension_end = variant_end;
+    }
+
+    Ok(TagElementsPositions {
+        language_end,
+        extlang_end,
+        script_end,
+        region_end,
+        variant_end,
+        extension_end,
+    })
+}
+
+/// Walks an extension section such as `a-babble-b-warble`, producing one
+/// `(singleton, content)` pair per extension.
+struct ExtensionsIterator<'a> {
+    // Portion of the extension text that has not been yielded yet
+    input: &'a str,
+}
+
+impl<'a> ExtensionsIterator<'a> {
+    /// Starts iterating over `input`.
+    fn new(input: &'a str) -> Self {
+        Self { input }
+    }
+}
+
+impl<'a> Iterator for ExtensionsIterator<'a> {
+    type Item = (char, &'a str);
+
+    /// Yields the next `(singleton, content)` pair, or `None` once the input is exhausted.
+    ///
+    /// Panics if the remaining input starts with `-` (empty first subtag).
+    fn next(&mut self) -> Option<(char, &'a str)> {
+        let mut subtags = self.input.split_terminator('-');
+        let singleton = subtags.next()?.chars().next().unwrap();
+        // Byte offset just past the subtags consumed so far; 2 covers the singleton and its dash
+        let mut offset: usize = 2;
+        for subtag in subtags {
+            if subtag.len() != 1 {
+                offset += subtag.len() + 1;
+                continue;
+            }
+            // A one-character subtag opens the next extension: cut right before it
+            let (current, rest) = (&self.input[2..offset - 1], &self.input[offset..]);
+            self.input = rest;
+            return Some((singleton, current));
+        }
+        // No further singleton: everything after the first dash belongs to this extension
+        let remaining = self.input.get(2..);
+        self.input = "";
+        remaining.map(|content| (singleton, content))
+    }
+}
+
+/// Splits a tag on `-` and reports, for each subtag, the byte offset where it ends.
+struct SubTagIterator<'a> {
+    // Underlying dash splitter
+    split: Split<'a, char>,
+    // Byte offset in the input at which the next subtag starts
+    position: usize,
+}
+
+impl<'a> SubTagIterator<'a> {
+    /// Creates an iterator positioned at the start of `input`.
+    #[inline]
+    fn new(input: &'a str) -> Self {
+        let split = input.split('-');
+        Self { split, position: 0 }
+    }
+}
+
+impl<'a> Iterator for SubTagIterator<'a> {
+    type Item = (&'a str, usize);
+
+    /// Returns the next subtag with its exclusive end offset in the original input.
+    #[inline]
+    fn next(&mut self) -> Option<(&'a str, usize)> {
+        let subtag = self.split.next()?;
+        let end = self.position + subtag.len();
+        // Skip the dash that follows this subtag
+        self.position = end + 1;
+        Some((subtag, end))
+    }
+}
+
+/// Returns `true` when `s` consists solely of ASCII letters (also `true` for an empty string).
+#[inline]
+fn is_alphabetic(s: &str) -> bool {
+    s.bytes().all(|byte| byte.is_ascii_alphabetic())
+}
+
+/// Returns `true` when `s` consists solely of ASCII digits (also `true` for an empty string).
+#[inline]
+fn is_numeric(s: &str) -> bool {
+    s.bytes().all(|byte| byte.is_ascii_digit())
+}
+
+/// Returns `true` when `s` consists solely of ASCII letters and digits (also `true` for an empty string).
+#[inline]
+fn is_alphanumeric(s: &str) -> bool {
+    s.bytes().all(|byte| byte.is_ascii_alphanumeric())
+}
+
+/// Returns `true` when `s` contains nothing but ASCII letters, ASCII digits and `-`
+/// (also `true` for an empty string).
+#[inline]
+fn is_alphanumeric_or_dash(s: &str) -> bool {
+    s.bytes()
+        .all(|byte| byte == b'-' || byte.is_ascii_alphanumeric())
+}
+
+/// Lazily yields the characters of `s`, with ASCII letters converted to upper case.
+#[inline]
+fn to_uppercase(s: &str) -> impl Iterator<Item = char> + '_ {
+    s.chars().map(|ch| char::to_ascii_uppercase(&ch))
+}
+
+// Yields `s` with its first character ASCII-upper-cased and the rest ASCII-lower-cased.
+// Warning: an empty `s` makes this panic (our callers never pass one)
+#[inline]
+fn to_uppercase_first(s: &str) -> impl Iterator<Item = char> + '_ {
+    let mut rest = s.chars();
+    let first = rest.next().unwrap().to_ascii_uppercase();
+    once(first).chain(rest.map(|ch| ch.to_ascii_lowercase()))
+}
+
+/// Lazily yields the characters of `s`, with ASCII letters converted to lower case.
+#[inline]
+fn to_lowercase(s: &str) -> impl Iterator<Item = char> + '_ {
+    s.chars().map(|ch| char::to_ascii_lowercase(&ch))
+}
+
+// Fixed table of 26 complete tag literals (e.g. `i-klingon`, `zh-min-nan`).
+// NOTE(review): presumably the RFC 5646 grandfathered tags that the parser accepts
+// as-is; the lookup code is outside this excerpt, so confirm there.
+const GRANDFATHEREDS: [&str; 26] = [
+    "art-lojban",
+    "cel-gaulish",
+    "en-GB-oed",
+    "i-ami",
+    "i-bnn",
+    "i-default",
+    "i-enochian",
+    "i-hak",
+    "i-klingon",
+    "i-lux",
+    "i-mingo",
+    "i-navajo",
+    "i-pwn",
+    "i-tao",
+    "i-tay",
+    "i-tsu",
+    "no-bok",
+    "no-nyn",
+    "sgn-BE-FR",
+    "sgn-BE-NL",
+    "sgn-CH-DE",
+    "zh-guoyu",
+    "zh-hakka",
+    "zh-min",
+    "zh-min-nan",
+    "zh-xiang",
+];
--- a/third_party/rust/oxilangtag/tests/lib.rs
+++ b/third_party/rust/oxilangtag/tests/lib.rs
@ -0,0 +1,722 @@
+use oxilangtag::LanguageTag;
+use serde_test::{assert_de_tokens, assert_de_tokens_error};
+#[cfg(feature = "serde")]
+use serde_test::{assert_tokens, Token};
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
+
+// Formatting (case normalization) cases from RFC 5646 2.1.1
+#[test]
+fn test_formatting() {
+    // (expected normalized tag, raw input)
+    let cases = [
+        ("mn-Cyrl-MN", "mn-Cyrl-MN"),
+        ("mn-Cyrl-MN", "MN-cYRL-mn"),
+        ("mn-Cyrl-MN", "mN-cYrL-Mn"),
+        ("en-CA-x-ca", "en-CA-x-ca"),
+        ("sgn-BE-FR", "sgn-BE-FR"),
+        ("az-Latn-x-latn", "az-Latn-x-latn"),
+        ("i-ami", "i-ami"),
+        ("i-ami", "I-AMI"),
+        ("sl-afb-Latn-005-nedis", "SL-AFB-lATN-005-nEdis"),
+    ];
+    for &(expected, input) in cases.iter() {
+        let tag = LanguageTag::parse_and_normalize(input).unwrap();
+        assert_eq!(expected, tag.as_str());
+    }
+}
+
+// Primary language cases from RFC 5646 2.2.1
+#[test]
+fn test_primary_language() {
+    // (expected primary language, raw input)
+    let cases = [
+        ("fr", "fr"),
+        ("de", "de"),
+        ("x-fr-ch", "x-fr-CH"),
+        ("i-klingon", "i-klingon"),
+        ("i-bnn", "i-bnn"),
+        ("zh-hakka", "zh-hakka"),
+    ];
+    for &(expected, input) in cases.iter() {
+        let tag = LanguageTag::parse_and_normalize(input).unwrap();
+        assert_eq!(expected, tag.primary_language());
+    }
+}
+
+// Extended language cases from RFC 5646 2.2.2
+#[test]
+fn test_extended_language() {
+    // Parses `input` and compares
+    // (full language, primary language, extended language, extended language subtags)
+    fn check(expected: (&str, &str, Option<&str>, Vec<&str>), input: &str) {
+        let tag: LanguageTag<String> = input.parse().unwrap();
+        let actual = (
+            tag.full_language(),
+            tag.primary_language(),
+            tag.extended_language(),
+            tag.extended_language_subtags().collect::<Vec<&str>>(),
+        );
+        assert_eq!(expected, actual);
+    }
+
+    check(("zh", "zh", None, vec![]), "zh");
+    check(("zh-gan", "zh", Some("gan"), vec!["gan"]), "zh-gan");
+    check(
+        ("zh-gan-foo", "zh", Some("gan-foo"), vec!["gan", "foo"]),
+        "zh-gan-foo",
+    );
+    check(("zh-min-nan", "zh-min-nan", None, vec![]), "zh-min-nan");
+    check(("i-tsu", "i-tsu", None, vec![]), "i-tsu");
+    check(("zh", "zh", None, vec![]), "zh-CN");
+    check(("zh-gan", "zh", Some("gan"), vec!["gan"]), "zh-gan-CN");
+    check(("ar-afb", "ar", Some("afb"), vec!["afb"]), "ar-afb");
+}
+
+// Script cases from RFC 5646 2.2.3
+#[test]
+fn test_script() {
+    // Parses `input` and compares (primary language, script)
+    fn check(expected: (&str, Option<&str>), input: &str) {
+        let tag: LanguageTag<String> = input.parse().unwrap();
+        assert_eq!(expected, (tag.primary_language(), tag.script()));
+    }
+
+    check(("sr", Some("Latn")), "sr-Latn");
+    check(("ar", Some("Latn")), "ar-afb-Latn");
+}
+
+// Region cases from RFC 5646 2.2.4
+#[test]
+fn test_region() {
+    // (expected primary language, expected script, expected region, raw input)
+    let cases: [(&str, Option<&str>, Option<&str>, &str); 5] = [
+        ("de", None, Some("AT"), "de-AT"),
+        ("sr", Some("Latn"), Some("RS"), "sr-Latn-RS"),
+        ("es", None, Some("419"), "es-419"),
+        ("ar", None, Some("DE"), "ar-DE"),
+        ("ar", None, Some("005"), "ar-005"),
+    ];
+    for &(primary, script, region, input) in cases.iter() {
+        let tag: LanguageTag<String> = input.parse().unwrap();
+        assert_eq!(
+            (primary, script, region),
+            (tag.primary_language(), tag.script(), tag.region())
+        );
+    }
+}
+
+// Variant cases from RFC 5646 2.2.5
+#[test]
+fn test_variant() {
+    // Parses `input` and compares (primary language, variant, variant subtags)
+    fn check(expected: (&str, Option<&str>, Vec<&str>), input: &str) {
+        let tag: LanguageTag<String> = input.parse().unwrap();
+        let actual = (
+            tag.primary_language(),
+            tag.variant(),
+            tag.variant_subtags().collect::<Vec<&str>>(),
+        );
+        assert_eq!(expected, actual);
+    }
+
+    check(("sl", None, vec![]), "sl");
+    check(("sl", Some("nedis"), vec!["nedis"]), "sl-nedis");
+    check(("de", Some("1996"), vec!["1996"]), "de-CH-1996");
+    check(("art-lojban", None, vec![]), "art-lojban");
+}
+
+// Extension cases from RFC 5646 2.2.6
+#[test]
+fn test_extension() {
+    // Parses `input` and compares (primary language, extension, extension subtags)
+    fn check(expected: (&str, Option<&str>, Vec<(char, &str)>), input: &str) {
+        let tag: LanguageTag<String> = input.parse().unwrap();
+        let actual = (
+            tag.primary_language(),
+            tag.extension(),
+            tag.extension_subtags().collect::<Vec<(char, &str)>>(),
+        );
+        assert_eq!(expected, actual);
+    }
+
+    check(("en", None, vec![]), "en");
+    check(
+        ("en", Some("a-bbb"), vec![('a', "bbb")]),
+        "en-a-bbb-x-a-ccc",
+    );
+    check(
+        (
+            "en",
+            Some("a-babble-b-warble"),
+            vec![('a', "babble"), ('b', "warble")],
+        ),
+        "en-a-babble-b-warble",
+    );
+    check(("fr", Some("a-latn"), vec![('a', "latn")]), "fr-a-Latn");
+    check(
+        (
+            "en",
+            Some("r-extended-sequence"),
+            vec![('r', "extended-sequence")],
+        ),
+        "en-Latn-GB-boont-r-extended-sequence-x-private",
+    );
+    check(
+        ("en", Some("r-az-r-qt"), vec![('r', "az"), ('r', "qt")]),
+        "en-r-az-r-qt",
+    );
+    check(("i-tsu", None, vec![]), "i-tsu");
+}
+
+// Private use cases from RFC 5646 2.2.7
+#[test]
+fn test_privateuse() {
+    // Parses `input` and compares (primary language, private use, private use subtags)
+    fn check(expected: (&str, Option<&str>, Vec<&str>), input: &str) {
+        let tag: LanguageTag<String> = input.parse().unwrap();
+        let actual = (
+            tag.primary_language(),
+            tag.private_use(),
+            tag.private_use_subtags().collect::<Vec<&str>>(),
+        );
+        assert_eq!(expected, actual);
+    }
+
+    check(("en", None, vec![]), "en");
+    check(("en", Some("x-us"), vec!["us"]), "en-x-US");
+    check(("el", Some("x-koine"), vec!["koine"]), "el-x-koine");
+    check(("x-fr-ch", Some("x-fr-ch"), vec!["fr", "ch"]), "x-fr-ch");
+    check(
+        ("es", Some("x-foobar-at-007"), vec!["foobar", "at", "007"]),
+        "es-x-foobar-AT-007",
+    );
+}
+
+// Normalized output, read through `as_str()` and (for one case) `to_string()`
+#[test]
+fn test_fmt() {
+    // (expected normalized tag, raw input), checked through `as_str()`
+    let cases = [
+        ("ar-arb-Latn-DE-nedis-foobar", "ar-arb-Latn-DE-nedis-foobar"),
+        ("ar-arb-Latn-DE-nedis-foobar", "ar-arb-latn-de-nedis-foobar"),
+        ("ar-arb-Latn-DE-nedis-foobar", "AR-ARB-LATN-DE-NEDIS-FOOBAR"),
+        (
+            "xx-z-foo-a-bar-f-spam-b-eggs",
+            "xx-z-foo-a-bar-F-spam-b-eggs",
+        ),
+    ];
+    for &(expected, input) in cases.iter() {
+        let tag = LanguageTag::parse_and_normalize(input).unwrap();
+        assert_eq!(expected, tag.as_str());
+    }
+
+    // This case goes through `to_string()` rather than `as_str()`
+    let private_use = LanguageTag::parse_and_normalize("HkgnmerM-x-e5-zf-VdDjcpz-1V6").unwrap();
+    assert_eq!("hkgnmerm-x-e5-zf-vddjcpz-1v6", private_use.to_string());
+
+    let with_extension =
+        LanguageTag::parse_and_normalize("MgxQa-ywEp-8lcW-7bvT-h-dP1Md-0h7-0Z3ir").unwrap();
+    assert_eq!(
+        "mgxqa-Ywep-8lcw-7bvt-h-dp1md-0h7-0z3ir",
+        with_extension.as_str()
+    );
+}
+
+// A private use subtag containing non-ASCII characters must be rejected
+#[test]
+fn test_unicode() {
+    let result = LanguageTag::parse("zh-x-Üńìcødê");
+    assert!(result.is_err());
+}
+
+// Normalized tags compare equal regardless of input casing, and differ when subtags differ
+#[test]
+fn test_cmp() {
+    let normalized = |input: &str| LanguageTag::parse_and_normalize(input).unwrap();
+
+    assert_eq!(normalized("dE-AraB-lY"), normalized("DE-aRaB-LY"));
+    assert_ne!(normalized("zh"), normalized("zh-Latn"));
+}
+
+// Source: http://www.langtag.net/test-suites/well-formed-tags.txt
+// Every tag listed here must parse successfully.
+#[test]
+fn test_wellformed_tags() {
+    let well_formed = vec![
+        "fr",
+        "fr-Latn",
+        "fr-fra", // Extended tag
+        "fr-Latn-FR",
+        "fr-Latn-419",
+        "fr-FR",
+        "ax-TZ",     // Not in the registry, but well-formed
+        "fr-shadok", // Variant
+        "fr-y-myext-myext2",
+        "fra-Latn", // ISO 639 can be 3-letters
+        "fra",
+        "fra-FX",
+        "i-klingon", // grandfathered with singleton
+        "I-kLINgon", // tags are case-insensitive...
+        "no-bok",    // grandfathered without singleton
+        "fr-Lat",    // Extended
+        "mn-Cyrl-MN",
+        "mN-cYrL-Mn",
+        "fr-Latn-CA",
+        "en-US",
+        "fr-Latn-CA",
+        "i-enochian", // Grandfathered
+        "x-fr-CH",
+        "sr-Latn-CS",
+        "es-419",
+        "sl-nedis",
+        "de-CH-1996",
+        "de-Latg-1996",
+        "sl-IT-nedis",
+        "en-a-bbb-x-a-ccc",
+        "de-a-value",
+        "en-Latn-GB-boont-r-extended-sequence-x-private",
+        "en-x-US",
+        "az-Arab-x-AZE-derbend",
+        "es-Latn-CO-x-private",
+        "en-US-boont",
+        "ab-x-abc-x-abc",     // anything goes after x
+        "ab-x-abc-a-a",       // ditto
+        "i-default",          // grandfathered
+        "i-klingon",          // grandfathered
+        "abcd-Latn",          // Language of 4 chars reserved for future use
+        "AaBbCcDd-x-y-any-x", // Language of 5-8 chars, registered
+        "en",
+        "de-AT",
+        "es-419",
+        "de-CH-1901",
+        "sr-Cyrl",
+        "sr-Cyrl-CS",
+        "sl-Latn-IT-rozaj",
+        "en-US-x-twain",
+        "zh-cmn",
+        "zh-cmn-Hant",
+        "zh-cmn-Hant-HK",
+        "zh-gan",
+        "zh-yue-Hant-HK",
+        "xr-lxs-qut", // extlangS
+        "xr-lqt-qu",  // extlang + region
+        "xr-p-lze",   // Extension
+    ];
+    for tag in well_formed {
+        if let Err(error) = LanguageTag::parse(tag) {
+            panic!(
+                "{} should be considered well-formed but returned error {}",
+                tag, error
+            );
+        }
+    }
+}
+
+// Source: http://www.langtag.net/test-suites/broken-tags.txt
+// Every tag listed here must be rejected by the parser.
+#[test]
+fn test_broken_tags() {
+    let broken = vec![
+        "",
+        "f",
+        "f-Latn",
+        "fr-Latn-F",
+        "a-value",
+        "tlh-a-b-foo",
+        "i-notexist", // grandfathered but not registered: always invalid
+        "abcdefghi-012345678",
+        "ab-abc-abc-abc-abc",
+        "ab-abcd-abc",
+        "ab-ab-abc",
+        "ab-123-abc",
+        "a-Hant-ZH",
+        "a1-Hant-ZH",
+        "ab-abcde-abc",
+        "ab-1abc-abc",
+        "ab-ab-abcd",
+        "ab-123-abcd",
+        "ab-abcde-abcd",
+        "ab-1abc-abcd",
+        "ab-a-b",
+        "ab-a-x",
+        "ab--ab",
+        "ab-abc-",
+        "-ab-abc",
+        "abcd-efg",
+        "aabbccddE",
+    ];
+    for tag in broken {
+        if let Ok(parsed) = LanguageTag::parse(tag) {
+            panic!(
+                "{} should be considered not well-formed but returned result {:?}",
+                tag, parsed
+            );
+        }
+    }
+}
+
+// Randomly generated tags that must all parse successfully
+#[test]
+fn test_random_good_tags() {
+    // Source: http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagTest.txt
+    let well_formed = vec![
+        "zszLDm-sCVS-es-x-gn762vG-83-S-mlL",
+        "IIJdFI-cfZv",
+        "kbAxSgJ-685",
+        "tbutP",
+        "hDL-595",
+        "dUf-iUjq-0hJ4P-5YkF-WD8fk",
+        "FZAABA-FH",
+        "xZ-lh-4QfM5z9J-1eG4-x-K-R6VPr2z",
+        "Fyi",
+        "SeI-DbaG",
+        "ch-xwFn",
+        "OeC-GPVI",
+        "JLzvUSi",
+        "Fxh-hLAs",
+        "pKHzCP-sgaO-554",
+        "eytqeW-hfgH-uQ",
+        "ydn-zeOP-PR",
+        "uoWmBM-yHCf-JE",
+        "xwYem",
+        "zie",
+        "Re-wjSv-Ey-i-XE-E-JjWTEB8-f-DLSH-NVzLH-AtnFGWoH-SIDE",
+        "Ri-063-c-u6v-ZfhkToTB-C-IFfmv-XT-j-rdyYFMhK-h-pY-D5-Oh6FqBhL-hcXt-v-WdpNx71-\
+         K-c74m4-eBTT7-JdH7Q1Z",
+        "ji",
+        "IM-487",
+        "EPZ-zwcB",
+        "GauwEcwo",
+        "kDEP",
+        "FwDYt-TNvo",
+        "ottqP-KLES-x-9-i9",
+        "fcflR-grQQ",
+        "TvFwdu-kYhs",
+        "WE-336",
+        "MgxQa-ywEp-8lcW-7bvT-h-dP1Md-0h7-0Z3ir-K-Srkm-kA-7LXM-Z-whb2MiO-2mNsvbLm-W3O\
+         -4r-U-KceIxHdI-gvMVgUBV-2uRUni-J0-7C8yTK2",
+        "Hyr-B-evMtVoB1-mtsVZf-vQMV-gM-I-rr-kvLzg-f-lAUK-Qb36Ne-Z-7eFzOD-mv6kKf-l-miZ\
+         7U3-k-XDGtNQG",
+        "ybrlCpzy",
+        "PTow-w-cAQ51-8Xd6E-cumicgt-WpkZv3NY-q-ORYPRy-v-A4jL4A-iNEqQZZ-sjKn-W-N1F-pzy\
+         c-xP5eWz-LmsCiCcZ",
+        "ih-DlPR-PE",
+        "Krf-362",
+        "WzaD",
+        "EPaOnB-gHHn",
+        "XYta",
+        "NZ-RgOO-tR",
+        "at-FE",
+        "Tpc-693",
+        "YFp",
+        "gRQrQULo",
+        "pVomZ-585",
+        "laSu-ZcAq-338",
+        "gCW",
+        "PydSwHRI-TYfF",
+        "zKmWDD",
+        "X-bCrL5RL",
+        "HK",
+        "YMKGcLY",
+        "GDJ-nHYa-bw-X-ke-rohH5GfS-LdJKsGVe",
+        "tfOxdau-yjge-489-a-oB-I8Csb-1ESaK1v-VFNz-N-FT-ZQyn-On2-I-hu-vaW3-jIQb-vg0U-h\
+         Ul-h-dO6KuJqB-U-tde2L-P3gHUY-vnl5c-RyO-H-gK1-zDPu-VF1oeh8W-kGzzvBbW-yuAJZ",
+        "LwDux",
+        "Zl-072",
+        "Ri-Ar",
+        "vocMSwo-cJnr-288",
+        "kUWq-gWfQ-794",
+        "YyzqKL-273",
+        "Xrw-ZHwH-841-9foT-ESSZF-6OqO-0knk-991U-9p3m-b-JhiV-0Kq7Y-h-cxphLb-cDlXUBOQ-X\
+         -4Ti-jty94yPp",
+        "en-GB-oed",
+        "LEuZl-so",
+        "HyvBvFi-cCAl-X-irMQA-Pzt-H",
+        "uDbsrAA-304",
+        "wTS",
+        "IWXS",
+        "XvDqNkSn-jRDR",
+        "gX-Ycbb-iLphEks-AQ1aJ5",
+        "FbSBz-VLcR-VL",
+        "JYoVQOP-Iytp",
+        "gDSoDGD-lq-v-7aFec-ag-k-Z4-0kgNxXC-7h",
+        "Bjvoayy-029",
+        "qSDJd",
+        "qpbQov",
+        "fYIll-516",
+        "GfgLyfWE-EHtB",
+        "Wc-ZMtk",
+        "cgh-VEYK",
+        "WRZs-AaFd-yQ",
+        "eSb-CpsZ-788",
+        "YVwFU",
+        "JSsHiQhr-MpjT-381",
+        "LuhtJIQi-JKYt",
+        "vVTvS-RHcP",
+        "SY",
+        "fSf-EgvQfI-ktWoG-8X5z-63PW",
+        "NOKcy",
+        "OjJb-550",
+        "KB",
+        "qzKBv-zDKk-589",
+        "Jr",
+        "Acw-GPXf-088",
+        "WAFSbos",
+        "HkgnmerM-x-e5-zf-VdDjcpz-1V6",
+        "UAfYflJU-uXDc-YV",
+        "x-CHsHx-VDcOUAur-FqagDTx-H-V0e74R",
+        "uZIAZ-Xmbh-pd",
+    ];
+    for tag in well_formed {
+        if let Err(error) = LanguageTag::parse(tag) {
+            panic!(
+                "{} should be considered well-formed but returned error {}",
+                tag, error
+            );
+        }
+    }
+}
+
+// Randomly generated tags that must all be rejected by the parser
+#[test]
+fn test_random_bad_tags() {
+    // Source: http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagTest.txt
+    let broken = vec![
+        "EdY-z_H791Xx6_m_kj",
+        "qWt85_8S0-L_rbBDq0gl_m_O_zsAx_nRS",
+        "VzyL2",
+        "T_VFJq-L-0JWuH_u2_VW-hK-kbE",
+        "u-t",
+        "Q-f_ZVJXyc-doj_k-i",
+        "JWB7gNa_K-5GB-25t_W-s-ZbGVwDu1-H3E",
+        "b-2T-Qob_L-C9v_2CZxK86",
+        "fQTpX_0_4Vg_L3L_g7VtALh2",
+        "S-Z-E_J",
+        "f6wsq-02_i-F",
+        "9_GcUPq_G",
+        "QjsIy_9-0-7_Dv2yPV09_D-JXWXM",
+        "D_se-f-k",
+        "ON47Wv1_2_W",
+        "f-z-R_s-ha",
+        "N3APeiw_195_Bx2-mM-pf-Z-Ip5lXWa-5r",
+        "IRjxU-E_6kS_D_b1b_H",
+        "NB-3-5-AyW_FQ-9hB-TrRJg3JV_3C",
+        "yF-3a_V_FoJQAHeL_Z-Mc-u",
+        "n_w_bbunOG_1-s-tJMT5je",
+        "Q-AEWE_X",
+        "57b1O_k_R6MU_sb",
+        "hK_65J_i-o_SI-Y",
+        "wB4B7u_5I2_I_NZPI",
+        "J24Nb_q_d-zE",
+        "v6-dHjJmvPS_IEb-x_A-O-i",
+        "8_8_dl-ZgBr84u-P-E",
+        "nIn-xD7EVhe_C",
+        "5_N-6P_x7Of_Lo_6_YX_R",
+        "0_46Oo0sZ-YNwiU8Wr_d-M-pg1OriV",
+        "laiY-5",
+        "K-8Mdd-j_ila0sSpo_aO8_J",
+        "wNATtSL-Cp4_gPa_fD41_9z",
+        "H_FGz5V8_n6rrcoz0_1O6d-kH-7-N",
+        "wDOrnHU-odqJ_vWl",
+        "gP_qO-I-jH",
+        "h",
+        "dJ0hX-o_csBykEhU-F",
+        "L-Vf7_BV_eRJ5goSF_Kp",
+        "y-oF-chnavU-H",
+        "9FkG-8Q-8_v",
+        "W_l_NDQqI-O_SFSAOVq",
+        "kDG3fzXw",
+        "t-nsSp-7-t-mUK2",
+        "Yw-F",
+        "1-S_3_l",
+        "u-v_brn-Y",
+        "4_ft_3ZPZC5lA_D",
+        "n_dR-QodsqJnh_e",
+        "Hwvt-bSwZwj_KL-hxg0m-3_hUG",
+        "mQHzvcV-UL-o2O_1KhUJQo_G2_uryk3-a",
+        "b-UTn33HF",
+        "r-Ep-jY-aFM_N_H",
+        "K-k-krEZ0gwD_k_ua-9dm3Oy-s_v",
+        "XS_oS-p",
+        "EIx_h-zf5",
+        "p_z-0_i-omQCo3B",
+        "1_q0N_jo_9",
+        "0Ai-6-S",
+        "L-LZEp_HtW",
+        "Zj-A4JD_2A5Aj7_b-m3",
+        "x",
+        "p-qPuXQpp_d-jeKifB-c-7_G-X",
+        "X94cvJ_A",
+        "F2D25R_qk_W-w_Okf_kx",
+        "rc-f",
+        "D",
+        "gD_WrDfxmF-wu-E-U4t",
+        "Z_BN9O4_D9-D_0E_KnCwZF-84b-19",
+        "T-8_g-u-0_E",
+        "lXTtys9j_X_A_m-vtNiNMw_X_b-C6Nr",
+        "V_Ps-4Y-S",
+        "X5wGEA",
+        "mIbHFf_ALu4_Jo1Z1",
+        "ET-TacYx_c",
+        "Z-Lm5cAP_ri88-d_q_fi8-x",
+        "rTi2ah-4j_j_4AlxTs6m_8-g9zqncIf-N5",
+        "FBaLB85_u-0NxhAy-ZU_9c",
+        "x_j_l-5_aV95_s_tY_jp4",
+        "PL768_D-m7jNWjfD-Nl_7qvb_bs_8_Vg",
+        "9-yOc-gbh",
+        "6DYxZ_SL-S_Ye",
+        "ZCa-U-muib-6-d-f_oEh_O",
+        "Qt-S-o8340F_f_aGax-c-jbV0gfK_p",
+        "WE_SzOI_OGuoBDk-gDp",
+        "cs-Y_9",
+        "m1_uj",
+        "Y-ob_PT",
+        "li-B",
+        "f-2-7-9m_f8den_J_T_d",
+        "p-Os0dua-H_o-u",
+        "L",
+        "rby-w",
+    ];
+    for tag in broken {
+        if let Ok(parsed) = LanguageTag::parse(tag) {
+            panic!(
+                "{} should be considered not well-formed but returned result {:?}",
+                tag, parsed
+            );
+        }
+    }
+}
+
+// A tag parsed without normalization compares and hashes like the exact string it was built from
+#[test]
+fn test_eq() {
+    let tag = LanguageTag::parse("en-fr").unwrap();
+
+    // Equality against string slices, in both operand orders
+    assert_eq!(tag, "en-fr");
+    assert_ne!(tag, "en-FR");
+    assert_eq!("en-fr", tag);
+
+    // Hashing agrees with the underlying string and is case-sensitive
+    let same_case = hash("en-fr");
+    let other_case = hash("en-FR");
+    assert_eq!(hash(&tag), same_case);
+    assert_ne!(hash(&tag), other_case);
+}
+
+/// Feeds `value` into a fresh `DefaultHasher` and returns the resulting 64-bit digest.
+fn hash(value: impl Hash) -> u64 {
+    let mut state = DefaultHasher::new();
+    Hash::hash(&value, &mut state);
+    state.finish()
+}
+
+// String slice methods such as `starts_with` are callable directly on a parsed tag
+#[test]
+fn test_str() {
+    let tag = LanguageTag::parse("en-fr").unwrap();
+    let has_prefix = tag.starts_with("en-");
+    assert!(has_prefix);
+}
+
+// Serde round trips for borrowed and owned tags, plus the error reported for an invalid tag
+#[cfg(feature = "serde")]
+#[test]
+fn test_serd_impl() {
+    // Tag backed by a borrowed string slice
+    let borrowed = LanguageTag::parse("en-us").unwrap();
+    assert_tokens(&borrowed, &[Token::BorrowedStr("en-us")]);
+
+    // Tag backed by an owned `String`
+    let owned = LanguageTag::parse("en-US".to_string()).unwrap();
+    assert_tokens(&owned, &[Token::String("en-US")]);
+
+    // An owned tag can also be deserialized from a borrowed string token
+    let expected = LanguageTag::parse("en-US".to_string()).unwrap();
+    assert_de_tokens(&expected, &[Token::BorrowedStr("en-US")]);
+
+    // Invalid input surfaces the parser's error message
+    let invalid = [Token::String("verybadvalue")];
+    assert_de_tokens_error::<LanguageTag<String>>(
+        &invalid,
+        "A subtag may be eight characters in length at maximum",
+    );
+}
--- a/toolkit/library/rust/shared/Cargo.toml
+++ b/toolkit/library/rust/shared/Cargo.toml
@ -56,6 +56,7 @@ unic-langid = { version = "0.9", features = ["likelysubtags"] }
 unic-langid-ffi = { path = "../../../../intl/locale/rust/unic-langid-ffi" }
 fluent-langneg = { version = "0.13", features = ["cldr"] }
 fluent-langneg-ffi = { path = "../../../../intl/locale/rust/fluent-langneg-ffi" }
+oxilangtag = "0.1.3"
 rure = "0.2.2"
 rust_minidump_writer_linux = { path = "../../../crashreporter/rust_minidump_writer_linux", optional = true }
 mozannotation_client = { path = "../../../crashreporter/mozannotation_client", optional = true }
				`@ -0,0 +1 @@`
				{"files":{"CHANGELOG.md":"3d0f3240ed450d19b894dd8715e20bbec50a14eb0d357df8c09a4af1f19fc831","Cargo.toml":"b8414a40b2cdeb5b34dc4b7e79a5e192b56b953d9db1a762dbf3e8728074dd6a","LICENSE":"3fe41c99abc306c2cd34a9365b1810035ae93335ebf4736c0240b469b3f410eb","README.md":"fc98b140225bc0521a136c2c1ed8146f7398349a36d52481f97d8ec2b7679619","benches/lib.rs":"61c94b95e005c0df25ff740ddc7801d65f68bd6e00c0b8aca7eeb66b103f9eea","deny.toml":"fce6beebdde75e3950abfd230b5110d485f2daf5a333cc77b447669593fa7c62","src/lib.rs":"92c85f535a42b8dde8c2f3078c61e4e1580d326ac621eba2f410bdee521be41d","tests/lib.rs":"9927c137f39094cfd8fbcf56069a047818112374148e8950fd73708e9ae0382a"},"package":"8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b"}