mirror of
https://github.com/openharmony/third_party_rust_rust-url.git
synced 2026-07-01 21:04:00 -04:00
Split IDNA into a separate crate.
This commit is contained in:
+2
-2
@@ -1,3 +1,3 @@
|
||||
/target
|
||||
/Cargo.lock
|
||||
target
|
||||
Cargo.lock
|
||||
/.cargo/config
|
||||
|
||||
+1
-6
@@ -16,10 +16,6 @@ name = "format"
|
||||
[[test]]
|
||||
name = "form_urlencoded"
|
||||
[[test]]
|
||||
name = "idna"
|
||||
[[test]]
|
||||
name = "punycode"
|
||||
[[test]]
|
||||
name = "tests"
|
||||
[[test]]
|
||||
name = "wpt"
|
||||
@@ -50,8 +46,7 @@ version = ">=0.6.1, <0.8"
|
||||
optional = true
|
||||
|
||||
[dependencies]
|
||||
idna = { version = "0.1.0", path = "./idna" }
|
||||
uuid = { version = "0.2", features = ["v4"] }
|
||||
rustc-serialize = "0.3"
|
||||
unicode-bidi = "0.2.3"
|
||||
unicode-normalization = "0.1.2"
|
||||
matches = "0.1"
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
[package]
|
||||
name = "idna"
|
||||
version = "0.1.0"
|
||||
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
|
||||
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
|
||||
repository = "https://github.com/servo/rust-url/"
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
unicode-bidi = "0.2.3"
|
||||
unicode-normalization = "0.1.2"
|
||||
matches = "0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
rustc-serialize = "0.3"
|
||||
|
||||
[[test]]
|
||||
name = "tests"
|
||||
@@ -0,0 +1,73 @@
|
||||
// Copyright 2016 Simon Sapin.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! This Rust crate implements IDNA
|
||||
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
|
||||
//!
|
||||
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
|
||||
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
|
||||
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
|
||||
//!
|
||||
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
|
||||
//!
|
||||
//! > Initially, domain names were restricted to ASCII characters.
|
||||
//! > A system was introduced in 2003 for internationalized domain names (IDN).
|
||||
//! > This system is called Internationalizing Domain Names for Applications,
|
||||
//! > or IDNA2003 for short.
|
||||
//! > This mechanism supports IDNs by means of a client software transformation
|
||||
//! > into a format known as Punycode.
|
||||
//! > A revision of IDNA was approved in 2010 (IDNA2008).
|
||||
//! > This revision has a number of incompatibilities with IDNA2003.
|
||||
//! >
|
||||
//! > The incompatibilities force implementers of client software,
|
||||
//! > such as browsers and emailers,
|
||||
//! > to face difficult choices during the transition period
|
||||
//! > as registries shift from IDNA2003 to IDNA2008.
|
||||
//! > This document specifies a mechanism
|
||||
//! > that minimizes the impact of this transition for client software,
|
||||
//! > allowing client software to access domains that are valid under either system.
|
||||
|
||||
#[macro_use] extern crate matches;
|
||||
extern crate unicode_bidi;
|
||||
extern crate unicode_normalization;
|
||||
|
||||
pub mod punycode;
|
||||
pub mod uts46;
|
||||
|
||||
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
|
||||
///
|
||||
/// Return the ASCII representation of a domain name,
|
||||
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
|
||||
/// and using Punycode as necessary.
|
||||
///
|
||||
/// This process may fail.
|
||||
pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
|
||||
uts46::to_ascii(domain, uts46::Flags {
|
||||
use_std3_ascii_rules: false,
|
||||
transitional_processing: true, // XXX: switch when Firefox does
|
||||
verify_dns_length: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
|
||||
///
|
||||
/// Return the Unicode representation of a domain name,
|
||||
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
|
||||
/// and decoding Punycode as necessary.
|
||||
///
|
||||
/// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation)
|
||||
/// but always returns a string for the mapped domain.
|
||||
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
|
||||
uts46::to_unicode(domain, uts46::Flags {
|
||||
use_std3_ascii_rules: false,
|
||||
|
||||
// Unused by `uts46::to_unicode`, which only reads `use_std3_ascii_rules`:
|
||||
transitional_processing: true,
|
||||
verify_dns_length: false,
|
||||
})
|
||||
}
|
||||
@@ -6,8 +6,7 @@
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
|
||||
# Run as: python make_idna_table.py idna_table.txt > src/idna_table.rs
|
||||
# Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs
|
||||
# You can get the latest idna table from
|
||||
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
//! International domain names
|
||||
//!
|
||||
//! https://url.spec.whatwg.org/#idna
|
||||
// Copyright 2013-2014 Valentin Gosu.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! [*Unicode IDNA Compatibility Processing*
|
||||
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
|
||||
|
||||
use self::Mapping::*;
|
||||
use punycode;
|
||||
@@ -9,7 +16,7 @@ use unicode_normalization::UnicodeNormalization;
|
||||
use unicode_normalization::char::is_combining_mark;
|
||||
use unicode_bidi::{BidiClass, bidi_class};
|
||||
|
||||
include!("idna_mapping.rs");
|
||||
include!("uts46_mapping_table.rs");
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Mapping {
|
||||
@@ -23,9 +30,9 @@ enum Mapping {
|
||||
}
|
||||
|
||||
struct Range {
|
||||
pub from: char,
|
||||
pub to: char,
|
||||
pub mapping: Mapping,
|
||||
from: char,
|
||||
to: char,
|
||||
mapping: Mapping,
|
||||
}
|
||||
|
||||
fn find_char(codepoint: char) -> &'static Mapping {
|
||||
@@ -45,7 +52,7 @@ fn find_char(codepoint: char) -> &'static Mapping {
|
||||
&TABLE[min].mapping
|
||||
}
|
||||
|
||||
fn map_char(codepoint: char, flags: Uts46Flags, output: &mut String, errors: &mut Vec<Error>) {
|
||||
fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
|
||||
match *find_char(codepoint) {
|
||||
Mapping::Valid => output.push(codepoint),
|
||||
Mapping::Ignored => {},
|
||||
@@ -185,7 +192,7 @@ fn passes_bidi(label: &str, transitional_processing: bool) -> bool {
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
|
||||
fn validate(label: &str, flags: Uts46Flags, errors: &mut Vec<Error>) {
|
||||
fn validate(label: &str, flags: Flags, errors: &mut Vec<Error>) {
|
||||
if label.nfc().ne(label.chars()) {
|
||||
errors.push(Error::ValidityCriteria);
|
||||
}
|
||||
@@ -212,7 +219,7 @@ fn validate(label: &str, flags: Uts46Flags, errors: &mut Vec<Error>) {
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#Processing
|
||||
fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec<Error>) -> String {
|
||||
fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
|
||||
let mut mapped = String::new();
|
||||
for c in domain.chars() {
|
||||
map_char(c, flags, &mut mapped, errors)
|
||||
@@ -226,7 +233,7 @@ fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec<Error>) ->
|
||||
if label.starts_with("xn--") {
|
||||
match punycode::decode_to_string(&label["xn--".len()..]) {
|
||||
Some(decoded_label) => {
|
||||
let flags = Uts46Flags { transitional_processing: false, ..flags };
|
||||
let flags = Flags { transitional_processing: false, ..flags };
|
||||
validate(&decoded_label, flags, errors);
|
||||
validated.push_str(&decoded_label)
|
||||
}
|
||||
@@ -241,14 +248,14 @@ fn uts46_processing(domain: &str, flags: Uts46Flags, errors: &mut Vec<Error>) ->
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Uts46Flags {
|
||||
pub struct Flags {
|
||||
pub use_std3_ascii_rules: bool,
|
||||
pub transitional_processing: bool,
|
||||
pub verify_dns_length: bool,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
|
||||
pub enum Error {
|
||||
enum Error {
|
||||
PunycodeError,
|
||||
ValidityCriteria,
|
||||
DissallowedByStd3AsciiRules,
|
||||
@@ -257,11 +264,18 @@ pub enum Error {
|
||||
TooLongForDns,
|
||||
}
|
||||
|
||||
/// Errors recorded during UTS #46 processing.
|
||||
///
|
||||
/// This is opaque for now, only indicating the presence of at least one error.
|
||||
/// More details may be exposed in the future.
|
||||
#[derive(Debug)]
|
||||
pub struct Errors(Vec<Error>);
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToASCII
|
||||
pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Vec<Error>> {
|
||||
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
|
||||
let mut errors = Vec::new();
|
||||
let mut result = String::new();
|
||||
for label in uts46_processing(domain, flags, &mut errors).split('.') {
|
||||
for label in processing(domain, flags, &mut errors).split('.') {
|
||||
if result.len() > 0 {
|
||||
result.push('.');
|
||||
}
|
||||
@@ -288,36 +302,21 @@ pub fn uts46_to_ascii(domain: &str, flags: Uts46Flags) -> Result<String, Vec<Err
|
||||
if errors.is_empty() {
|
||||
Ok(result)
|
||||
} else {
|
||||
Err(errors)
|
||||
Err(Errors(errors))
|
||||
}
|
||||
}
|
||||
|
||||
/// https://url.spec.whatwg.org/#concept-domain-to-ascii
|
||||
pub fn domain_to_ascii(domain: &str) -> Result<String, Vec<Error>> {
|
||||
uts46_to_ascii(domain, Uts46Flags {
|
||||
use_std3_ascii_rules: false,
|
||||
transitional_processing: true, // XXX: switch when Firefox does
|
||||
verify_dns_length: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToUnicode
|
||||
///
|
||||
/// Only `use_std3_ascii_rules` is used in `flags`.
|
||||
pub fn uts46_to_unicode(domain: &str, mut flags: Uts46Flags) -> (String, Vec<Error>) {
|
||||
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
|
||||
flags.transitional_processing = false;
|
||||
let mut errors = Vec::new();
|
||||
let domain = uts46_processing(domain, flags, &mut errors);
|
||||
let domain = processing(domain, flags, &mut errors);
|
||||
let errors = if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Errors(errors))
|
||||
};
|
||||
(domain, errors)
|
||||
}
|
||||
|
||||
/// https://url.spec.whatwg.org/#concept-domain-to-unicode
|
||||
pub fn domain_to_unicode(domain: &str) -> (String, Vec<Error>) {
|
||||
uts46_to_unicode(domain, Uts46Flags {
|
||||
use_std3_ascii_rules: false,
|
||||
|
||||
// Unused:
|
||||
transitional_processing: true,
|
||||
verify_dns_length: false,
|
||||
})
|
||||
}
|
||||
@@ -1,7 +1,12 @@
|
||||
extern crate url;
|
||||
extern crate rustc_serialize;
|
||||
// Copyright 2013 Simon Sapin.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use url::punycode::{decode, encode_str};
|
||||
use idna::punycode::{decode, encode_str};
|
||||
use rustc_serialize::json::{Json, Object};
|
||||
|
||||
fn one_test(description: &str, decoded: &str, encoded: &str) {
|
||||
@@ -0,0 +1,5 @@
|
||||
extern crate idna;
|
||||
extern crate rustc_serialize;
|
||||
|
||||
mod punycode;
|
||||
mod uts46;
|
||||
@@ -1,7 +1,13 @@
|
||||
extern crate url;
|
||||
// Copyright 2013-2014 Valentin Gosu.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::char;
|
||||
use url::idna;
|
||||
use idna::uts46;
|
||||
|
||||
#[test]
|
||||
fn test_uts46() {
|
||||
@@ -35,7 +41,7 @@ fn test_uts46() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let result = idna::uts46_to_ascii(&source, idna::Uts46Flags {
|
||||
let result = uts46::to_ascii(&source, uts46::Flags {
|
||||
use_std3_ascii_rules: true,
|
||||
transitional_processing: test_type == "T",
|
||||
verify_dns_length: true,
|
||||
+1
-4
@@ -141,8 +141,7 @@ extern crate serde;
|
||||
#[cfg(feature="heap_size")]
|
||||
#[macro_use] extern crate heapsize;
|
||||
|
||||
extern crate unicode_normalization;
|
||||
extern crate unicode_bidi;
|
||||
extern crate idna;
|
||||
|
||||
use std::fmt::{self, Formatter};
|
||||
use std::str;
|
||||
@@ -170,9 +169,7 @@ mod parser;
|
||||
pub mod urlutils;
|
||||
pub mod percent_encoding;
|
||||
pub mod form_urlencoded;
|
||||
pub mod punycode;
|
||||
pub mod format;
|
||||
pub mod idna;
|
||||
|
||||
/// The parsed representation of an absolute URL.
|
||||
#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
|
||||
|
||||
Reference in New Issue
Block a user