mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-29 07:42:04 +00:00
Backed out 2 changesets (bug 1151899) for build bustage a=backout
Backed out changeset 7b5270bac41b (bug 1151899) Backed out changeset 62f38c70d152 (bug 1151899)
This commit is contained in:
parent
00d282bd65
commit
90f6b234f3
2
netwerk/base/rust-url-capi/.gitignore
vendored
2
netwerk/base/rust-url-capi/.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
/target
|
||||
/Cargo.lock
|
@ -1,19 +0,0 @@
|
||||
[package]
|
||||
|
||||
name = "rust_url_capi"
|
||||
version = "0.0.1"
|
||||
authors = ["Valentin Gosu <valentin.gosu@gmail.com>"]
|
||||
|
||||
[profile.dev]
|
||||
opt-level = 3
|
||||
debug = true
|
||||
rpath = true
|
||||
lto = true
|
||||
|
||||
[lib]
|
||||
name = "rust_url_capi"
|
||||
|
||||
|
||||
[dependencies]
|
||||
libc = "0.2.0"
|
||||
url = "1.2.1"
|
@ -1,68 +0,0 @@
|
||||
use url::ParseError;
|
||||
|
||||
pub trait ErrorCode {
|
||||
fn error_code(&self) -> i32;
|
||||
}
|
||||
|
||||
impl<T: ErrorCode> ErrorCode for Result<(), T> {
|
||||
fn error_code(&self) -> i32 {
|
||||
match *self {
|
||||
Ok(_) => 0,
|
||||
Err(ref error) => error.error_code(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for () {
|
||||
fn error_code(&self) -> i32 {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
impl ErrorCode for ParseError {
|
||||
fn error_code(&self) -> i32 {
|
||||
return -1;
|
||||
// match *self {
|
||||
// ParseError::EmptyHost => -1,
|
||||
// ParseError::InvalidScheme => -2,
|
||||
// ParseError::InvalidPort => -3,
|
||||
// ParseError::InvalidIpv6Address => -4,
|
||||
// ParseError::InvalidDomainCharacter => -5,
|
||||
// ParseError::InvalidCharacter => -6,
|
||||
// ParseError::InvalidBackslash => -7,
|
||||
// ParseError::InvalidPercentEncoded => -8,
|
||||
// ParseError::InvalidAtSymbolInUser => -9,
|
||||
// ParseError::ExpectedTwoSlashes => -10,
|
||||
// ParseError::ExpectedInitialSlash => -11,
|
||||
// ParseError::NonUrlCodePoint => -12,
|
||||
// ParseError::RelativeUrlWithScheme => -13,
|
||||
// ParseError::RelativeUrlWithoutBase => -14,
|
||||
// ParseError::RelativeUrlWithNonRelativeBase => -15,
|
||||
// ParseError::NonAsciiDomainsNotSupportedYet => -16,
|
||||
// ParseError::CannotSetJavascriptFragment => -17,
|
||||
// ParseError::CannotSetPortWithFileLikeScheme => -18,
|
||||
// ParseError::CannotSetUsernameWithNonRelativeScheme => -19,
|
||||
// ParseError::CannotSetPasswordWithNonRelativeScheme => -20,
|
||||
// ParseError::CannotSetHostPortWithNonRelativeScheme => -21,
|
||||
// ParseError::CannotSetHostWithNonRelativeScheme => -22,
|
||||
// ParseError::CannotSetPortWithNonRelativeScheme => -23,
|
||||
// ParseError::CannotSetPathWithNonRelativeScheme => -24,
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
pub enum NSError {
|
||||
OK,
|
||||
InvalidArg,
|
||||
Failure,
|
||||
}
|
||||
|
||||
impl ErrorCode for NSError {
|
||||
#[allow(overflowing_literals)]
|
||||
fn error_code(&self) -> i32 {
|
||||
match *self {
|
||||
NSError::OK => 0,
|
||||
NSError::InvalidArg => 0x80070057,
|
||||
NSError::Failure => 0x80004005
|
||||
}
|
||||
}
|
||||
}
|
@ -1,477 +0,0 @@
|
||||
extern crate url;
|
||||
use url::{Url, ParseError, ParseOptions};
|
||||
use url::quirks;
|
||||
extern crate libc;
|
||||
use libc::size_t;
|
||||
|
||||
|
||||
use std::mem;
|
||||
use std::str;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type rusturl_ptr = *const libc::c_void;
|
||||
|
||||
mod string_utils;
|
||||
pub use string_utils::*;
|
||||
|
||||
mod error_mapping;
|
||||
use error_mapping::*;
|
||||
|
||||
fn parser<'a>() -> ParseOptions<'a> {
|
||||
Url::options()
|
||||
}
|
||||
|
||||
fn default_port(scheme: &str) -> Option<u32> {
|
||||
match scheme {
|
||||
"ftp" => Some(21),
|
||||
"gopher" => Some(70),
|
||||
"http" => Some(80),
|
||||
"https" => Some(443),
|
||||
"ws" => Some(80),
|
||||
"wss" => Some(443),
|
||||
"rtsp" => Some(443),
|
||||
"moz-anno" => Some(443),
|
||||
"android" => Some(443),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_new(spec: *mut libc::c_char, len: size_t) -> rusturl_ptr {
|
||||
let slice = std::slice::from_raw_parts(spec as *const libc::c_uchar, len as usize);
|
||||
let url_spec = match str::from_utf8(slice) {
|
||||
Ok(spec) => spec,
|
||||
Err(_) => return 0 as rusturl_ptr
|
||||
};
|
||||
|
||||
let url = match parser().parse(url_spec) {
|
||||
Ok(url) => url,
|
||||
Err(_) => return 0 as rusturl_ptr
|
||||
};
|
||||
|
||||
let url = Box::new(url);
|
||||
Box::into_raw(url) as rusturl_ptr
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_free(urlptr: rusturl_ptr) {
|
||||
if urlptr.is_null() {
|
||||
return ();
|
||||
}
|
||||
let url: Box<Url> = Box::from_raw(urlptr as *mut url::Url);
|
||||
drop(url);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_spec(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
cont.assign(&url.to_string())
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_scheme(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
cont.assign(&url.scheme())
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_username(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
if url.cannot_be_a_base() {
|
||||
cont.set_size(0)
|
||||
} else {
|
||||
cont.assign(url.username())
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_password(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
match url.password() {
|
||||
Some(p) => cont.assign(&p.to_string()),
|
||||
None => cont.set_size(0)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_host(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
|
||||
match url.host() {
|
||||
Some(h) => cont.assign(&h.to_string()),
|
||||
None => cont.set_size(0)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_port(urlptr: rusturl_ptr) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
|
||||
match url.port() {
|
||||
Some(port) => port as i32,
|
||||
None => -1
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_path(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
if url.cannot_be_a_base() {
|
||||
cont.set_size(0)
|
||||
} else {
|
||||
cont.assign(url.path())
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_query(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
match url.query() {
|
||||
Some(ref s) => cont.assign(s),
|
||||
None => cont.set_size(0)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_get_fragment(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
|
||||
match url.fragment() {
|
||||
Some(ref fragment) => cont.assign(fragment),
|
||||
None => cont.set_size(0)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_has_fragment(urlptr: rusturl_ptr) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &Url = mem::transmute(urlptr);
|
||||
|
||||
match url.fragment() {
|
||||
Some(_) => return 1,
|
||||
None => return 0
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_scheme(urlptr: rusturl_ptr, scheme: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(scheme as *const libc::c_uchar, len as usize);
|
||||
|
||||
let scheme_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_protocol(url, scheme_).error_code()
|
||||
}
|
||||
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_username(urlptr: rusturl_ptr, username: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(username as *const libc::c_uchar, len as usize);
|
||||
|
||||
let username_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_username(url, username_).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_password(urlptr: rusturl_ptr, password: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(password as *const libc::c_uchar, len as usize);
|
||||
|
||||
let password_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_password(url, password_).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_host_and_port(urlptr: rusturl_ptr, host_and_port: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(host_and_port as *const libc::c_uchar, len as usize);
|
||||
|
||||
let host_and_port_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_host(url, host_and_port_).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_host(urlptr: rusturl_ptr, host: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(host as *const libc::c_uchar, len as usize);
|
||||
|
||||
let hostname = match str::from_utf8(slice).ok() {
|
||||
Some(h) => h,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_hostname(url, hostname).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_port(urlptr: rusturl_ptr, port: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(port as *const libc::c_uchar, len as usize);
|
||||
|
||||
let port_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_port(url, port_).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_port_no(urlptr: rusturl_ptr, new_port: i32) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
if url.cannot_be_a_base() {
|
||||
-100
|
||||
} else {
|
||||
if url.scheme() == "file" {
|
||||
return -100;
|
||||
}
|
||||
match default_port(url.scheme()) {
|
||||
Some(def_port) => if new_port == def_port as i32 {
|
||||
let _ = url.set_port(None);
|
||||
return NSError::OK.error_code();
|
||||
},
|
||||
None => {}
|
||||
};
|
||||
if new_port > std::u16::MAX as i32 || new_port < 0 {
|
||||
let _ = url.set_port(None);
|
||||
} else {
|
||||
let _ = url.set_port(Some(new_port as u16));
|
||||
}
|
||||
NSError::OK.error_code()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_path(urlptr: rusturl_ptr, path: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(path as *const libc::c_uchar, len as usize);
|
||||
|
||||
let path_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_pathname(url, path_).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_query(urlptr: rusturl_ptr, query: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(query as *const libc::c_uchar, len as usize);
|
||||
|
||||
let query_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_search(url, query_).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_set_fragment(urlptr: rusturl_ptr, fragment: *mut libc::c_char, len: size_t) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let mut url: &mut Url = mem::transmute(urlptr);
|
||||
let slice = std::slice::from_raw_parts(fragment as *const libc::c_uchar, len as usize);
|
||||
|
||||
let fragment_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return ParseError::InvalidDomainCharacter.error_code() // utf-8 failed
|
||||
};
|
||||
|
||||
quirks::set_hash(url, fragment_).error_code()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_resolve(urlptr: rusturl_ptr, resolve: *mut libc::c_char, len: size_t, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url: &mut Url = mem::transmute(urlptr);
|
||||
|
||||
let slice = std::slice::from_raw_parts(resolve as *const libc::c_uchar, len as usize);
|
||||
|
||||
let resolve_ = match str::from_utf8(slice).ok() {
|
||||
Some(p) => p,
|
||||
None => return NSError::Failure.error_code()
|
||||
};
|
||||
|
||||
match parser().base_url(Some(&url)).parse(resolve_).ok() {
|
||||
Some(u) => cont.assign(&u.to_string()),
|
||||
None => cont.set_size(0)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_common_base_spec(urlptr1: rusturl_ptr, urlptr2: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr1.is_null() || urlptr2.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url1: &Url = mem::transmute(urlptr1);
|
||||
let url2: &Url = mem::transmute(urlptr2);
|
||||
|
||||
if url1 == url2 {
|
||||
return cont.assign(&url1.to_string());
|
||||
}
|
||||
|
||||
if url1.scheme() != url2.scheme() ||
|
||||
url1.host() != url2.host() ||
|
||||
url1.username() != url2.username() ||
|
||||
url1.password() != url2.password() ||
|
||||
url1.port() != url2.port() {
|
||||
return cont.set_size(0);
|
||||
}
|
||||
|
||||
let path1 = match url1.path_segments() {
|
||||
Some(path) => path,
|
||||
None => return cont.set_size(0)
|
||||
};
|
||||
let path2 = match url2.path_segments() {
|
||||
Some(path) => path,
|
||||
None => return cont.set_size(0)
|
||||
};
|
||||
|
||||
let mut url = url1.clone();
|
||||
url.set_query(None);
|
||||
let _ = url.set_host(None);
|
||||
{
|
||||
let mut new_segments = if let Ok(segments) = url.path_segments_mut() {
|
||||
segments
|
||||
} else {
|
||||
return cont.set_size(0)
|
||||
};
|
||||
|
||||
for (p1, p2) in path1.zip(path2) {
|
||||
if p1 != p2 {
|
||||
break;
|
||||
} else {
|
||||
new_segments.push(p1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cont.assign(&url.to_string())
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn rusturl_relative_spec(urlptr1: rusturl_ptr, urlptr2: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
|
||||
if urlptr1.is_null() || urlptr2.is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
let url1: &Url = mem::transmute(urlptr1);
|
||||
let url2: &Url = mem::transmute(urlptr2);
|
||||
|
||||
if url1 == url2 {
|
||||
return cont.set_size(0);
|
||||
}
|
||||
|
||||
if url1.scheme() != url2.scheme() ||
|
||||
url1.host() != url2.host() ||
|
||||
url1.username() != url2.username() ||
|
||||
url1.password() != url2.password() ||
|
||||
url1.port() != url2.port() {
|
||||
return cont.assign(&url2.to_string());
|
||||
}
|
||||
|
||||
let mut path1 = match url1.path_segments() {
|
||||
Some(path) => path,
|
||||
None => return cont.assign(&url2.to_string())
|
||||
};
|
||||
let mut path2 = match url2.path_segments() {
|
||||
Some(path) => path,
|
||||
None => return cont.assign(&url2.to_string())
|
||||
};
|
||||
|
||||
// TODO: file:// on WIN?
|
||||
|
||||
// Exhaust the part of the iterators that match
|
||||
while let (Some(ref p1), Some(ref p2)) = (path1.next(), path2.next()) {
|
||||
if p1 != p2 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let mut buffer: String = "".to_string();
|
||||
for _ in path1 {
|
||||
buffer = buffer + "../";
|
||||
}
|
||||
for p2 in path2 {
|
||||
buffer = buffer + p2 + "/";
|
||||
}
|
||||
|
||||
return cont.assign(&buffer);
|
||||
}
|
||||
|
@ -1,45 +0,0 @@
|
||||
#ifndef __RUST_URL_CAPI
|
||||
#define __RUST_URL_CAPI
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct rusturl;
|
||||
typedef struct rusturl* rusturl_ptr;
|
||||
|
||||
rusturl_ptr rusturl_new(const char *spec, size_t src_len);
|
||||
void rusturl_free(rusturl_ptr url);
|
||||
|
||||
int32_t rusturl_get_spec(rusturl_ptr url, void*);
|
||||
int32_t rusturl_get_scheme(rusturl_ptr url, void*);
|
||||
int32_t rusturl_get_username(rusturl_ptr url, void*);
|
||||
int32_t rusturl_get_password(rusturl_ptr url, void*);
|
||||
int32_t rusturl_get_host(rusturl_ptr url, void*);
|
||||
int32_t rusturl_get_port(rusturl_ptr url); // returns port or -1
|
||||
int32_t rusturl_get_path(rusturl_ptr url, void*);
|
||||
int32_t rusturl_get_query(rusturl_ptr url, void*);
|
||||
int32_t rusturl_get_fragment(rusturl_ptr url, void*);
|
||||
int32_t rusturl_has_fragment(rusturl_ptr url); // 1 true, 0 false, < 0 error
|
||||
|
||||
int32_t rusturl_set_scheme(rusturl_ptr url, const char *scheme, size_t len);
|
||||
int32_t rusturl_set_username(rusturl_ptr url, const char *user, size_t len);
|
||||
int32_t rusturl_set_password(rusturl_ptr url, const char *pass, size_t len);
|
||||
int32_t rusturl_set_host_and_port(rusturl_ptr url, const char *hostport, size_t len);
|
||||
int32_t rusturl_set_host(rusturl_ptr url, const char *host, size_t len);
|
||||
int32_t rusturl_set_port(rusturl_ptr url, const char *port, size_t len);
|
||||
int32_t rusturl_set_port_no(rusturl_ptr url, const int32_t port);
|
||||
int32_t rusturl_set_path(rusturl_ptr url, const char *path, size_t len);
|
||||
int32_t rusturl_set_query(rusturl_ptr url, const char *path, size_t len);
|
||||
int32_t rusturl_set_fragment(rusturl_ptr url, const char *path, size_t len);
|
||||
|
||||
int32_t rusturl_resolve(rusturl_ptr url, const char *relative, size_t len, void*);
|
||||
int32_t rusturl_common_base_spec(rusturl_ptr url1, rusturl_ptr url2, void*);
|
||||
int32_t rusturl_relative_spec(rusturl_ptr url1, rusturl_ptr url2, void*);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __RUST_URL_CAPI
|
@ -1,57 +0,0 @@
|
||||
extern crate libc;
|
||||
use libc::size_t;
|
||||
|
||||
extern crate std;
|
||||
use std::ptr;
|
||||
|
||||
use error_mapping::*;
|
||||
|
||||
extern "C" {
|
||||
fn c_fn_set_size(user: *mut libc::c_void, size: size_t) -> i32;
|
||||
fn c_fn_get_buffer(user: *mut libc::c_void) -> *mut libc::c_char;
|
||||
}
|
||||
|
||||
pub trait StringContainer {
|
||||
fn set_size(&self, size_t) -> i32;
|
||||
fn get_buffer(&self) -> *mut libc::c_char;
|
||||
fn assign(&self, content: &str) -> i32;
|
||||
}
|
||||
|
||||
impl StringContainer for *mut libc::c_void {
|
||||
fn set_size(&self, size: size_t) -> i32 {
|
||||
if (*self).is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
unsafe {
|
||||
c_fn_set_size(*self, size);
|
||||
}
|
||||
|
||||
return NSError::OK.error_code();
|
||||
}
|
||||
fn get_buffer(&self) -> *mut libc::c_char {
|
||||
if (*self).is_null() {
|
||||
return 0 as *mut libc::c_char;
|
||||
}
|
||||
unsafe {
|
||||
c_fn_get_buffer(*self)
|
||||
}
|
||||
}
|
||||
fn assign(&self, content: &str) -> i32 {
|
||||
if (*self).is_null() {
|
||||
return NSError::InvalidArg.error_code();
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let slice = content.as_bytes();
|
||||
c_fn_set_size(*self, slice.len());
|
||||
let buf = c_fn_get_buffer(*self);
|
||||
if buf.is_null() {
|
||||
return NSError::Failure.error_code();
|
||||
}
|
||||
|
||||
ptr::copy(slice.as_ptr(), buf as *mut u8, slice.len());
|
||||
}
|
||||
|
||||
NSError::OK.error_code()
|
||||
}
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
all:
|
||||
cd .. && cargo build
|
||||
g++ -Wall -o test test.cpp ../target/debug/librust*.a -ldl -lpthread -lrt -lgcc_s -lpthread -lc -lm -std=c++0x
|
||||
./test
|
@ -1,141 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "../src/rust-url-capi.h"
|
||||
|
||||
class StringContainer
|
||||
{
|
||||
public:
|
||||
StringContainer()
|
||||
{
|
||||
mBuffer = nullptr;
|
||||
mLength = 0;
|
||||
}
|
||||
|
||||
~StringContainer()
|
||||
{
|
||||
free(mBuffer);
|
||||
mBuffer = nullptr;
|
||||
}
|
||||
|
||||
void SetSize(size_t size)
|
||||
{
|
||||
mLength = size;
|
||||
if (mBuffer) {
|
||||
mBuffer = (char *)realloc(mBuffer, size);
|
||||
return;
|
||||
}
|
||||
mBuffer = (char *)malloc(size);
|
||||
}
|
||||
|
||||
char * GetBuffer()
|
||||
{
|
||||
return mBuffer;
|
||||
}
|
||||
|
||||
void CheckEquals(const char * ref) {
|
||||
int32_t refLen = strlen(ref);
|
||||
printf("CheckEquals: %s (len:%d)\n", ref, refLen);
|
||||
if (refLen != mLength || strncmp(mBuffer, ref, mLength)) {
|
||||
printf("\t--- ERROR ---\n");
|
||||
printf("Got : ");
|
||||
fwrite(mBuffer, mLength, 1, stdout);
|
||||
printf(" (len:%d)\n", mLength);
|
||||
exit(-1);
|
||||
}
|
||||
printf("-> OK\n");
|
||||
}
|
||||
private:
|
||||
int32_t mLength;
|
||||
char * mBuffer;
|
||||
};
|
||||
|
||||
extern "C" int32_t c_fn_set_size(void * container, size_t size)
|
||||
{
|
||||
((StringContainer *) container)->SetSize(size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C" char * c_fn_get_buffer(void * container)
|
||||
{
|
||||
return ((StringContainer *) container)->GetBuffer();
|
||||
}
|
||||
|
||||
#define TEST_CALL(func, expected) \
|
||||
{ \
|
||||
int32_t code = func; \
|
||||
printf("%s -> code %d\n", #func, code); \
|
||||
assert(code == expected); \
|
||||
printf("-> OK\n"); \
|
||||
} \
|
||||
|
||||
|
||||
int main() {
|
||||
// Create URL
|
||||
rusturl_ptr url = rusturl_new("http://example.com/path/some/file.txt",
|
||||
strlen("http://example.com/path/some/file.txt"));
|
||||
assert(url); // Check we have a URL
|
||||
|
||||
StringContainer container;
|
||||
|
||||
TEST_CALL(rusturl_get_spec(url, &container), 0);
|
||||
container.CheckEquals("http://example.com/path/some/file.txt");
|
||||
TEST_CALL(rusturl_set_host(url, "test.com", strlen("test.com")), 0);
|
||||
TEST_CALL(rusturl_get_host(url, &container), 0);
|
||||
container.CheckEquals("test.com");
|
||||
TEST_CALL(rusturl_get_path(url, &container), 0);
|
||||
container.CheckEquals("/path/some/file.txt");
|
||||
TEST_CALL(rusturl_set_path(url, "hello/../else.txt", strlen("hello/../else.txt")), 0);
|
||||
TEST_CALL(rusturl_get_path(url, &container), 0);
|
||||
container.CheckEquals("/else.txt");
|
||||
TEST_CALL(rusturl_resolve(url, "./bla/file.txt", strlen("./bla/file.txt"), &container), 0);
|
||||
container.CheckEquals("http://test.com/bla/file.txt");
|
||||
TEST_CALL(rusturl_get_scheme(url, &container), 0);
|
||||
container.CheckEquals("http");
|
||||
TEST_CALL(rusturl_set_username(url, "user", strlen("user")), 0);
|
||||
TEST_CALL(rusturl_get_username(url, &container), 0);
|
||||
container.CheckEquals("user");
|
||||
TEST_CALL(rusturl_get_spec(url, &container), 0);
|
||||
container.CheckEquals("http://user@test.com/else.txt");
|
||||
TEST_CALL(rusturl_set_password(url, "pass", strlen("pass")), 0);
|
||||
TEST_CALL(rusturl_get_password(url, &container), 0);
|
||||
container.CheckEquals("pass");
|
||||
TEST_CALL(rusturl_get_spec(url, &container), 0);
|
||||
container.CheckEquals("http://user:pass@test.com/else.txt");
|
||||
TEST_CALL(rusturl_set_username(url, "", strlen("")), 0);
|
||||
TEST_CALL(rusturl_set_password(url, "", strlen("")), 0);
|
||||
TEST_CALL(rusturl_get_spec(url, &container), 0);
|
||||
container.CheckEquals("http://test.com/else.txt");
|
||||
TEST_CALL(rusturl_set_host_and_port(url, "example.org:1234", strlen("example.org:1234")), 0);
|
||||
TEST_CALL(rusturl_get_host(url, &container), 0);
|
||||
container.CheckEquals("example.org");
|
||||
assert(rusturl_get_port(url) == 1234);
|
||||
TEST_CALL(rusturl_set_port(url, "9090", strlen("9090")), 0);
|
||||
assert(rusturl_get_port(url) == 9090);
|
||||
TEST_CALL(rusturl_set_query(url, "x=1", strlen("x=1")), 0);
|
||||
TEST_CALL(rusturl_get_query(url, &container), 0);
|
||||
container.CheckEquals("x=1");
|
||||
TEST_CALL(rusturl_set_fragment(url, "fragment", strlen("fragment")), 0);
|
||||
TEST_CALL(rusturl_get_fragment(url, &container), 0);
|
||||
container.CheckEquals("fragment");
|
||||
TEST_CALL(rusturl_get_spec(url, &container), 0);
|
||||
container.CheckEquals("http://example.org:9090/else.txt?x=1#fragment");
|
||||
|
||||
// Free the URL
|
||||
rusturl_free(url);
|
||||
|
||||
url = rusturl_new("http://example.com/#",
|
||||
strlen("http://example.com/#"));
|
||||
assert(url); // Check we have a URL
|
||||
|
||||
assert(rusturl_has_fragment(url) == 1);
|
||||
TEST_CALL(rusturl_set_fragment(url, "", 0), 0);
|
||||
assert(rusturl_has_fragment(url) == 0);
|
||||
TEST_CALL(rusturl_get_spec(url, &container), 0);
|
||||
container.CheckEquals("http://example.com/");
|
||||
|
||||
rusturl_free(url);
|
||||
|
||||
printf("SUCCESS\n");
|
||||
return 0;
|
||||
}
|
1
third_party/rust/idna/.cargo-checksum.json
vendored
1
third_party/rust/idna/.cargo-checksum.json
vendored
@ -1 +0,0 @@
|
||||
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","Cargo.toml":"d0aad6852ec947597bab20cd85ad15e014044cb7717c9a91f4b8da0081b7134d","src/IdnaMappingTable.txt":"5e9f5929130b713e698162ac5b60a99ccfb831606686b1c50777cd920b55dee2","src/lib.rs":"b8e85707a40e8472d2e90849a1e0e24a7442f3c4614a57a60125f87d11e985a4","src/make_uts46_mapping_table.py":"36fa77c443672f15872d60438f96b7302eae28ec506b60a892579debc79b8e39","src/punycode.rs":"df883ec00b35cab38f96992667eef0767d8587746bfdab0613a03e4c49a26c16","src/uts46.rs":"c879570c511f210565ac0dbce8a212be6503e62cd7ed698830804bb7729e98de","src/uts46_mapping_table.rs":"daa59e4b6399a738f73967b222b7dce6c9706a471d306330d77380c89089fa24","tests/IdnaTest.txt":"12e7e150b04a7a2cb1f9b72222174844342218807126e9dbc53069505a5f6000","tests/punycode.rs":"2f4086411c00b0641377afe81071e51a695110a0cce474287557738c07f74322","tests/punycode_tests.json":"3d4ac0cf25984c37b9ce197f5df680a0136f728fb8ec82bc76624e42139eb3a8","tests/tests.rs":"bb92e129dc5e17e9a86ec6062dd7b3f4c905c4af69e773d7c70efea177654c7b","tests/uts46.rs":"be9f928c60b88a8e277ddfb1769f09a8cd273e2e120e8450fb9b34f4dc852b37"},"package":"1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"}
|
0
third_party/rust/idna/.cargo-ok
vendored
0
third_party/rust/idna/.cargo-ok
vendored
24
third_party/rust/idna/Cargo.toml
vendored
24
third_party/rust/idna/Cargo.toml
vendored
@ -1,24 +0,0 @@
|
||||
[package]
|
||||
name = "idna"
|
||||
version = "0.1.0"
|
||||
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
|
||||
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
|
||||
repository = "https://github.com/servo/rust-url/"
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[lib]
|
||||
doctest = false
|
||||
test = false
|
||||
|
||||
[[test]]
|
||||
name = "tests"
|
||||
harness = false
|
||||
|
||||
[dev-dependencies]
|
||||
rustc-test = "0.1"
|
||||
rustc-serialize = "0.3"
|
||||
|
||||
[dependencies]
|
||||
unicode-bidi = "0.2.3"
|
||||
unicode-normalization = "0.1.2"
|
||||
matches = "0.1"
|
8190
third_party/rust/idna/src/IdnaMappingTable.txt
vendored
8190
third_party/rust/idna/src/IdnaMappingTable.txt
vendored
File diff suppressed because it is too large
Load Diff
73
third_party/rust/idna/src/lib.rs
vendored
73
third_party/rust/idna/src/lib.rs
vendored
@ -1,73 +0,0 @@
|
||||
// Copyright 2016 Simon Sapin.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! This Rust crate implements IDNA
|
||||
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
|
||||
//!
|
||||
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
|
||||
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
|
||||
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
|
||||
//!
|
||||
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
|
||||
//!
|
||||
//! > Initially, domain names were restricted to ASCII characters.
|
||||
//! > A system was introduced in 2003 for internationalized domain names (IDN).
|
||||
//! > This system is called Internationalizing Domain Names for Applications,
|
||||
//! > or IDNA2003 for short.
|
||||
//! > This mechanism supports IDNs by means of a client software transformation
|
||||
//! > into a format known as Punycode.
|
||||
//! > A revision of IDNA was approved in 2010 (IDNA2008).
|
||||
//! > This revision has a number of incompatibilities with IDNA2003.
|
||||
//! >
|
||||
//! > The incompatibilities force implementers of client software,
|
||||
//! > such as browsers and emailers,
|
||||
//! > to face difficult choices during the transition period
|
||||
//! > as registries shift from IDNA2003 to IDNA2008.
|
||||
//! > This document specifies a mechanism
|
||||
//! > that minimizes the impact of this transition for client software,
|
||||
//! > allowing client software to access domains that are valid under either system.
|
||||
|
||||
#[macro_use] extern crate matches;
|
||||
extern crate unicode_bidi;
|
||||
extern crate unicode_normalization;
|
||||
|
||||
pub mod punycode;
|
||||
pub mod uts46;
|
||||
|
||||
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
|
||||
///
|
||||
/// Return the ASCII representation a domain name,
|
||||
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
|
||||
/// and using Punycode as necessary.
|
||||
///
|
||||
/// This process may fail.
|
||||
pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
|
||||
uts46::to_ascii(domain, uts46::Flags {
|
||||
use_std3_ascii_rules: false,
|
||||
transitional_processing: true, // XXX: switch when Firefox does
|
||||
verify_dns_length: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
|
||||
///
|
||||
/// Return the Unicode representation of a domain name,
|
||||
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
|
||||
/// and decoding Punycode as necessary.
|
||||
///
|
||||
/// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation)
|
||||
/// but always returns a string for the mapped domain.
|
||||
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
|
||||
uts46::to_unicode(domain, uts46::Flags {
|
||||
use_std3_ascii_rules: false,
|
||||
|
||||
// Unused:
|
||||
transitional_processing: true,
|
||||
verify_dns_length: false,
|
||||
})
|
||||
}
|
@ -1,56 +0,0 @@
|
||||
# Copyright 2013-2014 Valentin Gosu.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
# Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs
|
||||
# You can get the latest idna table from
|
||||
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt
|
||||
|
||||
print('''\
|
||||
// Copyright 2013-2014 Valentin Gosu.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Generated by make_idna_table.py
|
||||
|
||||
static TABLE: &'static [Range] = &[
|
||||
''')
|
||||
|
||||
txt = open("IdnaMappingTable.txt")
|
||||
|
||||
def char(s):
|
||||
return (unichr(int(s, 16))
|
||||
.encode('utf8')
|
||||
.replace('\\', '\\\\')
|
||||
.replace('"', '\\"')
|
||||
.replace('\0', '\\0'))
|
||||
|
||||
for line in txt:
|
||||
# remove comments
|
||||
line, _, _ = line.partition('#')
|
||||
# skip empty lines
|
||||
if len(line.strip()) == 0:
|
||||
continue
|
||||
fields = line.split(';')
|
||||
if fields[0].strip() == 'D800..DFFF':
|
||||
continue # Surrogates don't occur in Rust strings.
|
||||
first, _, last = fields[0].strip().partition('..')
|
||||
if not last:
|
||||
last = first
|
||||
mapping = fields[1].strip().replace('_', ' ').title().replace(' ', '')
|
||||
if len(fields) > 2:
|
||||
if fields[2].strip():
|
||||
mapping += '("%s")' % ''.join(char(c) for c in fields[2].strip().split(' '))
|
||||
elif mapping == "Deviation":
|
||||
mapping += '("")'
|
||||
print(" Range { from: '%s', to: '%s', mapping: %s }," % (char(first), char(last), mapping))
|
||||
|
||||
print("];")
|
213
third_party/rust/idna/src/punycode.rs
vendored
213
third_party/rust/idna/src/punycode.rs
vendored
@ -1,213 +0,0 @@
|
||||
// Copyright 2013 Simon Sapin.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
|
||||
//!
|
||||
//! Since Punycode fundamentally works on unicode code points,
|
||||
//! `encode` and `decode` take and return slices and vectors of `char`.
|
||||
//! `encode_str` and `decode_to_string` provide convenience wrappers
|
||||
//! that convert from and to Rust’s UTF-8 based `str` and `String` types.
|
||||
|
||||
use std::u32;
|
||||
use std::char;
|
||||
use std::ascii::AsciiExt;
|
||||
|
||||
// Bootstring parameters for Punycode
|
||||
static BASE: u32 = 36;
|
||||
static T_MIN: u32 = 1;
|
||||
static T_MAX: u32 = 26;
|
||||
static SKEW: u32 = 38;
|
||||
static DAMP: u32 = 700;
|
||||
static INITIAL_BIAS: u32 = 72;
|
||||
static INITIAL_N: u32 = 0x80;
|
||||
static DELIMITER: char = '-';
|
||||
|
||||
|
||||
#[inline]
|
||||
fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
|
||||
delta /= if first_time { DAMP } else { 2 };
|
||||
delta += delta / num_points;
|
||||
let mut k = 0;
|
||||
while delta > ((BASE - T_MIN) * T_MAX) / 2 {
|
||||
delta /= BASE - T_MIN;
|
||||
k += BASE;
|
||||
}
|
||||
k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW))
|
||||
}
|
||||
|
||||
|
||||
/// Convert Punycode to an Unicode `String`.
|
||||
///
|
||||
/// This is a convenience wrapper around `decode`.
|
||||
#[inline]
|
||||
pub fn decode_to_string(input: &str) -> Option<String> {
|
||||
decode(input).map(|chars| chars.into_iter().collect())
|
||||
}
|
||||
|
||||
|
||||
/// Convert Punycode to Unicode.
|
||||
///
|
||||
/// Return None on malformed input or overflow.
|
||||
/// Overflow can only happen on inputs that take more than
|
||||
/// 63 encoded bytes, the DNS limit on domain name labels.
|
||||
pub fn decode(input: &str) -> Option<Vec<char>> {
|
||||
// Handle "basic" (ASCII) code points.
|
||||
// They are encoded as-is before the last delimiter, if any.
|
||||
let (mut output, input) = match input.rfind(DELIMITER) {
|
||||
None => (Vec::new(), input),
|
||||
Some(position) => (
|
||||
input[..position].chars().collect(),
|
||||
if position > 0 { &input[position + 1..] } else { input }
|
||||
)
|
||||
};
|
||||
let mut code_point = INITIAL_N;
|
||||
let mut bias = INITIAL_BIAS;
|
||||
let mut i = 0;
|
||||
let mut iter = input.bytes();
|
||||
loop {
|
||||
let previous_i = i;
|
||||
let mut weight = 1;
|
||||
let mut k = BASE;
|
||||
let mut byte = match iter.next() {
|
||||
None => break,
|
||||
Some(byte) => byte,
|
||||
};
|
||||
// Decode a generalized variable-length integer into delta,
|
||||
// which gets added to i.
|
||||
loop {
|
||||
let digit = match byte {
|
||||
byte @ b'0' ... b'9' => byte - b'0' + 26,
|
||||
byte @ b'A' ... b'Z' => byte - b'A',
|
||||
byte @ b'a' ... b'z' => byte - b'a',
|
||||
_ => return None
|
||||
} as u32;
|
||||
if digit > (u32::MAX - i) / weight {
|
||||
return None // Overflow
|
||||
}
|
||||
i += digit * weight;
|
||||
let t = if k <= bias { T_MIN }
|
||||
else if k >= bias + T_MAX { T_MAX }
|
||||
else { k - bias };
|
||||
if digit < t {
|
||||
break
|
||||
}
|
||||
if weight > u32::MAX / (BASE - t) {
|
||||
return None // Overflow
|
||||
}
|
||||
weight *= BASE - t;
|
||||
k += BASE;
|
||||
byte = match iter.next() {
|
||||
None => return None, // End of input before the end of this delta
|
||||
Some(byte) => byte,
|
||||
};
|
||||
}
|
||||
let length = output.len() as u32;
|
||||
bias = adapt(i - previous_i, length + 1, previous_i == 0);
|
||||
if i / (length + 1) > u32::MAX - code_point {
|
||||
return None // Overflow
|
||||
}
|
||||
// i was supposed to wrap around from length+1 to 0,
|
||||
// incrementing code_point each time.
|
||||
code_point += i / (length + 1);
|
||||
i %= length + 1;
|
||||
let c = match char::from_u32(code_point) {
|
||||
Some(c) => c,
|
||||
None => return None
|
||||
};
|
||||
output.insert(i as usize, c);
|
||||
i += 1;
|
||||
}
|
||||
Some(output)
|
||||
}
|
||||
|
||||
|
||||
/// Convert an Unicode `str` to Punycode.
|
||||
///
|
||||
/// This is a convenience wrapper around `encode`.
|
||||
#[inline]
|
||||
pub fn encode_str(input: &str) -> Option<String> {
|
||||
encode(&input.chars().collect::<Vec<char>>())
|
||||
}
|
||||
|
||||
|
||||
/// Convert Unicode to Punycode.
|
||||
///
|
||||
/// Return None on overflow, which can only happen on inputs that would take more than
|
||||
/// 63 encoded bytes, the DNS limit on domain name labels.
|
||||
pub fn encode(input: &[char]) -> Option<String> {
|
||||
// Handle "basic" (ASCII) code points. They are encoded as-is.
|
||||
let output_bytes = input.iter().filter_map(|&c|
|
||||
if c.is_ascii() { Some(c as u8) } else { None }
|
||||
).collect();
|
||||
let mut output = unsafe { String::from_utf8_unchecked(output_bytes) };
|
||||
let basic_length = output.len() as u32;
|
||||
if basic_length > 0 {
|
||||
output.push_str("-")
|
||||
}
|
||||
let mut code_point = INITIAL_N;
|
||||
let mut delta = 0;
|
||||
let mut bias = INITIAL_BIAS;
|
||||
let mut processed = basic_length;
|
||||
let input_length = input.len() as u32;
|
||||
while processed < input_length {
|
||||
// All code points < code_point have been handled already.
|
||||
// Find the next larger one.
|
||||
let min_code_point = input.iter().map(|&c| c as u32)
|
||||
.filter(|&c| c >= code_point).min().unwrap();
|
||||
if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
|
||||
return None // Overflow
|
||||
}
|
||||
// Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
|
||||
delta += (min_code_point - code_point) * (processed + 1);
|
||||
code_point = min_code_point;
|
||||
for &c in input {
|
||||
let c = c as u32;
|
||||
if c < code_point {
|
||||
delta += 1;
|
||||
if delta == 0 {
|
||||
return None // Overflow
|
||||
}
|
||||
}
|
||||
if c == code_point {
|
||||
// Represent delta as a generalized variable-length integer:
|
||||
let mut q = delta;
|
||||
let mut k = BASE;
|
||||
loop {
|
||||
let t = if k <= bias { T_MIN }
|
||||
else if k >= bias + T_MAX { T_MAX }
|
||||
else { k - bias };
|
||||
if q < t {
|
||||
break
|
||||
}
|
||||
let value = t + ((q - t) % (BASE - t));
|
||||
value_to_digit(value, &mut output);
|
||||
q = (q - t) / (BASE - t);
|
||||
k += BASE;
|
||||
}
|
||||
value_to_digit(q, &mut output);
|
||||
bias = adapt(delta, processed + 1, processed == basic_length);
|
||||
delta = 0;
|
||||
processed += 1;
|
||||
}
|
||||
}
|
||||
delta += 1;
|
||||
code_point += 1;
|
||||
}
|
||||
Some(output)
|
||||
}
|
||||
|
||||
|
||||
#[inline]
|
||||
fn value_to_digit(value: u32, output: &mut String) {
|
||||
let code_point = match value {
|
||||
0 ... 25 => value + 0x61, // a..z
|
||||
26 ... 35 => value - 26 + 0x30, // 0..9
|
||||
_ => panic!()
|
||||
};
|
||||
unsafe { output.as_mut_vec().push(code_point as u8) }
|
||||
}
|
322
third_party/rust/idna/src/uts46.rs
vendored
322
third_party/rust/idna/src/uts46.rs
vendored
@ -1,322 +0,0 @@
|
||||
// Copyright 2013-2014 Valentin Gosu.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! [*Unicode IDNA Compatibility Processing*
|
||||
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
|
||||
|
||||
use self::Mapping::*;
|
||||
use punycode;
|
||||
use std::ascii::AsciiExt;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
use unicode_normalization::char::is_combining_mark;
|
||||
use unicode_bidi::{BidiClass, bidi_class};
|
||||
|
||||
include!("uts46_mapping_table.rs");
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Mapping {
|
||||
Valid,
|
||||
Ignored,
|
||||
Mapped(&'static str),
|
||||
Deviation(&'static str),
|
||||
Disallowed,
|
||||
DisallowedStd3Valid,
|
||||
DisallowedStd3Mapped(&'static str),
|
||||
}
|
||||
|
||||
struct Range {
|
||||
from: char,
|
||||
to: char,
|
||||
mapping: Mapping,
|
||||
}
|
||||
|
||||
fn find_char(codepoint: char) -> &'static Mapping {
|
||||
let mut min = 0;
|
||||
let mut max = TABLE.len() - 1;
|
||||
while max > min {
|
||||
let mid = (min + max) >> 1;
|
||||
if codepoint > TABLE[mid].to {
|
||||
min = mid;
|
||||
} else if codepoint < TABLE[mid].from {
|
||||
max = mid;
|
||||
} else {
|
||||
min = mid;
|
||||
max = mid;
|
||||
}
|
||||
}
|
||||
&TABLE[min].mapping
|
||||
}
|
||||
|
||||
fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
|
||||
match *find_char(codepoint) {
|
||||
Mapping::Valid => output.push(codepoint),
|
||||
Mapping::Ignored => {},
|
||||
Mapping::Mapped(mapping) => output.push_str(mapping),
|
||||
Mapping::Deviation(mapping) => {
|
||||
if flags.transitional_processing {
|
||||
output.push_str(mapping)
|
||||
} else {
|
||||
output.push(codepoint)
|
||||
}
|
||||
}
|
||||
Mapping::Disallowed => {
|
||||
errors.push(Error::DissallowedCharacter);
|
||||
output.push(codepoint);
|
||||
}
|
||||
Mapping::DisallowedStd3Valid => {
|
||||
if flags.use_std3_ascii_rules {
|
||||
errors.push(Error::DissallowedByStd3AsciiRules);
|
||||
}
|
||||
output.push(codepoint)
|
||||
}
|
||||
Mapping::DisallowedStd3Mapped(mapping) => {
|
||||
if flags.use_std3_ascii_rules {
|
||||
errors.push(Error::DissallowedMappedInStd3);
|
||||
}
|
||||
output.push_str(mapping)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// http://tools.ietf.org/html/rfc5893#section-2
|
||||
fn passes_bidi(label: &str, transitional_processing: bool) -> bool {
|
||||
let mut chars = label.chars();
|
||||
let class = match chars.next() {
|
||||
Some(c) => bidi_class(c),
|
||||
None => return true, // empty string
|
||||
};
|
||||
|
||||
if class == BidiClass::L
|
||||
|| (class == BidiClass::ON && transitional_processing) // starts with \u200D
|
||||
|| (class == BidiClass::ES && transitional_processing) // hack: 1.35.+33.49
|
||||
|| class == BidiClass::EN // hack: starts with number 0à.\u05D0
|
||||
{ // LTR
|
||||
// Rule 5
|
||||
loop {
|
||||
match chars.next() {
|
||||
Some(c) => {
|
||||
let c = bidi_class(c);
|
||||
if !matches!(c, BidiClass::L | BidiClass::EN |
|
||||
BidiClass::ES | BidiClass::CS |
|
||||
BidiClass::ET | BidiClass::ON |
|
||||
BidiClass::BN | BidiClass::NSM) {
|
||||
return false;
|
||||
}
|
||||
},
|
||||
None => { break; },
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 6
|
||||
let mut rev_chars = label.chars().rev();
|
||||
let mut last = rev_chars.next();
|
||||
loop { // must end in L or EN followed by 0 or more NSM
|
||||
match last {
|
||||
Some(c) if bidi_class(c) == BidiClass::NSM => {
|
||||
last = rev_chars.next();
|
||||
continue;
|
||||
}
|
||||
_ => { break; },
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: does not pass for àˇ.\u05D0
|
||||
// match last {
|
||||
// Some(c) if bidi_class(c) == BidiClass::L
|
||||
// || bidi_class(c) == BidiClass::EN => {},
|
||||
// Some(c) => { return false; },
|
||||
// _ => {}
|
||||
// }
|
||||
|
||||
} else if class == BidiClass::R || class == BidiClass::AL { // RTL
|
||||
let mut found_en = false;
|
||||
let mut found_an = false;
|
||||
|
||||
// Rule 2
|
||||
loop {
|
||||
match chars.next() {
|
||||
Some(c) => {
|
||||
let char_class = bidi_class(c);
|
||||
|
||||
if char_class == BidiClass::EN {
|
||||
found_en = true;
|
||||
}
|
||||
if char_class == BidiClass::AN {
|
||||
found_an = true;
|
||||
}
|
||||
|
||||
if !matches!(char_class, BidiClass::R | BidiClass::AL |
|
||||
BidiClass::AN | BidiClass::EN |
|
||||
BidiClass::ES | BidiClass::CS |
|
||||
BidiClass::ET | BidiClass::ON |
|
||||
BidiClass::BN | BidiClass::NSM) {
|
||||
return false;
|
||||
}
|
||||
},
|
||||
None => { break; },
|
||||
}
|
||||
}
|
||||
// Rule 3
|
||||
let mut rev_chars = label.chars().rev();
|
||||
let mut last = rev_chars.next();
|
||||
loop { // must end in L or EN followed by 0 or more NSM
|
||||
match last {
|
||||
Some(c) if bidi_class(c) == BidiClass::NSM => {
|
||||
last = rev_chars.next();
|
||||
continue;
|
||||
}
|
||||
_ => { break; },
|
||||
}
|
||||
}
|
||||
match last {
|
||||
Some(c) if matches!(bidi_class(c), BidiClass::R | BidiClass::AL |
|
||||
BidiClass::EN | BidiClass::AN) => {},
|
||||
_ => { return false; }
|
||||
}
|
||||
|
||||
// Rule 4
|
||||
if found_an && found_en {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Rule 2: Should start with L or R/AL
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
|
||||
fn validate(label: &str, flags: Flags, errors: &mut Vec<Error>) {
|
||||
if label.nfc().ne(label.chars()) {
|
||||
errors.push(Error::ValidityCriteria);
|
||||
}
|
||||
|
||||
// Can not contain '.' since the input is from .split('.')
|
||||
if {
|
||||
let mut chars = label.chars().skip(2);
|
||||
let third = chars.next();
|
||||
let fourth = chars.next();
|
||||
(third, fourth) == (Some('-'), Some('-'))
|
||||
} || label.starts_with("-")
|
||||
|| label.ends_with("-")
|
||||
|| label.chars().next().map_or(false, is_combining_mark)
|
||||
|| label.chars().any(|c| match *find_char(c) {
|
||||
Mapping::Valid => false,
|
||||
Mapping::Deviation(_) => flags.transitional_processing,
|
||||
Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules,
|
||||
_ => true,
|
||||
})
|
||||
|| !passes_bidi(label, flags.transitional_processing)
|
||||
{
|
||||
errors.push(Error::ValidityCriteria)
|
||||
}
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#Processing
|
||||
fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
|
||||
let mut mapped = String::new();
|
||||
for c in domain.chars() {
|
||||
map_char(c, flags, &mut mapped, errors)
|
||||
}
|
||||
let normalized: String = mapped.nfc().collect();
|
||||
let mut validated = String::new();
|
||||
for label in normalized.split('.') {
|
||||
if validated.len() > 0 {
|
||||
validated.push('.');
|
||||
}
|
||||
if label.starts_with("xn--") {
|
||||
match punycode::decode_to_string(&label["xn--".len()..]) {
|
||||
Some(decoded_label) => {
|
||||
let flags = Flags { transitional_processing: false, ..flags };
|
||||
validate(&decoded_label, flags, errors);
|
||||
validated.push_str(&decoded_label)
|
||||
}
|
||||
None => errors.push(Error::PunycodeError)
|
||||
}
|
||||
} else {
|
||||
validate(label, flags, errors);
|
||||
validated.push_str(label)
|
||||
}
|
||||
}
|
||||
validated
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Flags {
|
||||
pub use_std3_ascii_rules: bool,
|
||||
pub transitional_processing: bool,
|
||||
pub verify_dns_length: bool,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
|
||||
enum Error {
|
||||
PunycodeError,
|
||||
ValidityCriteria,
|
||||
DissallowedByStd3AsciiRules,
|
||||
DissallowedMappedInStd3,
|
||||
DissallowedCharacter,
|
||||
TooLongForDns,
|
||||
}
|
||||
|
||||
/// Errors recorded during UTS #46 processing.
|
||||
///
|
||||
/// This is opaque for now, only indicating the precense of at least one error.
|
||||
/// More details may be exposed in the future.
|
||||
#[derive(Debug)]
|
||||
pub struct Errors(Vec<Error>);
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToASCII
|
||||
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
|
||||
let mut errors = Vec::new();
|
||||
let mut result = String::new();
|
||||
for label in processing(domain, flags, &mut errors).split('.') {
|
||||
if result.len() > 0 {
|
||||
result.push('.');
|
||||
}
|
||||
if label.is_ascii() {
|
||||
result.push_str(label);
|
||||
} else {
|
||||
match punycode::encode_str(label) {
|
||||
Some(x) => {
|
||||
result.push_str("xn--");
|
||||
result.push_str(&x);
|
||||
},
|
||||
None => errors.push(Error::PunycodeError)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if flags.verify_dns_length {
|
||||
let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
|
||||
if domain.len() < 1 || domain.len() > 253 ||
|
||||
domain.split('.').any(|label| label.len() < 1 || label.len() > 63) {
|
||||
errors.push(Error::TooLongForDns)
|
||||
}
|
||||
}
|
||||
if errors.is_empty() {
|
||||
Ok(result)
|
||||
} else {
|
||||
Err(Errors(errors))
|
||||
}
|
||||
}
|
||||
|
||||
/// http://www.unicode.org/reports/tr46/#ToUnicode
|
||||
///
|
||||
/// Only `use_std3_ascii_rules` is used in `flags`.
|
||||
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
|
||||
flags.transitional_processing = false;
|
||||
let mut errors = Vec::new();
|
||||
let domain = processing(domain, flags, &mut errors);
|
||||
let errors = if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Errors(errors))
|
||||
};
|
||||
(domain, errors)
|
||||
}
|
8191
third_party/rust/idna/src/uts46_mapping_table.rs
vendored
8191
third_party/rust/idna/src/uts46_mapping_table.rs
vendored
File diff suppressed because it is too large
Load Diff
5097
third_party/rust/idna/tests/IdnaTest.txt
vendored
5097
third_party/rust/idna/tests/IdnaTest.txt
vendored
File diff suppressed because it is too large
Load Diff
65
third_party/rust/idna/tests/punycode.rs
vendored
65
third_party/rust/idna/tests/punycode.rs
vendored
@ -1,65 +0,0 @@
|
||||
// Copyright 2013 Simon Sapin.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use idna::punycode::{decode, encode_str};
|
||||
use rustc_serialize::json::{Json, Object};
|
||||
use test::TestFn;
|
||||
|
||||
fn one_test(decoded: &str, encoded: &str) {
|
||||
match decode(encoded) {
|
||||
None => panic!("Decoding {} failed.", encoded),
|
||||
Some(result) => {
|
||||
let result = result.into_iter().collect::<String>();
|
||||
assert!(result == decoded,
|
||||
format!("Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
|
||||
encoded, result, decoded))
|
||||
}
|
||||
}
|
||||
|
||||
match encode_str(decoded) {
|
||||
None => panic!("Encoding {} failed.", decoded),
|
||||
Some(result) => {
|
||||
assert!(result == encoded,
|
||||
format!("Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
|
||||
decoded, result, encoded))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_string<'a>(map: &'a Object, key: &str) -> &'a str {
|
||||
match map.get(&key.to_string()) {
|
||||
Some(&Json::String(ref s)) => s,
|
||||
None => "",
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
|
||||
match Json::from_str(include_str!("punycode_tests.json")) {
|
||||
Ok(Json::Array(tests)) => for (i, test) in tests.into_iter().enumerate() {
|
||||
match test {
|
||||
Json::Object(o) => {
|
||||
let test_name = {
|
||||
let desc = get_string(&o, "description");
|
||||
if desc.is_empty() {
|
||||
format!("Punycode {}", i + 1)
|
||||
} else {
|
||||
format!("Punycode {}: {}", i + 1, desc)
|
||||
}
|
||||
};
|
||||
add_test(test_name, TestFn::dyn_test_fn(move || one_test(
|
||||
get_string(&o, "decoded"),
|
||||
get_string(&o, "encoded"),
|
||||
)))
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
},
|
||||
other => panic!("{:?}", other)
|
||||
}
|
||||
}
|
120
third_party/rust/idna/tests/punycode_tests.json
vendored
120
third_party/rust/idna/tests/punycode_tests.json
vendored
@ -1,120 +0,0 @@
|
||||
[
|
||||
{
|
||||
"description": "These tests are copied from https://github.com/bestiejs/punycode.js/blob/master/tests/tests.js , used under the MIT license.",
|
||||
"decoded": "",
|
||||
"encoded": ""
|
||||
},
|
||||
{
|
||||
"description": "a single basic code point",
|
||||
"decoded": "Bach",
|
||||
"encoded": "Bach-"
|
||||
},
|
||||
{
|
||||
"description": "a single non-ASCII character",
|
||||
"decoded": "\u00FC",
|
||||
"encoded": "tda"
|
||||
},
|
||||
{
|
||||
"description": "multiple non-ASCII characters",
|
||||
"decoded": "\u00FC\u00EB\u00E4\u00F6\u2665",
|
||||
"encoded": "4can8av2009b"
|
||||
},
|
||||
{
|
||||
"description": "mix of ASCII and non-ASCII characters",
|
||||
"decoded": "b\u00FCcher",
|
||||
"encoded": "bcher-kva"
|
||||
},
|
||||
{
|
||||
"description": "long string with both ASCII and non-ASCII characters",
|
||||
"decoded": "Willst du die Bl\u00FCthe des fr\u00FChen, die Fr\u00FCchte des sp\u00E4teren Jahres",
|
||||
"encoded": "Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal"
|
||||
},
|
||||
{
|
||||
"description": "Arabic (Egyptian)",
|
||||
"decoded": "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
|
||||
"encoded": "egbpdaj6bu4bxfgehfvwxn"
|
||||
},
|
||||
{
|
||||
"description": "Chinese (simplified)",
|
||||
"decoded": "\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2d\u6587",
|
||||
"encoded": "ihqwcrb4cv8a8dqg056pqjye"
|
||||
},
|
||||
{
|
||||
"description": "Chinese (traditional)",
|
||||
"decoded": "\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
|
||||
"encoded": "ihqwctvzc91f659drss3x8bo0yb"
|
||||
},
|
||||
{
|
||||
"description": "Czech",
|
||||
"decoded": "Pro\u010Dprost\u011Bnemluv\u00ED\u010Desky",
|
||||
"encoded": "Proprostnemluvesky-uyb24dma41a"
|
||||
},
|
||||
{
|
||||
"description": "Hebrew",
|
||||
"decoded": "\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2\u05D1\u05E8\u05D9\u05EA",
|
||||
"encoded": "4dbcagdahymbxekheh6e0a7fei0b"
|
||||
},
|
||||
{
|
||||
"description": "Hindi (Devanagari)",
|
||||
"decoded": "\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947\u0939\u0948\u0902",
|
||||
"encoded": "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
|
||||
},
|
||||
{
|
||||
"description": "Japanese (kanji and hiragana)",
|
||||
"decoded": "\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
|
||||
"encoded": "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
|
||||
},
|
||||
{
|
||||
"description": "Korean (Hangul syllables)",
|
||||
"decoded": "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
|
||||
"encoded": "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
|
||||
},
|
||||
{
|
||||
"description": "Russian (Cyrillic)",
|
||||
"decoded": "\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A\u0438",
|
||||
"encoded": "b1abfaaepdrnnbgefbadotcwatmq2g4l"
|
||||
},
|
||||
{
|
||||
"description": "Spanish",
|
||||
"decoded": "Porqu\u00E9nopuedensimplementehablarenEspa\u00F1ol",
|
||||
"encoded": "PorqunopuedensimplementehablarenEspaol-fmd56a"
|
||||
},
|
||||
{
|
||||
"description": "Vietnamese",
|
||||
"decoded": "T\u1EA1isaoh\u1ECDkh\u00F4ngth\u1EC3ch\u1EC9n\u00F3iti\u1EBFngVi\u1EC7t",
|
||||
"encoded": "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
|
||||
},
|
||||
{
|
||||
"decoded": "3\u5E74B\u7D44\u91D1\u516B\u5148\u751F",
|
||||
"encoded": "3B-ww4c5e180e575a65lsy2b"
|
||||
},
|
||||
{
|
||||
"decoded": "\u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS",
|
||||
"encoded": "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"
|
||||
},
|
||||
{
|
||||
"decoded": "Hello-Another-Way-\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
|
||||
"encoded": "Hello-Another-Way--fc4qua05auwb3674vfr0b"
|
||||
},
|
||||
{
|
||||
"decoded": "\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B2",
|
||||
"encoded": "2-u9tlzr9756bt3uc0v"
|
||||
},
|
||||
{
|
||||
"decoded": "Maji\u3067Koi\u3059\u308B5\u79D2\u524D",
|
||||
"encoded": "MajiKoi5-783gue6qz075azm5e"
|
||||
},
|
||||
{
|
||||
"decoded": "\u30D1\u30D5\u30A3\u30FCde\u30EB\u30F3\u30D0",
|
||||
"encoded": "de-jg4avhby1noc0d"
|
||||
},
|
||||
{
|
||||
"decoded": "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
|
||||
"encoded": "d9juau41awczczp"
|
||||
},
|
||||
{
|
||||
"description": "ASCII string that breaks the existing rules for host-name labels (It's not a realistic example for IDNA, because IDNA never encodes pure ASCII labels.)",
|
||||
"decoded": "-> $1.00 <-",
|
||||
"encoded": "-> $1.00 <--"
|
||||
}
|
||||
]
|
25
third_party/rust/idna/tests/tests.rs
vendored
25
third_party/rust/idna/tests/tests.rs
vendored
@ -1,25 +0,0 @@
|
||||
extern crate idna;
|
||||
extern crate rustc_serialize;
|
||||
extern crate test;
|
||||
|
||||
mod punycode;
|
||||
mod uts46;
|
||||
|
||||
fn main() {
|
||||
let mut tests = Vec::new();
|
||||
{
|
||||
let mut add_test = |name, run| {
|
||||
tests.push(test::TestDescAndFn {
|
||||
desc: test::TestDesc {
|
||||
name: test::DynTestName(name),
|
||||
ignore: false,
|
||||
should_panic: test::ShouldPanic::No,
|
||||
},
|
||||
testfn: run,
|
||||
})
|
||||
};
|
||||
punycode::collect_tests(&mut add_test);
|
||||
uts46::collect_tests(&mut add_test);
|
||||
}
|
||||
test::test_main(&std::env::args().collect::<Vec<_>>(), tests)
|
||||
}
|
117
third_party/rust/idna/tests/uts46.rs
vendored
117
third_party/rust/idna/tests/uts46.rs
vendored
@ -1,117 +0,0 @@
|
||||
// Copyright 2013-2014 Valentin Gosu.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::char;
|
||||
use idna::uts46;
|
||||
use test::TestFn;
|
||||
|
||||
pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
|
||||
// http://www.unicode.org/Public/idna/latest/IdnaTest.txt
|
||||
for (i, line) in include_str!("IdnaTest.txt").lines().enumerate() {
|
||||
if line == "" || line.starts_with("#") {
|
||||
continue
|
||||
}
|
||||
// Remove comments
|
||||
let mut line = match line.find("#") {
|
||||
Some(index) => &line[0..index],
|
||||
None => line
|
||||
};
|
||||
|
||||
let mut expected_failure = false;
|
||||
if line.starts_with("XFAIL") {
|
||||
expected_failure = true;
|
||||
line = &line[5..line.len()];
|
||||
};
|
||||
|
||||
let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>();
|
||||
|
||||
let test_type = pieces.remove(0);
|
||||
let original = pieces.remove(0);
|
||||
let source = unescape(original);
|
||||
let to_unicode = pieces.remove(0);
|
||||
let to_ascii = pieces.remove(0);
|
||||
let nv8 = if pieces.len() > 0 { pieces.remove(0) } else { "" };
|
||||
|
||||
if expected_failure {
|
||||
continue;
|
||||
}
|
||||
|
||||
let test_name = format!("UTS #46 line {}", i + 1);
|
||||
add_test(test_name, TestFn::dyn_test_fn(move || {
|
||||
let result = uts46::to_ascii(&source, uts46::Flags {
|
||||
use_std3_ascii_rules: true,
|
||||
transitional_processing: test_type == "T",
|
||||
verify_dns_length: true,
|
||||
});
|
||||
|
||||
if to_ascii.starts_with("[") {
|
||||
if to_ascii.starts_with("[C") {
|
||||
// http://unicode.org/reports/tr46/#Deviations
|
||||
// applications that perform IDNA2008 lookup are not required to check
|
||||
// for these contexts
|
||||
return;
|
||||
}
|
||||
let res = result.ok();
|
||||
assert!(res == None, "Expected error. result: {} | original: {} | source: {}",
|
||||
res.unwrap(), original, source);
|
||||
return;
|
||||
}
|
||||
|
||||
let to_ascii = if to_ascii.len() > 0 {
|
||||
to_ascii.to_string()
|
||||
} else {
|
||||
if to_unicode.len() > 0 {
|
||||
to_unicode.to_string()
|
||||
} else {
|
||||
source.clone()
|
||||
}
|
||||
};
|
||||
|
||||
if nv8 == "NV8" {
|
||||
// This result isn't valid under IDNA2008. Skip it
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}",
|
||||
source, original, result.err());
|
||||
let output = result.ok().unwrap();
|
||||
assert!(output == to_ascii, "result: {} | expected: {} | original: {} | source: {}",
|
||||
output, to_ascii, original, source);
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
fn unescape(input: &str) -> String {
|
||||
let mut output = String::new();
|
||||
let mut chars = input.chars();
|
||||
loop {
|
||||
match chars.next() {
|
||||
None => return output,
|
||||
Some(c) =>
|
||||
if c == '\\' {
|
||||
match chars.next().unwrap() {
|
||||
'\\' => output.push('\\'),
|
||||
'u' => {
|
||||
let c1 = chars.next().unwrap().to_digit(16).unwrap();
|
||||
let c2 = chars.next().unwrap().to_digit(16).unwrap();
|
||||
let c3 = chars.next().unwrap().to_digit(16).unwrap();
|
||||
let c4 = chars.next().unwrap().to_digit(16).unwrap();
|
||||
match char::from_u32((((c1 * 16 + c2) * 16 + c3) * 16 + c4))
|
||||
{
|
||||
Some(c) => output.push(c),
|
||||
None => { output.push_str(&format!("\\u{:X}{:X}{:X}{:X}",c1,c2,c3,c4)); }
|
||||
};
|
||||
}
|
||||
_ => panic!("Invalid test data input"),
|
||||
}
|
||||
} else {
|
||||
output.push(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1 +0,0 @@
|
||||
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","Cargo.toml":"a55cb9efcb3820aa9fcdff157bffaa287988ea9d4cec33e638fa7fdd978c7742","lib.rs":"d927e99fc4fd88b0e30b44580384b411e63bc504e9953ac7cc85029437372266"},"package":"bcc3ad8109fa4b522f9b0cd81440422781f564aaf8c195de6b9d6642177ad0dd"}
|
0
third_party/rust/matches/.cargo-ok
vendored
0
third_party/rust/matches/.cargo-ok
vendored
12
third_party/rust/matches/Cargo.toml
vendored
12
third_party/rust/matches/Cargo.toml
vendored
@ -1,12 +0,0 @@
|
||||
[package]
|
||||
name = "matches"
|
||||
version = "0.1.3"
|
||||
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
|
||||
license = "MIT"
|
||||
repository = "https://github.com/SimonSapin/rust-std-candidates"
|
||||
description = "A macro to evaluate, as a boolean, whether an expression matches a pattern."
|
||||
|
||||
[lib]
|
||||
name = "matches"
|
||||
path = "lib.rs"
|
||||
doctest = false
|
62
third_party/rust/matches/lib.rs
vendored
62
third_party/rust/matches/lib.rs
vendored
@ -1,62 +0,0 @@
|
||||
#[macro_export]
|
||||
macro_rules! matches {
|
||||
($expression:expr, $($pattern:tt)+) => {
|
||||
_matches_tt_as_expr_hack! {
|
||||
match $expression {
|
||||
$($pattern)+ => true,
|
||||
_ => false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Work around "error: unexpected token: `an interpolated tt`", whatever that means.
|
||||
#[macro_export]
|
||||
macro_rules! _matches_tt_as_expr_hack {
|
||||
($value:expr) => ($value)
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! assert_matches {
|
||||
($expression:expr, $($pattern:tt)+) => {
|
||||
_matches_tt_as_expr_hack! {
|
||||
match $expression {
|
||||
$($pattern)+ => (),
|
||||
ref e => panic!("assertion failed: `{:?}` does not match `{}`", e, stringify!($($pattern)+)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! debug_assert_matches {
|
||||
($($arg:tt)*) => (if cfg!(debug_assertions) { assert_matches!($($arg)*); })
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn matches_works() {
|
||||
let foo = Some("-12");
|
||||
assert!(matches!(foo, Some(bar) if
|
||||
matches!(bar.as_bytes()[0], b'+' | b'-') &&
|
||||
matches!(bar.as_bytes()[1], b'0'...b'9')
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn assert_matches_works() {
|
||||
let foo = Some("-12");
|
||||
assert_matches!(foo, Some(bar) if
|
||||
matches!(bar.as_bytes()[0], b'+' | b'-') &&
|
||||
matches!(bar.as_bytes()[1], b'0'...b'9')
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "assertion failed: `Some(\"-AB\")` does not match ")]
|
||||
fn assert_matches_panics() {
|
||||
let foo = Some("-AB");
|
||||
assert_matches!(foo, Some(bar) if
|
||||
matches!(bar.as_bytes()[0], b'+' | b'-') &&
|
||||
matches!(bar.as_bytes()[1], b'0'...b'9')
|
||||
);
|
||||
}
|
@ -1 +0,0 @@
|
||||
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"f39078a8fd87a9cdecaf02e9f817d2e3320cc150c557faf0abf39791e5ac4da1",".travis.yml":"5c3f9b92ab6ca79a98400b8c344f3a06fcc096b5df6f49fbae1a83dceb540170","AUTHORS":"6029da5a270f7047db642cfdaa521acdfd93dcae9f9cd02dc1bd43dfa526fc18","COPYRIGHT":"edb20b474f6cbd4f4db066b54a9e0f687d0009d309412a63431189b59b8e2a07","Cargo.toml":"d08a56cbe64f6b2e82c0f32cf5bdd4f8e64acc9cc5ecfb610a5f65c2c0b2f4b8","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"e764abd9a6abbe9517dd9dedc003dd64594351282437a509fec9362ef136686d","src/BidiCharacterTest.txt":"baadfd1464e5b16586cf561dc6f0bc1bcb204efbd88b0fded7f197d106c8dd00","src/BidiTest.txt":"4697ad750044c2c05004350eee3f3ac2c9543615b9b43a02c235d8ff89a9b951","src/lib.rs":"228c34bd2e011b8d7cd7438f0d2e416b3ce80cf2dea516ad35394850d12a273a","src/tables.rs":"905aabf453f3a997fcb8784bbc2271ff61d587a8ce0492d140f144f475da0ddd","tools/generate.py":"30aaf9c8c4cf7c164d3b98af83159ba195b6fbf26135f0bacde1c4e225d8845d"},"package":"c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"}
|
0
third_party/rust/unicode-bidi/.cargo-ok
vendored
0
third_party/rust/unicode-bidi/.cargo-ok
vendored
4
third_party/rust/unicode-bidi/.gitignore
vendored
4
third_party/rust/unicode-bidi/.gitignore
vendored
@ -1,4 +0,0 @@
|
||||
Cargo.lock
|
||||
target
|
||||
src/ReadMe.txt
|
||||
src/UnicodeData.txt
|
9
third_party/rust/unicode-bidi/.travis.yml
vendored
9
third_party/rust/unicode-bidi/.travis.yml
vendored
@ -1,9 +0,0 @@
|
||||
sudo: false
|
||||
language: rust
|
||||
rust:
|
||||
- nightly
|
||||
- beta
|
||||
- stable
|
||||
|
||||
notifications:
|
||||
webhooks: http://build.servo.org:54856/travis
|
3
third_party/rust/unicode-bidi/AUTHORS
vendored
3
third_party/rust/unicode-bidi/AUTHORS
vendored
@ -1,3 +0,0 @@
|
||||
This software was written by the following people:
|
||||
|
||||
Matt Brubeck <mbrubeck@limpet.net>
|
8
third_party/rust/unicode-bidi/COPYRIGHT
vendored
8
third_party/rust/unicode-bidi/COPYRIGHT
vendored
@ -1,8 +0,0 @@
|
||||
This project is copyright 2015, The Servo Project Developers (given in the
|
||||
file AUTHORS).
|
||||
|
||||
Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
|
||||
http://opensource.org/licenses/MIT>, at your option. All files in the project
|
||||
carrying such notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
15
third_party/rust/unicode-bidi/Cargo.toml
vendored
15
third_party/rust/unicode-bidi/Cargo.toml
vendored
@ -1,15 +0,0 @@
|
||||
[package]
|
||||
name = "unicode-bidi"
|
||||
version = "0.2.3"
|
||||
authors = ["The Servo Project Developers"]
|
||||
license = "MIT / Apache-2.0"
|
||||
description = "Implementation of the Unicode Bidirectional Algorithm"
|
||||
repository = "https://github.com/servo/unicode-bidi"
|
||||
documentation = "http://doc.servo.org/unicode_bidi/"
|
||||
keywords = ["rtl", "unicode", "text", "layout", "bidi"]
|
||||
|
||||
[lib]
|
||||
name = "unicode_bidi"
|
||||
|
||||
[dependencies]
|
||||
matches = "0.1"
|
201
third_party/rust/unicode-bidi/LICENSE-APACHE
vendored
201
third_party/rust/unicode-bidi/LICENSE-APACHE
vendored
@ -1,201 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
25
third_party/rust/unicode-bidi/LICENSE-MIT
vendored
25
third_party/rust/unicode-bidi/LICENSE-MIT
vendored
@ -1,25 +0,0 @@
|
||||
Copyright (c) 2015 The Rust Project Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
11
third_party/rust/unicode-bidi/README.md
vendored
11
third_party/rust/unicode-bidi/README.md
vendored
@ -1,11 +0,0 @@
|
||||
# unicode-bidi
|
||||
|
||||
This crate implements the [Unicode Bidirectional Algorithm][tr9] for display
|
||||
of mixed right-to-left and left-to-right text. It is written in safe Rust,
|
||||
compatible with the current stable release.
|
||||
|
||||
[Documentation](http://doc.servo.org/unicode_bidi/)
|
||||
|
||||
[![Build Status](https://travis-ci.org/servo/unicode-bidi.svg?branch=master)](https://travis-ci.org/servo/unicode-bidi)
|
||||
|
||||
[tr9]: http://www.unicode.org/reports/tr9/
|
96392
third_party/rust/unicode-bidi/src/BidiCharacterTest.txt
vendored
96392
third_party/rust/unicode-bidi/src/BidiCharacterTest.txt
vendored
File diff suppressed because it is too large
Load Diff
497588
third_party/rust/unicode-bidi/src/BidiTest.txt
vendored
497588
third_party/rust/unicode-bidi/src/BidiTest.txt
vendored
File diff suppressed because it is too large
Load Diff
1026
third_party/rust/unicode-bidi/src/lib.rs
vendored
1026
third_party/rust/unicode-bidi/src/lib.rs
vendored
File diff suppressed because it is too large
Load Diff
477
third_party/rust/unicode-bidi/src/tables.rs
vendored
477
third_party/rust/unicode-bidi/src/tables.rs
vendored
@ -1,477 +0,0 @@
|
||||
// NOTE:
|
||||
// The following code was generated by "tools/generate.py". do not edit directly
|
||||
|
||||
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
|
||||
|
||||
/// The version of [Unicode](http://www.unicode.org/)
|
||||
/// that the `bidi_class` function is based on.
|
||||
pub const UNICODE_VERSION: (u64, u64, u64) = (8, 0, 0);
|
||||
pub use self::BidiClass::*;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
/// Represents the Unicode character property **Bidi_Class**, also known as
|
||||
/// the *bidirectional character type*.
|
||||
///
|
||||
/// Use the `bidi_class` function to look up the BidiClass of a code point.
|
||||
///
|
||||
/// http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types
|
||||
pub enum BidiClass {
|
||||
AL,
|
||||
AN,
|
||||
B,
|
||||
BN,
|
||||
CS,
|
||||
EN,
|
||||
ES,
|
||||
ET,
|
||||
FSI,
|
||||
L,
|
||||
LRE,
|
||||
LRI,
|
||||
LRO,
|
||||
NSM,
|
||||
ON,
|
||||
PDF,
|
||||
PDI,
|
||||
R,
|
||||
RLE,
|
||||
RLI,
|
||||
RLO,
|
||||
S,
|
||||
WS,
|
||||
}
|
||||
|
||||
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
|
||||
use ::std::cmp::Ordering::{Equal, Less, Greater};
|
||||
match r.binary_search_by(|&(lo, hi, _)| {
|
||||
if lo <= c && c <= hi { Equal }
|
||||
else if hi < c { Less }
|
||||
else { Greater }
|
||||
}) {
|
||||
Ok(idx) => {
|
||||
let (_, _, cat) = r[idx];
|
||||
cat
|
||||
}
|
||||
// UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
|
||||
// for Bidi_Class have the value Left_To_Right (L)."
|
||||
Err(_) => L
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the BidiClass of a single char.
|
||||
pub fn bidi_class(c: char) -> BidiClass {
|
||||
bsearch_range_value_table(c, bidi_class_table)
|
||||
}
|
||||
|
||||
const bidi_class_table: &'static [(char, char, BidiClass)] = &[
|
||||
('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S),
|
||||
('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}',
|
||||
B), ('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}',
|
||||
'\u{25}', ET), ('\u{26}', '\u{2a}', ON), ('\u{2b}', '\u{2b}', ES), ('\u{2c}', '\u{2c}', CS),
|
||||
('\u{2d}', '\u{2d}', ES), ('\u{2e}', '\u{2f}', CS), ('\u{30}', '\u{39}', EN), ('\u{3a}',
|
||||
'\u{3a}', CS), ('\u{3b}', '\u{40}', ON), ('\u{41}', '\u{5a}', L), ('\u{5b}', '\u{60}', ON),
|
||||
('\u{61}', '\u{7a}', L), ('\u{7b}', '\u{7e}', ON), ('\u{7f}', '\u{84}', BN), ('\u{85}',
|
||||
'\u{85}', B), ('\u{86}', '\u{9f}', BN), ('\u{a0}', '\u{a0}', CS), ('\u{a1}', '\u{a1}', ON),
|
||||
('\u{a2}', '\u{a5}', ET), ('\u{a6}', '\u{a9}', ON), ('\u{aa}', '\u{aa}', L), ('\u{ab}',
|
||||
'\u{ac}', ON), ('\u{ad}', '\u{ad}', BN), ('\u{ae}', '\u{af}', ON), ('\u{b0}', '\u{b1}', ET),
|
||||
('\u{b2}', '\u{b3}', EN), ('\u{b4}', '\u{b4}', ON), ('\u{b5}', '\u{b5}', L), ('\u{b6}',
|
||||
'\u{b8}', ON), ('\u{b9}', '\u{b9}', EN), ('\u{ba}', '\u{ba}', L), ('\u{bb}', '\u{bf}', ON),
|
||||
('\u{c0}', '\u{d6}', L), ('\u{d7}', '\u{d7}', ON), ('\u{d8}', '\u{f6}', L), ('\u{f7}',
|
||||
'\u{f7}', ON), ('\u{f8}', '\u{2b8}', L), ('\u{2b9}', '\u{2ba}', ON), ('\u{2bb}', '\u{2c1}',
|
||||
L), ('\u{2c2}', '\u{2cf}', ON), ('\u{2d0}', '\u{2d1}', L), ('\u{2d2}', '\u{2df}', ON),
|
||||
('\u{2e0}', '\u{2e4}', L), ('\u{2e5}', '\u{2ed}', ON), ('\u{2ee}', '\u{2ee}', L),
|
||||
('\u{2ef}', '\u{2ff}', ON), ('\u{300}', '\u{36f}', NSM), ('\u{370}', '\u{373}', L),
|
||||
('\u{374}', '\u{375}', ON), ('\u{376}', '\u{377}', L), ('\u{37a}', '\u{37d}', L),
|
||||
('\u{37e}', '\u{37e}', ON), ('\u{37f}', '\u{37f}', L), ('\u{384}', '\u{385}', ON),
|
||||
('\u{386}', '\u{386}', L), ('\u{387}', '\u{387}', ON), ('\u{388}', '\u{38a}', L),
|
||||
('\u{38c}', '\u{38c}', L), ('\u{38e}', '\u{3a1}', L), ('\u{3a3}', '\u{3f5}', L), ('\u{3f6}',
|
||||
'\u{3f6}', ON), ('\u{3f7}', '\u{482}', L), ('\u{483}', '\u{489}', NSM), ('\u{48a}',
|
||||
'\u{52f}', L), ('\u{531}', '\u{556}', L), ('\u{559}', '\u{55f}', L), ('\u{561}', '\u{587}',
|
||||
L), ('\u{589}', '\u{589}', L), ('\u{58a}', '\u{58a}', ON), ('\u{58d}', '\u{58e}', ON),
|
||||
('\u{58f}', '\u{58f}', ET), ('\u{590}', '\u{590}', R), ('\u{591}', '\u{5bd}', NSM),
|
||||
('\u{5be}', '\u{5be}', R), ('\u{5bf}', '\u{5bf}', NSM), ('\u{5c0}', '\u{5c0}', R),
|
||||
('\u{5c1}', '\u{5c2}', NSM), ('\u{5c3}', '\u{5c3}', R), ('\u{5c4}', '\u{5c5}', NSM),
|
||||
('\u{5c6}', '\u{5c6}', R), ('\u{5c7}', '\u{5c7}', NSM), ('\u{5c8}', '\u{5ff}', R),
|
||||
('\u{600}', '\u{605}', AN), ('\u{606}', '\u{607}', ON), ('\u{608}', '\u{608}', AL),
|
||||
('\u{609}', '\u{60a}', ET), ('\u{60b}', '\u{60b}', AL), ('\u{60c}', '\u{60c}', CS),
|
||||
('\u{60d}', '\u{60d}', AL), ('\u{60e}', '\u{60f}', ON), ('\u{610}', '\u{61a}', NSM),
|
||||
('\u{61b}', '\u{64a}', AL), ('\u{64b}', '\u{65f}', NSM), ('\u{660}', '\u{669}', AN),
|
||||
('\u{66a}', '\u{66a}', ET), ('\u{66b}', '\u{66c}', AN), ('\u{66d}', '\u{66f}', AL),
|
||||
('\u{670}', '\u{670}', NSM), ('\u{671}', '\u{6d5}', AL), ('\u{6d6}', '\u{6dc}', NSM),
|
||||
('\u{6dd}', '\u{6dd}', AN), ('\u{6de}', '\u{6de}', ON), ('\u{6df}', '\u{6e4}', NSM),
|
||||
('\u{6e5}', '\u{6e6}', AL), ('\u{6e7}', '\u{6e8}', NSM), ('\u{6e9}', '\u{6e9}', ON),
|
||||
('\u{6ea}', '\u{6ed}', NSM), ('\u{6ee}', '\u{6ef}', AL), ('\u{6f0}', '\u{6f9}', EN),
|
||||
('\u{6fa}', '\u{710}', AL), ('\u{711}', '\u{711}', NSM), ('\u{712}', '\u{72f}', AL),
|
||||
('\u{730}', '\u{74a}', NSM), ('\u{74b}', '\u{7a5}', AL), ('\u{7a6}', '\u{7b0}', NSM),
|
||||
('\u{7b1}', '\u{7bf}', AL), ('\u{7c0}', '\u{7ea}', R), ('\u{7eb}', '\u{7f3}', NSM),
|
||||
('\u{7f4}', '\u{7f5}', R), ('\u{7f6}', '\u{7f9}', ON), ('\u{7fa}', '\u{815}', R),
|
||||
('\u{816}', '\u{819}', NSM), ('\u{81a}', '\u{81a}', R), ('\u{81b}', '\u{823}', NSM),
|
||||
('\u{824}', '\u{824}', R), ('\u{825}', '\u{827}', NSM), ('\u{828}', '\u{828}', R),
|
||||
('\u{829}', '\u{82d}', NSM), ('\u{82e}', '\u{858}', R), ('\u{859}', '\u{85b}', NSM),
|
||||
('\u{85c}', '\u{89f}', R), ('\u{8a0}', '\u{8e2}', AL), ('\u{8e3}', '\u{902}', NSM),
|
||||
('\u{903}', '\u{939}', L), ('\u{93a}', '\u{93a}', NSM), ('\u{93b}', '\u{93b}', L),
|
||||
('\u{93c}', '\u{93c}', NSM), ('\u{93d}', '\u{940}', L), ('\u{941}', '\u{948}', NSM),
|
||||
('\u{949}', '\u{94c}', L), ('\u{94d}', '\u{94d}', NSM), ('\u{94e}', '\u{950}', L),
|
||||
('\u{951}', '\u{957}', NSM), ('\u{958}', '\u{961}', L), ('\u{962}', '\u{963}', NSM),
|
||||
('\u{964}', '\u{980}', L), ('\u{981}', '\u{981}', NSM), ('\u{982}', '\u{983}', L),
|
||||
('\u{985}', '\u{98c}', L), ('\u{98f}', '\u{990}', L), ('\u{993}', '\u{9a8}', L), ('\u{9aa}',
|
||||
'\u{9b0}', L), ('\u{9b2}', '\u{9b2}', L), ('\u{9b6}', '\u{9b9}', L), ('\u{9bc}', '\u{9bc}',
|
||||
NSM), ('\u{9bd}', '\u{9c0}', L), ('\u{9c1}', '\u{9c4}', NSM), ('\u{9c7}', '\u{9c8}', L),
|
||||
('\u{9cb}', '\u{9cc}', L), ('\u{9cd}', '\u{9cd}', NSM), ('\u{9ce}', '\u{9ce}', L),
|
||||
('\u{9d7}', '\u{9d7}', L), ('\u{9dc}', '\u{9dd}', L), ('\u{9df}', '\u{9e1}', L), ('\u{9e2}',
|
||||
'\u{9e3}', NSM), ('\u{9e6}', '\u{9f1}', L), ('\u{9f2}', '\u{9f3}', ET), ('\u{9f4}',
|
||||
'\u{9fa}', L), ('\u{9fb}', '\u{9fb}', ET), ('\u{a01}', '\u{a02}', NSM), ('\u{a03}',
|
||||
'\u{a03}', L), ('\u{a05}', '\u{a0a}', L), ('\u{a0f}', '\u{a10}', L), ('\u{a13}', '\u{a28}',
|
||||
L), ('\u{a2a}', '\u{a30}', L), ('\u{a32}', '\u{a33}', L), ('\u{a35}', '\u{a36}', L),
|
||||
('\u{a38}', '\u{a39}', L), ('\u{a3c}', '\u{a3c}', NSM), ('\u{a3e}', '\u{a40}', L),
|
||||
('\u{a41}', '\u{a42}', NSM), ('\u{a47}', '\u{a48}', NSM), ('\u{a4b}', '\u{a4d}', NSM),
|
||||
('\u{a51}', '\u{a51}', NSM), ('\u{a59}', '\u{a5c}', L), ('\u{a5e}', '\u{a5e}', L),
|
||||
('\u{a66}', '\u{a6f}', L), ('\u{a70}', '\u{a71}', NSM), ('\u{a72}', '\u{a74}', L),
|
||||
('\u{a75}', '\u{a75}', NSM), ('\u{a81}', '\u{a82}', NSM), ('\u{a83}', '\u{a83}', L),
|
||||
('\u{a85}', '\u{a8d}', L), ('\u{a8f}', '\u{a91}', L), ('\u{a93}', '\u{aa8}', L), ('\u{aaa}',
|
||||
'\u{ab0}', L), ('\u{ab2}', '\u{ab3}', L), ('\u{ab5}', '\u{ab9}', L), ('\u{abc}', '\u{abc}',
|
||||
NSM), ('\u{abd}', '\u{ac0}', L), ('\u{ac1}', '\u{ac5}', NSM), ('\u{ac7}', '\u{ac8}', NSM),
|
||||
('\u{ac9}', '\u{ac9}', L), ('\u{acb}', '\u{acc}', L), ('\u{acd}', '\u{acd}', NSM),
|
||||
('\u{ad0}', '\u{ad0}', L), ('\u{ae0}', '\u{ae1}', L), ('\u{ae2}', '\u{ae3}', NSM),
|
||||
('\u{ae6}', '\u{af0}', L), ('\u{af1}', '\u{af1}', ET), ('\u{af9}', '\u{af9}', L),
|
||||
('\u{b01}', '\u{b01}', NSM), ('\u{b02}', '\u{b03}', L), ('\u{b05}', '\u{b0c}', L),
|
||||
('\u{b0f}', '\u{b10}', L), ('\u{b13}', '\u{b28}', L), ('\u{b2a}', '\u{b30}', L), ('\u{b32}',
|
||||
'\u{b33}', L), ('\u{b35}', '\u{b39}', L), ('\u{b3c}', '\u{b3c}', NSM), ('\u{b3d}',
|
||||
'\u{b3e}', L), ('\u{b3f}', '\u{b3f}', NSM), ('\u{b40}', '\u{b40}', L), ('\u{b41}',
|
||||
'\u{b44}', NSM), ('\u{b47}', '\u{b48}', L), ('\u{b4b}', '\u{b4c}', L), ('\u{b4d}',
|
||||
'\u{b4d}', NSM), ('\u{b56}', '\u{b56}', NSM), ('\u{b57}', '\u{b57}', L), ('\u{b5c}',
|
||||
'\u{b5d}', L), ('\u{b5f}', '\u{b61}', L), ('\u{b62}', '\u{b63}', NSM), ('\u{b66}',
|
||||
'\u{b77}', L), ('\u{b82}', '\u{b82}', NSM), ('\u{b83}', '\u{b83}', L), ('\u{b85}',
|
||||
'\u{b8a}', L), ('\u{b8e}', '\u{b90}', L), ('\u{b92}', '\u{b95}', L), ('\u{b99}', '\u{b9a}',
|
||||
L), ('\u{b9c}', '\u{b9c}', L), ('\u{b9e}', '\u{b9f}', L), ('\u{ba3}', '\u{ba4}', L),
|
||||
('\u{ba8}', '\u{baa}', L), ('\u{bae}', '\u{bb9}', L), ('\u{bbe}', '\u{bbf}', L), ('\u{bc0}',
|
||||
'\u{bc0}', NSM), ('\u{bc1}', '\u{bc2}', L), ('\u{bc6}', '\u{bc8}', L), ('\u{bca}',
|
||||
'\u{bcc}', L), ('\u{bcd}', '\u{bcd}', NSM), ('\u{bd0}', '\u{bd0}', L), ('\u{bd7}',
|
||||
'\u{bd7}', L), ('\u{be6}', '\u{bf2}', L), ('\u{bf3}', '\u{bf8}', ON), ('\u{bf9}', '\u{bf9}',
|
||||
ET), ('\u{bfa}', '\u{bfa}', ON), ('\u{c00}', '\u{c00}', NSM), ('\u{c01}', '\u{c03}', L),
|
||||
('\u{c05}', '\u{c0c}', L), ('\u{c0e}', '\u{c10}', L), ('\u{c12}', '\u{c28}', L), ('\u{c2a}',
|
||||
'\u{c39}', L), ('\u{c3d}', '\u{c3d}', L), ('\u{c3e}', '\u{c40}', NSM), ('\u{c41}',
|
||||
'\u{c44}', L), ('\u{c46}', '\u{c48}', NSM), ('\u{c4a}', '\u{c4d}', NSM), ('\u{c55}',
|
||||
'\u{c56}', NSM), ('\u{c58}', '\u{c5a}', L), ('\u{c60}', '\u{c61}', L), ('\u{c62}',
|
||||
'\u{c63}', NSM), ('\u{c66}', '\u{c6f}', L), ('\u{c78}', '\u{c7e}', ON), ('\u{c7f}',
|
||||
'\u{c7f}', L), ('\u{c81}', '\u{c81}', NSM), ('\u{c82}', '\u{c83}', L), ('\u{c85}',
|
||||
'\u{c8c}', L), ('\u{c8e}', '\u{c90}', L), ('\u{c92}', '\u{ca8}', L), ('\u{caa}', '\u{cb3}',
|
||||
L), ('\u{cb5}', '\u{cb9}', L), ('\u{cbc}', '\u{cbc}', NSM), ('\u{cbd}', '\u{cc4}', L),
|
||||
('\u{cc6}', '\u{cc8}', L), ('\u{cca}', '\u{ccb}', L), ('\u{ccc}', '\u{ccd}', NSM),
|
||||
('\u{cd5}', '\u{cd6}', L), ('\u{cde}', '\u{cde}', L), ('\u{ce0}', '\u{ce1}', L), ('\u{ce2}',
|
||||
'\u{ce3}', NSM), ('\u{ce6}', '\u{cef}', L), ('\u{cf1}', '\u{cf2}', L), ('\u{d01}',
|
||||
'\u{d01}', NSM), ('\u{d02}', '\u{d03}', L), ('\u{d05}', '\u{d0c}', L), ('\u{d0e}',
|
||||
'\u{d10}', L), ('\u{d12}', '\u{d3a}', L), ('\u{d3d}', '\u{d40}', L), ('\u{d41}', '\u{d44}',
|
||||
NSM), ('\u{d46}', '\u{d48}', L), ('\u{d4a}', '\u{d4c}', L), ('\u{d4d}', '\u{d4d}', NSM),
|
||||
('\u{d4e}', '\u{d4e}', L), ('\u{d57}', '\u{d57}', L), ('\u{d5f}', '\u{d61}', L), ('\u{d62}',
|
||||
'\u{d63}', NSM), ('\u{d66}', '\u{d75}', L), ('\u{d79}', '\u{d7f}', L), ('\u{d82}',
|
||||
'\u{d83}', L), ('\u{d85}', '\u{d96}', L), ('\u{d9a}', '\u{db1}', L), ('\u{db3}', '\u{dbb}',
|
||||
L), ('\u{dbd}', '\u{dbd}', L), ('\u{dc0}', '\u{dc6}', L), ('\u{dca}', '\u{dca}', NSM),
|
||||
('\u{dcf}', '\u{dd1}', L), ('\u{dd2}', '\u{dd4}', NSM), ('\u{dd6}', '\u{dd6}', NSM),
|
||||
('\u{dd8}', '\u{ddf}', L), ('\u{de6}', '\u{def}', L), ('\u{df2}', '\u{df4}', L), ('\u{e01}',
|
||||
'\u{e30}', L), ('\u{e31}', '\u{e31}', NSM), ('\u{e32}', '\u{e33}', L), ('\u{e34}',
|
||||
'\u{e3a}', NSM), ('\u{e3f}', '\u{e3f}', ET), ('\u{e40}', '\u{e46}', L), ('\u{e47}',
|
||||
'\u{e4e}', NSM), ('\u{e4f}', '\u{e5b}', L), ('\u{e81}', '\u{e82}', L), ('\u{e84}',
|
||||
'\u{e84}', L), ('\u{e87}', '\u{e88}', L), ('\u{e8a}', '\u{e8a}', L), ('\u{e8d}', '\u{e8d}',
|
||||
L), ('\u{e94}', '\u{e97}', L), ('\u{e99}', '\u{e9f}', L), ('\u{ea1}', '\u{ea3}', L),
|
||||
('\u{ea5}', '\u{ea5}', L), ('\u{ea7}', '\u{ea7}', L), ('\u{eaa}', '\u{eab}', L), ('\u{ead}',
|
||||
'\u{eb0}', L), ('\u{eb1}', '\u{eb1}', NSM), ('\u{eb2}', '\u{eb3}', L), ('\u{eb4}',
|
||||
'\u{eb9}', NSM), ('\u{ebb}', '\u{ebc}', NSM), ('\u{ebd}', '\u{ebd}', L), ('\u{ec0}',
|
||||
'\u{ec4}', L), ('\u{ec6}', '\u{ec6}', L), ('\u{ec8}', '\u{ecd}', NSM), ('\u{ed0}',
|
||||
'\u{ed9}', L), ('\u{edc}', '\u{edf}', L), ('\u{f00}', '\u{f17}', L), ('\u{f18}', '\u{f19}',
|
||||
NSM), ('\u{f1a}', '\u{f34}', L), ('\u{f35}', '\u{f35}', NSM), ('\u{f36}', '\u{f36}', L),
|
||||
('\u{f37}', '\u{f37}', NSM), ('\u{f38}', '\u{f38}', L), ('\u{f39}', '\u{f39}', NSM),
|
||||
('\u{f3a}', '\u{f3d}', ON), ('\u{f3e}', '\u{f47}', L), ('\u{f49}', '\u{f6c}', L),
|
||||
('\u{f71}', '\u{f7e}', NSM), ('\u{f7f}', '\u{f7f}', L), ('\u{f80}', '\u{f84}', NSM),
|
||||
('\u{f85}', '\u{f85}', L), ('\u{f86}', '\u{f87}', NSM), ('\u{f88}', '\u{f8c}', L),
|
||||
('\u{f8d}', '\u{f97}', NSM), ('\u{f99}', '\u{fbc}', NSM), ('\u{fbe}', '\u{fc5}', L),
|
||||
('\u{fc6}', '\u{fc6}', NSM), ('\u{fc7}', '\u{fcc}', L), ('\u{fce}', '\u{fda}', L),
|
||||
('\u{1000}', '\u{102c}', L), ('\u{102d}', '\u{1030}', NSM), ('\u{1031}', '\u{1031}', L),
|
||||
('\u{1032}', '\u{1037}', NSM), ('\u{1038}', '\u{1038}', L), ('\u{1039}', '\u{103a}', NSM),
|
||||
('\u{103b}', '\u{103c}', L), ('\u{103d}', '\u{103e}', NSM), ('\u{103f}', '\u{1057}', L),
|
||||
('\u{1058}', '\u{1059}', NSM), ('\u{105a}', '\u{105d}', L), ('\u{105e}', '\u{1060}', NSM),
|
||||
('\u{1061}', '\u{1070}', L), ('\u{1071}', '\u{1074}', NSM), ('\u{1075}', '\u{1081}', L),
|
||||
('\u{1082}', '\u{1082}', NSM), ('\u{1083}', '\u{1084}', L), ('\u{1085}', '\u{1086}', NSM),
|
||||
('\u{1087}', '\u{108c}', L), ('\u{108d}', '\u{108d}', NSM), ('\u{108e}', '\u{109c}', L),
|
||||
('\u{109d}', '\u{109d}', NSM), ('\u{109e}', '\u{10c5}', L), ('\u{10c7}', '\u{10c7}', L),
|
||||
('\u{10cd}', '\u{10cd}', L), ('\u{10d0}', '\u{1248}', L), ('\u{124a}', '\u{124d}', L),
|
||||
('\u{1250}', '\u{1256}', L), ('\u{1258}', '\u{1258}', L), ('\u{125a}', '\u{125d}', L),
|
||||
('\u{1260}', '\u{1288}', L), ('\u{128a}', '\u{128d}', L), ('\u{1290}', '\u{12b0}', L),
|
||||
('\u{12b2}', '\u{12b5}', L), ('\u{12b8}', '\u{12be}', L), ('\u{12c0}', '\u{12c0}', L),
|
||||
('\u{12c2}', '\u{12c5}', L), ('\u{12c8}', '\u{12d6}', L), ('\u{12d8}', '\u{1310}', L),
|
||||
('\u{1312}', '\u{1315}', L), ('\u{1318}', '\u{135a}', L), ('\u{135d}', '\u{135f}', NSM),
|
||||
('\u{1360}', '\u{137c}', L), ('\u{1380}', '\u{138f}', L), ('\u{1390}', '\u{1399}', ON),
|
||||
('\u{13a0}', '\u{13f5}', L), ('\u{13f8}', '\u{13fd}', L), ('\u{1400}', '\u{1400}', ON),
|
||||
('\u{1401}', '\u{167f}', L), ('\u{1680}', '\u{1680}', WS), ('\u{1681}', '\u{169a}', L),
|
||||
('\u{169b}', '\u{169c}', ON), ('\u{16a0}', '\u{16f8}', L), ('\u{1700}', '\u{170c}', L),
|
||||
('\u{170e}', '\u{1711}', L), ('\u{1712}', '\u{1714}', NSM), ('\u{1720}', '\u{1731}', L),
|
||||
('\u{1732}', '\u{1734}', NSM), ('\u{1735}', '\u{1736}', L), ('\u{1740}', '\u{1751}', L),
|
||||
('\u{1752}', '\u{1753}', NSM), ('\u{1760}', '\u{176c}', L), ('\u{176e}', '\u{1770}', L),
|
||||
('\u{1772}', '\u{1773}', NSM), ('\u{1780}', '\u{17b3}', L), ('\u{17b4}', '\u{17b5}', NSM),
|
||||
('\u{17b6}', '\u{17b6}', L), ('\u{17b7}', '\u{17bd}', NSM), ('\u{17be}', '\u{17c5}', L),
|
||||
('\u{17c6}', '\u{17c6}', NSM), ('\u{17c7}', '\u{17c8}', L), ('\u{17c9}', '\u{17d3}', NSM),
|
||||
('\u{17d4}', '\u{17da}', L), ('\u{17db}', '\u{17db}', ET), ('\u{17dc}', '\u{17dc}', L),
|
||||
('\u{17dd}', '\u{17dd}', NSM), ('\u{17e0}', '\u{17e9}', L), ('\u{17f0}', '\u{17f9}', ON),
|
||||
('\u{1800}', '\u{180a}', ON), ('\u{180b}', '\u{180d}', NSM), ('\u{180e}', '\u{180e}', BN),
|
||||
('\u{1810}', '\u{1819}', L), ('\u{1820}', '\u{1877}', L), ('\u{1880}', '\u{18a8}', L),
|
||||
('\u{18a9}', '\u{18a9}', NSM), ('\u{18aa}', '\u{18aa}', L), ('\u{18b0}', '\u{18f5}', L),
|
||||
('\u{1900}', '\u{191e}', L), ('\u{1920}', '\u{1922}', NSM), ('\u{1923}', '\u{1926}', L),
|
||||
('\u{1927}', '\u{1928}', NSM), ('\u{1929}', '\u{192b}', L), ('\u{1930}', '\u{1931}', L),
|
||||
('\u{1932}', '\u{1932}', NSM), ('\u{1933}', '\u{1938}', L), ('\u{1939}', '\u{193b}', NSM),
|
||||
('\u{1940}', '\u{1940}', ON), ('\u{1944}', '\u{1945}', ON), ('\u{1946}', '\u{196d}', L),
|
||||
('\u{1970}', '\u{1974}', L), ('\u{1980}', '\u{19ab}', L), ('\u{19b0}', '\u{19c9}', L),
|
||||
('\u{19d0}', '\u{19da}', L), ('\u{19de}', '\u{19ff}', ON), ('\u{1a00}', '\u{1a16}', L),
|
||||
('\u{1a17}', '\u{1a18}', NSM), ('\u{1a19}', '\u{1a1a}', L), ('\u{1a1b}', '\u{1a1b}', NSM),
|
||||
('\u{1a1e}', '\u{1a55}', L), ('\u{1a56}', '\u{1a56}', NSM), ('\u{1a57}', '\u{1a57}', L),
|
||||
('\u{1a58}', '\u{1a5e}', NSM), ('\u{1a60}', '\u{1a60}', NSM), ('\u{1a61}', '\u{1a61}', L),
|
||||
('\u{1a62}', '\u{1a62}', NSM), ('\u{1a63}', '\u{1a64}', L), ('\u{1a65}', '\u{1a6c}', NSM),
|
||||
('\u{1a6d}', '\u{1a72}', L), ('\u{1a73}', '\u{1a7c}', NSM), ('\u{1a7f}', '\u{1a7f}', NSM),
|
||||
('\u{1a80}', '\u{1a89}', L), ('\u{1a90}', '\u{1a99}', L), ('\u{1aa0}', '\u{1aad}', L),
|
||||
('\u{1ab0}', '\u{1abe}', NSM), ('\u{1b00}', '\u{1b03}', NSM), ('\u{1b04}', '\u{1b33}', L),
|
||||
('\u{1b34}', '\u{1b34}', NSM), ('\u{1b35}', '\u{1b35}', L), ('\u{1b36}', '\u{1b3a}', NSM),
|
||||
('\u{1b3b}', '\u{1b3b}', L), ('\u{1b3c}', '\u{1b3c}', NSM), ('\u{1b3d}', '\u{1b41}', L),
|
||||
('\u{1b42}', '\u{1b42}', NSM), ('\u{1b43}', '\u{1b4b}', L), ('\u{1b50}', '\u{1b6a}', L),
|
||||
('\u{1b6b}', '\u{1b73}', NSM), ('\u{1b74}', '\u{1b7c}', L), ('\u{1b80}', '\u{1b81}', NSM),
|
||||
('\u{1b82}', '\u{1ba1}', L), ('\u{1ba2}', '\u{1ba5}', NSM), ('\u{1ba6}', '\u{1ba7}', L),
|
||||
('\u{1ba8}', '\u{1ba9}', NSM), ('\u{1baa}', '\u{1baa}', L), ('\u{1bab}', '\u{1bad}', NSM),
|
||||
('\u{1bae}', '\u{1be5}', L), ('\u{1be6}', '\u{1be6}', NSM), ('\u{1be7}', '\u{1be7}', L),
|
||||
('\u{1be8}', '\u{1be9}', NSM), ('\u{1bea}', '\u{1bec}', L), ('\u{1bed}', '\u{1bed}', NSM),
|
||||
('\u{1bee}', '\u{1bee}', L), ('\u{1bef}', '\u{1bf1}', NSM), ('\u{1bf2}', '\u{1bf3}', L),
|
||||
('\u{1bfc}', '\u{1c2b}', L), ('\u{1c2c}', '\u{1c33}', NSM), ('\u{1c34}', '\u{1c35}', L),
|
||||
('\u{1c36}', '\u{1c37}', NSM), ('\u{1c3b}', '\u{1c49}', L), ('\u{1c4d}', '\u{1c7f}', L),
|
||||
('\u{1cc0}', '\u{1cc7}', L), ('\u{1cd0}', '\u{1cd2}', NSM), ('\u{1cd3}', '\u{1cd3}', L),
|
||||
('\u{1cd4}', '\u{1ce0}', NSM), ('\u{1ce1}', '\u{1ce1}', L), ('\u{1ce2}', '\u{1ce8}', NSM),
|
||||
('\u{1ce9}', '\u{1cec}', L), ('\u{1ced}', '\u{1ced}', NSM), ('\u{1cee}', '\u{1cf3}', L),
|
||||
('\u{1cf4}', '\u{1cf4}', NSM), ('\u{1cf5}', '\u{1cf6}', L), ('\u{1cf8}', '\u{1cf9}', NSM),
|
||||
('\u{1d00}', '\u{1dbf}', L), ('\u{1dc0}', '\u{1df5}', NSM), ('\u{1dfc}', '\u{1dff}', NSM),
|
||||
('\u{1e00}', '\u{1f15}', L), ('\u{1f18}', '\u{1f1d}', L), ('\u{1f20}', '\u{1f45}', L),
|
||||
('\u{1f48}', '\u{1f4d}', L), ('\u{1f50}', '\u{1f57}', L), ('\u{1f59}', '\u{1f59}', L),
|
||||
('\u{1f5b}', '\u{1f5b}', L), ('\u{1f5d}', '\u{1f5d}', L), ('\u{1f5f}', '\u{1f7d}', L),
|
||||
('\u{1f80}', '\u{1fb4}', L), ('\u{1fb6}', '\u{1fbc}', L), ('\u{1fbd}', '\u{1fbd}', ON),
|
||||
('\u{1fbe}', '\u{1fbe}', L), ('\u{1fbf}', '\u{1fc1}', ON), ('\u{1fc2}', '\u{1fc4}', L),
|
||||
('\u{1fc6}', '\u{1fcc}', L), ('\u{1fcd}', '\u{1fcf}', ON), ('\u{1fd0}', '\u{1fd3}', L),
|
||||
('\u{1fd6}', '\u{1fdb}', L), ('\u{1fdd}', '\u{1fdf}', ON), ('\u{1fe0}', '\u{1fec}', L),
|
||||
('\u{1fed}', '\u{1fef}', ON), ('\u{1ff2}', '\u{1ff4}', L), ('\u{1ff6}', '\u{1ffc}', L),
|
||||
('\u{1ffd}', '\u{1ffe}', ON), ('\u{2000}', '\u{200a}', WS), ('\u{200b}', '\u{200d}', BN),
|
||||
('\u{200e}', '\u{200e}', L), ('\u{200f}', '\u{200f}', R), ('\u{2010}', '\u{2027}', ON),
|
||||
('\u{2028}', '\u{2028}', WS), ('\u{2029}', '\u{2029}', B), ('\u{202a}', '\u{202a}', LRE),
|
||||
('\u{202b}', '\u{202b}', RLE), ('\u{202c}', '\u{202c}', PDF), ('\u{202d}', '\u{202d}', LRO),
|
||||
('\u{202e}', '\u{202e}', RLO), ('\u{202f}', '\u{202f}', CS), ('\u{2030}', '\u{2034}', ET),
|
||||
('\u{2035}', '\u{2043}', ON), ('\u{2044}', '\u{2044}', CS), ('\u{2045}', '\u{205e}', ON),
|
||||
('\u{205f}', '\u{205f}', WS), ('\u{2060}', '\u{2064}', BN), ('\u{2066}', '\u{2066}', LRI),
|
||||
('\u{2067}', '\u{2067}', RLI), ('\u{2068}', '\u{2068}', FSI), ('\u{2069}', '\u{2069}', PDI),
|
||||
('\u{206a}', '\u{206f}', BN), ('\u{2070}', '\u{2070}', EN), ('\u{2071}', '\u{2071}', L),
|
||||
('\u{2074}', '\u{2079}', EN), ('\u{207a}', '\u{207b}', ES), ('\u{207c}', '\u{207e}', ON),
|
||||
('\u{207f}', '\u{207f}', L), ('\u{2080}', '\u{2089}', EN), ('\u{208a}', '\u{208b}', ES),
|
||||
('\u{208c}', '\u{208e}', ON), ('\u{2090}', '\u{209c}', L), ('\u{20a0}', '\u{20cf}', ET),
|
||||
('\u{20d0}', '\u{20f0}', NSM), ('\u{2100}', '\u{2101}', ON), ('\u{2102}', '\u{2102}', L),
|
||||
('\u{2103}', '\u{2106}', ON), ('\u{2107}', '\u{2107}', L), ('\u{2108}', '\u{2109}', ON),
|
||||
('\u{210a}', '\u{2113}', L), ('\u{2114}', '\u{2114}', ON), ('\u{2115}', '\u{2115}', L),
|
||||
('\u{2116}', '\u{2118}', ON), ('\u{2119}', '\u{211d}', L), ('\u{211e}', '\u{2123}', ON),
|
||||
('\u{2124}', '\u{2124}', L), ('\u{2125}', '\u{2125}', ON), ('\u{2126}', '\u{2126}', L),
|
||||
('\u{2127}', '\u{2127}', ON), ('\u{2128}', '\u{2128}', L), ('\u{2129}', '\u{2129}', ON),
|
||||
('\u{212a}', '\u{212d}', L), ('\u{212e}', '\u{212e}', ET), ('\u{212f}', '\u{2139}', L),
|
||||
('\u{213a}', '\u{213b}', ON), ('\u{213c}', '\u{213f}', L), ('\u{2140}', '\u{2144}', ON),
|
||||
('\u{2145}', '\u{2149}', L), ('\u{214a}', '\u{214d}', ON), ('\u{214e}', '\u{214f}', L),
|
||||
('\u{2150}', '\u{215f}', ON), ('\u{2160}', '\u{2188}', L), ('\u{2189}', '\u{218b}', ON),
|
||||
('\u{2190}', '\u{2211}', ON), ('\u{2212}', '\u{2212}', ES), ('\u{2213}', '\u{2213}', ET),
|
||||
('\u{2214}', '\u{2335}', ON), ('\u{2336}', '\u{237a}', L), ('\u{237b}', '\u{2394}', ON),
|
||||
('\u{2395}', '\u{2395}', L), ('\u{2396}', '\u{23fa}', ON), ('\u{2400}', '\u{2426}', ON),
|
||||
('\u{2440}', '\u{244a}', ON), ('\u{2460}', '\u{2487}', ON), ('\u{2488}', '\u{249b}', EN),
|
||||
('\u{249c}', '\u{24e9}', L), ('\u{24ea}', '\u{26ab}', ON), ('\u{26ac}', '\u{26ac}', L),
|
||||
('\u{26ad}', '\u{27ff}', ON), ('\u{2800}', '\u{28ff}', L), ('\u{2900}', '\u{2b73}', ON),
|
||||
('\u{2b76}', '\u{2b95}', ON), ('\u{2b98}', '\u{2bb9}', ON), ('\u{2bbd}', '\u{2bc8}', ON),
|
||||
('\u{2bca}', '\u{2bd1}', ON), ('\u{2bec}', '\u{2bef}', ON), ('\u{2c00}', '\u{2c2e}', L),
|
||||
('\u{2c30}', '\u{2c5e}', L), ('\u{2c60}', '\u{2ce4}', L), ('\u{2ce5}', '\u{2cea}', ON),
|
||||
('\u{2ceb}', '\u{2cee}', L), ('\u{2cef}', '\u{2cf1}', NSM), ('\u{2cf2}', '\u{2cf3}', L),
|
||||
('\u{2cf9}', '\u{2cff}', ON), ('\u{2d00}', '\u{2d25}', L), ('\u{2d27}', '\u{2d27}', L),
|
||||
('\u{2d2d}', '\u{2d2d}', L), ('\u{2d30}', '\u{2d67}', L), ('\u{2d6f}', '\u{2d70}', L),
|
||||
('\u{2d7f}', '\u{2d7f}', NSM), ('\u{2d80}', '\u{2d96}', L), ('\u{2da0}', '\u{2da6}', L),
|
||||
('\u{2da8}', '\u{2dae}', L), ('\u{2db0}', '\u{2db6}', L), ('\u{2db8}', '\u{2dbe}', L),
|
||||
('\u{2dc0}', '\u{2dc6}', L), ('\u{2dc8}', '\u{2dce}', L), ('\u{2dd0}', '\u{2dd6}', L),
|
||||
('\u{2dd8}', '\u{2dde}', L), ('\u{2de0}', '\u{2dff}', NSM), ('\u{2e00}', '\u{2e42}', ON),
|
||||
('\u{2e80}', '\u{2e99}', ON), ('\u{2e9b}', '\u{2ef3}', ON), ('\u{2f00}', '\u{2fd5}', ON),
|
||||
('\u{2ff0}', '\u{2ffb}', ON), ('\u{3000}', '\u{3000}', WS), ('\u{3001}', '\u{3004}', ON),
|
||||
('\u{3005}', '\u{3007}', L), ('\u{3008}', '\u{3020}', ON), ('\u{3021}', '\u{3029}', L),
|
||||
('\u{302a}', '\u{302d}', NSM), ('\u{302e}', '\u{302f}', L), ('\u{3030}', '\u{3030}', ON),
|
||||
('\u{3031}', '\u{3035}', L), ('\u{3036}', '\u{3037}', ON), ('\u{3038}', '\u{303c}', L),
|
||||
('\u{303d}', '\u{303f}', ON), ('\u{3041}', '\u{3096}', L), ('\u{3099}', '\u{309a}', NSM),
|
||||
('\u{309b}', '\u{309c}', ON), ('\u{309d}', '\u{309f}', L), ('\u{30a0}', '\u{30a0}', ON),
|
||||
('\u{30a1}', '\u{30fa}', L), ('\u{30fb}', '\u{30fb}', ON), ('\u{30fc}', '\u{30ff}', L),
|
||||
('\u{3105}', '\u{312d}', L), ('\u{3131}', '\u{318e}', L), ('\u{3190}', '\u{31ba}', L),
|
||||
('\u{31c0}', '\u{31e3}', ON), ('\u{31f0}', '\u{321c}', L), ('\u{321d}', '\u{321e}', ON),
|
||||
('\u{3220}', '\u{324f}', L), ('\u{3250}', '\u{325f}', ON), ('\u{3260}', '\u{327b}', L),
|
||||
('\u{327c}', '\u{327e}', ON), ('\u{327f}', '\u{32b0}', L), ('\u{32b1}', '\u{32bf}', ON),
|
||||
('\u{32c0}', '\u{32cb}', L), ('\u{32cc}', '\u{32cf}', ON), ('\u{32d0}', '\u{32fe}', L),
|
||||
('\u{3300}', '\u{3376}', L), ('\u{3377}', '\u{337a}', ON), ('\u{337b}', '\u{33dd}', L),
|
||||
('\u{33de}', '\u{33df}', ON), ('\u{33e0}', '\u{33fe}', L), ('\u{33ff}', '\u{33ff}', ON),
|
||||
('\u{3400}', '\u{4db5}', L), ('\u{4dc0}', '\u{4dff}', ON), ('\u{4e00}', '\u{9fd5}', L),
|
||||
('\u{a000}', '\u{a48c}', L), ('\u{a490}', '\u{a4c6}', ON), ('\u{a4d0}', '\u{a60c}', L),
|
||||
('\u{a60d}', '\u{a60f}', ON), ('\u{a610}', '\u{a62b}', L), ('\u{a640}', '\u{a66e}', L),
|
||||
('\u{a66f}', '\u{a672}', NSM), ('\u{a673}', '\u{a673}', ON), ('\u{a674}', '\u{a67d}', NSM),
|
||||
('\u{a67e}', '\u{a67f}', ON), ('\u{a680}', '\u{a69d}', L), ('\u{a69e}', '\u{a69f}', NSM),
|
||||
('\u{a6a0}', '\u{a6ef}', L), ('\u{a6f0}', '\u{a6f1}', NSM), ('\u{a6f2}', '\u{a6f7}', L),
|
||||
('\u{a700}', '\u{a721}', ON), ('\u{a722}', '\u{a787}', L), ('\u{a788}', '\u{a788}', ON),
|
||||
('\u{a789}', '\u{a7ad}', L), ('\u{a7b0}', '\u{a7b7}', L), ('\u{a7f7}', '\u{a801}', L),
|
||||
('\u{a802}', '\u{a802}', NSM), ('\u{a803}', '\u{a805}', L), ('\u{a806}', '\u{a806}', NSM),
|
||||
('\u{a807}', '\u{a80a}', L), ('\u{a80b}', '\u{a80b}', NSM), ('\u{a80c}', '\u{a824}', L),
|
||||
('\u{a825}', '\u{a826}', NSM), ('\u{a827}', '\u{a827}', L), ('\u{a828}', '\u{a82b}', ON),
|
||||
('\u{a830}', '\u{a837}', L), ('\u{a838}', '\u{a839}', ET), ('\u{a840}', '\u{a873}', L),
|
||||
('\u{a874}', '\u{a877}', ON), ('\u{a880}', '\u{a8c3}', L), ('\u{a8c4}', '\u{a8c4}', NSM),
|
||||
('\u{a8ce}', '\u{a8d9}', L), ('\u{a8e0}', '\u{a8f1}', NSM), ('\u{a8f2}', '\u{a8fd}', L),
|
||||
('\u{a900}', '\u{a925}', L), ('\u{a926}', '\u{a92d}', NSM), ('\u{a92e}', '\u{a946}', L),
|
||||
('\u{a947}', '\u{a951}', NSM), ('\u{a952}', '\u{a953}', L), ('\u{a95f}', '\u{a97c}', L),
|
||||
('\u{a980}', '\u{a982}', NSM), ('\u{a983}', '\u{a9b2}', L), ('\u{a9b3}', '\u{a9b3}', NSM),
|
||||
('\u{a9b4}', '\u{a9b5}', L), ('\u{a9b6}', '\u{a9b9}', NSM), ('\u{a9ba}', '\u{a9bb}', L),
|
||||
('\u{a9bc}', '\u{a9bc}', NSM), ('\u{a9bd}', '\u{a9cd}', L), ('\u{a9cf}', '\u{a9d9}', L),
|
||||
('\u{a9de}', '\u{a9e4}', L), ('\u{a9e5}', '\u{a9e5}', NSM), ('\u{a9e6}', '\u{a9fe}', L),
|
||||
('\u{aa00}', '\u{aa28}', L), ('\u{aa29}', '\u{aa2e}', NSM), ('\u{aa2f}', '\u{aa30}', L),
|
||||
('\u{aa31}', '\u{aa32}', NSM), ('\u{aa33}', '\u{aa34}', L), ('\u{aa35}', '\u{aa36}', NSM),
|
||||
('\u{aa40}', '\u{aa42}', L), ('\u{aa43}', '\u{aa43}', NSM), ('\u{aa44}', '\u{aa4b}', L),
|
||||
('\u{aa4c}', '\u{aa4c}', NSM), ('\u{aa4d}', '\u{aa4d}', L), ('\u{aa50}', '\u{aa59}', L),
|
||||
('\u{aa5c}', '\u{aa7b}', L), ('\u{aa7c}', '\u{aa7c}', NSM), ('\u{aa7d}', '\u{aaaf}', L),
|
||||
('\u{aab0}', '\u{aab0}', NSM), ('\u{aab1}', '\u{aab1}', L), ('\u{aab2}', '\u{aab4}', NSM),
|
||||
('\u{aab5}', '\u{aab6}', L), ('\u{aab7}', '\u{aab8}', NSM), ('\u{aab9}', '\u{aabd}', L),
|
||||
('\u{aabe}', '\u{aabf}', NSM), ('\u{aac0}', '\u{aac0}', L), ('\u{aac1}', '\u{aac1}', NSM),
|
||||
('\u{aac2}', '\u{aac2}', L), ('\u{aadb}', '\u{aaeb}', L), ('\u{aaec}', '\u{aaed}', NSM),
|
||||
('\u{aaee}', '\u{aaf5}', L), ('\u{aaf6}', '\u{aaf6}', NSM), ('\u{ab01}', '\u{ab06}', L),
|
||||
('\u{ab09}', '\u{ab0e}', L), ('\u{ab11}', '\u{ab16}', L), ('\u{ab20}', '\u{ab26}', L),
|
||||
('\u{ab28}', '\u{ab2e}', L), ('\u{ab30}', '\u{ab65}', L), ('\u{ab70}', '\u{abe4}', L),
|
||||
('\u{abe5}', '\u{abe5}', NSM), ('\u{abe6}', '\u{abe7}', L), ('\u{abe8}', '\u{abe8}', NSM),
|
||||
('\u{abe9}', '\u{abec}', L), ('\u{abed}', '\u{abed}', NSM), ('\u{abf0}', '\u{abf9}', L),
|
||||
('\u{ac00}', '\u{d7a3}', L), ('\u{d7b0}', '\u{d7c6}', L), ('\u{d7cb}', '\u{d7fb}', L),
|
||||
('\u{e000}', '\u{fa6d}', L), ('\u{fa70}', '\u{fad9}', L), ('\u{fb00}', '\u{fb06}', L),
|
||||
('\u{fb13}', '\u{fb17}', L), ('\u{fb1d}', '\u{fb1d}', R), ('\u{fb1e}', '\u{fb1e}', NSM),
|
||||
('\u{fb1f}', '\u{fb28}', R), ('\u{fb29}', '\u{fb29}', ES), ('\u{fb2a}', '\u{fb4f}', R),
|
||||
('\u{fb50}', '\u{fd3d}', AL), ('\u{fd3e}', '\u{fd3f}', ON), ('\u{fd40}', '\u{fdcf}', AL),
|
||||
('\u{fdf0}', '\u{fdfc}', AL), ('\u{fdfd}', '\u{fdfd}', ON), ('\u{fdfe}', '\u{fdff}', AL),
|
||||
('\u{fe00}', '\u{fe0f}', NSM), ('\u{fe10}', '\u{fe19}', ON), ('\u{fe20}', '\u{fe2f}', NSM),
|
||||
('\u{fe30}', '\u{fe4f}', ON), ('\u{fe50}', '\u{fe50}', CS), ('\u{fe51}', '\u{fe51}', ON),
|
||||
('\u{fe52}', '\u{fe52}', CS), ('\u{fe54}', '\u{fe54}', ON), ('\u{fe55}', '\u{fe55}', CS),
|
||||
('\u{fe56}', '\u{fe5e}', ON), ('\u{fe5f}', '\u{fe5f}', ET), ('\u{fe60}', '\u{fe61}', ON),
|
||||
('\u{fe62}', '\u{fe63}', ES), ('\u{fe64}', '\u{fe66}', ON), ('\u{fe68}', '\u{fe68}', ON),
|
||||
('\u{fe69}', '\u{fe6a}', ET), ('\u{fe6b}', '\u{fe6b}', ON), ('\u{fe70}', '\u{fefe}', AL),
|
||||
('\u{feff}', '\u{feff}', BN), ('\u{ff01}', '\u{ff02}', ON), ('\u{ff03}', '\u{ff05}', ET),
|
||||
('\u{ff06}', '\u{ff0a}', ON), ('\u{ff0b}', '\u{ff0b}', ES), ('\u{ff0c}', '\u{ff0c}', CS),
|
||||
('\u{ff0d}', '\u{ff0d}', ES), ('\u{ff0e}', '\u{ff0f}', CS), ('\u{ff10}', '\u{ff19}', EN),
|
||||
('\u{ff1a}', '\u{ff1a}', CS), ('\u{ff1b}', '\u{ff20}', ON), ('\u{ff21}', '\u{ff3a}', L),
|
||||
('\u{ff3b}', '\u{ff40}', ON), ('\u{ff41}', '\u{ff5a}', L), ('\u{ff5b}', '\u{ff65}', ON),
|
||||
('\u{ff66}', '\u{ffbe}', L), ('\u{ffc2}', '\u{ffc7}', L), ('\u{ffca}', '\u{ffcf}', L),
|
||||
('\u{ffd2}', '\u{ffd7}', L), ('\u{ffda}', '\u{ffdc}', L), ('\u{ffe0}', '\u{ffe1}', ET),
|
||||
('\u{ffe2}', '\u{ffe4}', ON), ('\u{ffe5}', '\u{ffe6}', ET), ('\u{ffe8}', '\u{ffee}', ON),
|
||||
('\u{fff9}', '\u{fffd}', ON), ('\u{10000}', '\u{1000b}', L), ('\u{1000d}', '\u{10026}', L),
|
||||
('\u{10028}', '\u{1003a}', L), ('\u{1003c}', '\u{1003d}', L), ('\u{1003f}', '\u{1004d}', L),
|
||||
('\u{10050}', '\u{1005d}', L), ('\u{10080}', '\u{100fa}', L), ('\u{10100}', '\u{10100}', L),
|
||||
('\u{10101}', '\u{10101}', ON), ('\u{10102}', '\u{10102}', L), ('\u{10107}', '\u{10133}',
|
||||
L), ('\u{10137}', '\u{1013f}', L), ('\u{10140}', '\u{1018c}', ON), ('\u{10190}',
|
||||
'\u{1019b}', ON), ('\u{101a0}', '\u{101a0}', ON), ('\u{101d0}', '\u{101fc}', L),
|
||||
('\u{101fd}', '\u{101fd}', NSM), ('\u{10280}', '\u{1029c}', L), ('\u{102a0}', '\u{102d0}',
|
||||
L), ('\u{102e0}', '\u{102e0}', NSM), ('\u{102e1}', '\u{102fb}', EN), ('\u{10300}',
|
||||
'\u{10323}', L), ('\u{10330}', '\u{1034a}', L), ('\u{10350}', '\u{10375}', L), ('\u{10376}',
|
||||
'\u{1037a}', NSM), ('\u{10380}', '\u{1039d}', L), ('\u{1039f}', '\u{103c3}', L),
|
||||
('\u{103c8}', '\u{103d5}', L), ('\u{10400}', '\u{1049d}', L), ('\u{104a0}', '\u{104a9}', L),
|
||||
('\u{10500}', '\u{10527}', L), ('\u{10530}', '\u{10563}', L), ('\u{1056f}', '\u{1056f}', L),
|
||||
('\u{10600}', '\u{10736}', L), ('\u{10740}', '\u{10755}', L), ('\u{10760}', '\u{10767}', L),
|
||||
('\u{10800}', '\u{1091e}', R), ('\u{1091f}', '\u{1091f}', ON), ('\u{10920}', '\u{10a00}',
|
||||
R), ('\u{10a01}', '\u{10a03}', NSM), ('\u{10a04}', '\u{10a04}', R), ('\u{10a05}',
|
||||
'\u{10a06}', NSM), ('\u{10a07}', '\u{10a0b}', R), ('\u{10a0c}', '\u{10a0f}', NSM),
|
||||
('\u{10a10}', '\u{10a37}', R), ('\u{10a38}', '\u{10a3a}', NSM), ('\u{10a3b}', '\u{10a3e}',
|
||||
R), ('\u{10a3f}', '\u{10a3f}', NSM), ('\u{10a40}', '\u{10ae4}', R), ('\u{10ae5}',
|
||||
'\u{10ae6}', NSM), ('\u{10ae7}', '\u{10b38}', R), ('\u{10b39}', '\u{10b3f}', ON),
|
||||
('\u{10b40}', '\u{10e5f}', R), ('\u{10e60}', '\u{10e7e}', AN), ('\u{10e7f}', '\u{10fff}',
|
||||
R), ('\u{11000}', '\u{11000}', L), ('\u{11001}', '\u{11001}', NSM), ('\u{11002}',
|
||||
'\u{11037}', L), ('\u{11038}', '\u{11046}', NSM), ('\u{11047}', '\u{1104d}', L),
|
||||
('\u{11052}', '\u{11065}', ON), ('\u{11066}', '\u{1106f}', L), ('\u{1107f}', '\u{11081}',
|
||||
NSM), ('\u{11082}', '\u{110b2}', L), ('\u{110b3}', '\u{110b6}', NSM), ('\u{110b7}',
|
||||
'\u{110b8}', L), ('\u{110b9}', '\u{110ba}', NSM), ('\u{110bb}', '\u{110c1}', L),
|
||||
('\u{110d0}', '\u{110e8}', L), ('\u{110f0}', '\u{110f9}', L), ('\u{11100}', '\u{11102}',
|
||||
NSM), ('\u{11103}', '\u{11126}', L), ('\u{11127}', '\u{1112b}', NSM), ('\u{1112c}',
|
||||
'\u{1112c}', L), ('\u{1112d}', '\u{11134}', NSM), ('\u{11136}', '\u{11143}', L),
|
||||
('\u{11150}', '\u{11172}', L), ('\u{11173}', '\u{11173}', NSM), ('\u{11174}', '\u{11176}',
|
||||
L), ('\u{11180}', '\u{11181}', NSM), ('\u{11182}', '\u{111b5}', L), ('\u{111b6}',
|
||||
'\u{111be}', NSM), ('\u{111bf}', '\u{111c9}', L), ('\u{111ca}', '\u{111cc}', NSM),
|
||||
('\u{111cd}', '\u{111cd}', L), ('\u{111d0}', '\u{111df}', L), ('\u{111e1}', '\u{111f4}', L),
|
||||
('\u{11200}', '\u{11211}', L), ('\u{11213}', '\u{1122e}', L), ('\u{1122f}', '\u{11231}',
|
||||
NSM), ('\u{11232}', '\u{11233}', L), ('\u{11234}', '\u{11234}', NSM), ('\u{11235}',
|
||||
'\u{11235}', L), ('\u{11236}', '\u{11237}', NSM), ('\u{11238}', '\u{1123d}', L),
|
||||
('\u{11280}', '\u{11286}', L), ('\u{11288}', '\u{11288}', L), ('\u{1128a}', '\u{1128d}', L),
|
||||
('\u{1128f}', '\u{1129d}', L), ('\u{1129f}', '\u{112a9}', L), ('\u{112b0}', '\u{112de}', L),
|
||||
('\u{112df}', '\u{112df}', NSM), ('\u{112e0}', '\u{112e2}', L), ('\u{112e3}', '\u{112ea}',
|
||||
NSM), ('\u{112f0}', '\u{112f9}', L), ('\u{11300}', '\u{11301}', NSM), ('\u{11302}',
|
||||
'\u{11303}', L), ('\u{11305}', '\u{1130c}', L), ('\u{1130f}', '\u{11310}', L), ('\u{11313}',
|
||||
'\u{11328}', L), ('\u{1132a}', '\u{11330}', L), ('\u{11332}', '\u{11333}', L), ('\u{11335}',
|
||||
'\u{11339}', L), ('\u{1133c}', '\u{1133c}', NSM), ('\u{1133d}', '\u{1133f}', L),
|
||||
('\u{11340}', '\u{11340}', NSM), ('\u{11341}', '\u{11344}', L), ('\u{11347}', '\u{11348}',
|
||||
L), ('\u{1134b}', '\u{1134d}', L), ('\u{11350}', '\u{11350}', L), ('\u{11357}', '\u{11357}',
|
||||
L), ('\u{1135d}', '\u{11363}', L), ('\u{11366}', '\u{1136c}', NSM), ('\u{11370}',
|
||||
'\u{11374}', NSM), ('\u{11480}', '\u{114b2}', L), ('\u{114b3}', '\u{114b8}', NSM),
|
||||
('\u{114b9}', '\u{114b9}', L), ('\u{114ba}', '\u{114ba}', NSM), ('\u{114bb}', '\u{114be}',
|
||||
L), ('\u{114bf}', '\u{114c0}', NSM), ('\u{114c1}', '\u{114c1}', L), ('\u{114c2}',
|
||||
'\u{114c3}', NSM), ('\u{114c4}', '\u{114c7}', L), ('\u{114d0}', '\u{114d9}', L),
|
||||
('\u{11580}', '\u{115b1}', L), ('\u{115b2}', '\u{115b5}', NSM), ('\u{115b8}', '\u{115bb}',
|
||||
L), ('\u{115bc}', '\u{115bd}', NSM), ('\u{115be}', '\u{115be}', L), ('\u{115bf}',
|
||||
'\u{115c0}', NSM), ('\u{115c1}', '\u{115db}', L), ('\u{115dc}', '\u{115dd}', NSM),
|
||||
('\u{11600}', '\u{11632}', L), ('\u{11633}', '\u{1163a}', NSM), ('\u{1163b}', '\u{1163c}',
|
||||
L), ('\u{1163d}', '\u{1163d}', NSM), ('\u{1163e}', '\u{1163e}', L), ('\u{1163f}',
|
||||
'\u{11640}', NSM), ('\u{11641}', '\u{11644}', L), ('\u{11650}', '\u{11659}', L),
|
||||
('\u{11680}', '\u{116aa}', L), ('\u{116ab}', '\u{116ab}', NSM), ('\u{116ac}', '\u{116ac}',
|
||||
L), ('\u{116ad}', '\u{116ad}', NSM), ('\u{116ae}', '\u{116af}', L), ('\u{116b0}',
|
||||
'\u{116b5}', NSM), ('\u{116b6}', '\u{116b6}', L), ('\u{116b7}', '\u{116b7}', NSM),
|
||||
('\u{116c0}', '\u{116c9}', L), ('\u{11700}', '\u{11719}', L), ('\u{1171d}', '\u{1171f}',
|
||||
NSM), ('\u{11720}', '\u{11721}', L), ('\u{11722}', '\u{11725}', NSM), ('\u{11726}',
|
||||
'\u{11726}', L), ('\u{11727}', '\u{1172b}', NSM), ('\u{11730}', '\u{1173f}', L),
|
||||
('\u{118a0}', '\u{118f2}', L), ('\u{118ff}', '\u{118ff}', L), ('\u{11ac0}', '\u{11af8}', L),
|
||||
('\u{12000}', '\u{12399}', L), ('\u{12400}', '\u{1246e}', L), ('\u{12470}', '\u{12474}', L),
|
||||
('\u{12480}', '\u{12543}', L), ('\u{13000}', '\u{1342e}', L), ('\u{14400}', '\u{14646}', L),
|
||||
('\u{16800}', '\u{16a38}', L), ('\u{16a40}', '\u{16a5e}', L), ('\u{16a60}', '\u{16a69}', L),
|
||||
('\u{16a6e}', '\u{16a6f}', L), ('\u{16ad0}', '\u{16aed}', L), ('\u{16af0}', '\u{16af4}',
|
||||
NSM), ('\u{16af5}', '\u{16af5}', L), ('\u{16b00}', '\u{16b2f}', L), ('\u{16b30}',
|
||||
'\u{16b36}', NSM), ('\u{16b37}', '\u{16b45}', L), ('\u{16b50}', '\u{16b59}', L),
|
||||
('\u{16b5b}', '\u{16b61}', L), ('\u{16b63}', '\u{16b77}', L), ('\u{16b7d}', '\u{16b8f}', L),
|
||||
('\u{16f00}', '\u{16f44}', L), ('\u{16f50}', '\u{16f7e}', L), ('\u{16f8f}', '\u{16f92}',
|
||||
NSM), ('\u{16f93}', '\u{16f9f}', L), ('\u{1b000}', '\u{1b001}', L), ('\u{1bc00}',
|
||||
'\u{1bc6a}', L), ('\u{1bc70}', '\u{1bc7c}', L), ('\u{1bc80}', '\u{1bc88}', L), ('\u{1bc90}',
|
||||
'\u{1bc99}', L), ('\u{1bc9c}', '\u{1bc9c}', L), ('\u{1bc9d}', '\u{1bc9e}', NSM),
|
||||
('\u{1bc9f}', '\u{1bc9f}', L), ('\u{1bca0}', '\u{1bca3}', BN), ('\u{1d000}', '\u{1d0f5}',
|
||||
L), ('\u{1d100}', '\u{1d126}', L), ('\u{1d129}', '\u{1d166}', L), ('\u{1d167}', '\u{1d169}',
|
||||
NSM), ('\u{1d16a}', '\u{1d172}', L), ('\u{1d173}', '\u{1d17a}', BN), ('\u{1d17b}',
|
||||
'\u{1d182}', NSM), ('\u{1d183}', '\u{1d184}', L), ('\u{1d185}', '\u{1d18b}', NSM),
|
||||
('\u{1d18c}', '\u{1d1a9}', L), ('\u{1d1aa}', '\u{1d1ad}', NSM), ('\u{1d1ae}', '\u{1d1e8}',
|
||||
L), ('\u{1d200}', '\u{1d241}', ON), ('\u{1d242}', '\u{1d244}', NSM), ('\u{1d245}',
|
||||
'\u{1d245}', ON), ('\u{1d300}', '\u{1d356}', ON), ('\u{1d360}', '\u{1d371}', L),
|
||||
('\u{1d400}', '\u{1d454}', L), ('\u{1d456}', '\u{1d49c}', L), ('\u{1d49e}', '\u{1d49f}', L),
|
||||
('\u{1d4a2}', '\u{1d4a2}', L), ('\u{1d4a5}', '\u{1d4a6}', L), ('\u{1d4a9}', '\u{1d4ac}', L),
|
||||
('\u{1d4ae}', '\u{1d4b9}', L), ('\u{1d4bb}', '\u{1d4bb}', L), ('\u{1d4bd}', '\u{1d4c3}', L),
|
||||
('\u{1d4c5}', '\u{1d505}', L), ('\u{1d507}', '\u{1d50a}', L), ('\u{1d50d}', '\u{1d514}', L),
|
||||
('\u{1d516}', '\u{1d51c}', L), ('\u{1d51e}', '\u{1d539}', L), ('\u{1d53b}', '\u{1d53e}', L),
|
||||
('\u{1d540}', '\u{1d544}', L), ('\u{1d546}', '\u{1d546}', L), ('\u{1d54a}', '\u{1d550}', L),
|
||||
('\u{1d552}', '\u{1d6a5}', L), ('\u{1d6a8}', '\u{1d6da}', L), ('\u{1d6db}', '\u{1d6db}',
|
||||
ON), ('\u{1d6dc}', '\u{1d714}', L), ('\u{1d715}', '\u{1d715}', ON), ('\u{1d716}',
|
||||
'\u{1d74e}', L), ('\u{1d74f}', '\u{1d74f}', ON), ('\u{1d750}', '\u{1d788}', L),
|
||||
('\u{1d789}', '\u{1d789}', ON), ('\u{1d78a}', '\u{1d7c2}', L), ('\u{1d7c3}', '\u{1d7c3}',
|
||||
ON), ('\u{1d7c4}', '\u{1d7cb}', L), ('\u{1d7ce}', '\u{1d7ff}', EN), ('\u{1d800}',
|
||||
'\u{1d9ff}', L), ('\u{1da00}', '\u{1da36}', NSM), ('\u{1da37}', '\u{1da3a}', L),
|
||||
('\u{1da3b}', '\u{1da6c}', NSM), ('\u{1da6d}', '\u{1da74}', L), ('\u{1da75}', '\u{1da75}',
|
||||
NSM), ('\u{1da76}', '\u{1da83}', L), ('\u{1da84}', '\u{1da84}', NSM), ('\u{1da85}',
|
||||
'\u{1da8b}', L), ('\u{1da9b}', '\u{1da9f}', NSM), ('\u{1daa1}', '\u{1daaf}', NSM),
|
||||
('\u{1e800}', '\u{1e8cf}', R), ('\u{1e8d0}', '\u{1e8d6}', NSM), ('\u{1e8d7}', '\u{1edff}',
|
||||
R), ('\u{1ee00}', '\u{1eeef}', AL), ('\u{1eef0}', '\u{1eef1}', ON), ('\u{1eef2}',
|
||||
'\u{1eeff}', AL), ('\u{1ef00}', '\u{1efff}', R), ('\u{1f000}', '\u{1f02b}', ON),
|
||||
('\u{1f030}', '\u{1f093}', ON), ('\u{1f0a0}', '\u{1f0ae}', ON), ('\u{1f0b1}', '\u{1f0bf}',
|
||||
ON), ('\u{1f0c1}', '\u{1f0cf}', ON), ('\u{1f0d1}', '\u{1f0f5}', ON), ('\u{1f100}',
|
||||
'\u{1f10a}', EN), ('\u{1f10b}', '\u{1f10c}', ON), ('\u{1f110}', '\u{1f12e}', L),
|
||||
('\u{1f130}', '\u{1f169}', L), ('\u{1f16a}', '\u{1f16b}', ON), ('\u{1f170}', '\u{1f19a}',
|
||||
L), ('\u{1f1e6}', '\u{1f202}', L), ('\u{1f210}', '\u{1f23a}', L), ('\u{1f240}', '\u{1f248}',
|
||||
L), ('\u{1f250}', '\u{1f251}', L), ('\u{1f300}', '\u{1f579}', ON), ('\u{1f57b}',
|
||||
'\u{1f5a3}', ON), ('\u{1f5a5}', '\u{1f6d0}', ON), ('\u{1f6e0}', '\u{1f6ec}', ON),
|
||||
('\u{1f6f0}', '\u{1f6f3}', ON), ('\u{1f700}', '\u{1f773}', ON), ('\u{1f780}', '\u{1f7d4}',
|
||||
ON), ('\u{1f800}', '\u{1f80b}', ON), ('\u{1f810}', '\u{1f847}', ON), ('\u{1f850}',
|
||||
'\u{1f859}', ON), ('\u{1f860}', '\u{1f887}', ON), ('\u{1f890}', '\u{1f8ad}', ON),
|
||||
('\u{1f910}', '\u{1f918}', ON), ('\u{1f980}', '\u{1f984}', ON), ('\u{1f9c0}', '\u{1f9c0}',
|
||||
ON), ('\u{20000}', '\u{2a6d6}', L), ('\u{2a700}', '\u{2b734}', L), ('\u{2b740}',
|
||||
'\u{2b81d}', L), ('\u{2b820}', '\u{2cea1}', L), ('\u{2f800}', '\u{2fa1d}', L), ('\u{e0001}',
|
||||
'\u{e0001}', BN), ('\u{e0020}', '\u{e007f}', BN), ('\u{e0100}', '\u{e01ef}', NSM),
|
||||
('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L)
|
||||
];
|
||||
|
220
third_party/rust/unicode-bidi/tools/generate.py
vendored
220
third_party/rust/unicode-bidi/tools/generate.py
vendored
@ -1,220 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Based on src/etc/unicode.py from Rust 1.2.0.
|
||||
#
|
||||
# Copyright 2011-2013 The Rust Project Developers.
|
||||
# Copyright 2015 The Servo Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
|
||||
import fileinput, re, os, sys, operator
|
||||
|
||||
preamble = '''// NOTE:
|
||||
// The following code was generated by "tools/generate.py". do not edit directly
|
||||
|
||||
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
|
||||
'''
|
||||
|
||||
# these are the surrogate codepoints, which are not valid rust characters
|
||||
surrogate_codepoints = (0xd800, 0xdfff)
|
||||
|
||||
def fetch(f):
|
||||
if not os.path.exists(os.path.basename(f)):
|
||||
os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
|
||||
% f)
|
||||
|
||||
if not os.path.exists(os.path.basename(f)):
|
||||
sys.stderr.write("cannot load %s" % f)
|
||||
exit(1)
|
||||
|
||||
def is_surrogate(n):
|
||||
return surrogate_codepoints[0] <= n <= surrogate_codepoints[1]
|
||||
|
||||
def load_unicode_data(f):
|
||||
fetch(f)
|
||||
udict = {};
|
||||
|
||||
range_start = -1;
|
||||
for line in fileinput.input(f):
|
||||
data = line.split(';');
|
||||
if len(data) != 15:
|
||||
continue
|
||||
cp = int(data[0], 16);
|
||||
if is_surrogate(cp):
|
||||
continue
|
||||
if range_start >= 0:
|
||||
for i in xrange(range_start, cp):
|
||||
udict[i] = data;
|
||||
range_start = -1;
|
||||
if data[1].endswith(", First>"):
|
||||
range_start = cp;
|
||||
continue;
|
||||
udict[cp] = data;
|
||||
|
||||
# Mapping of code point to Bidi_Class property:
|
||||
bidi_class = {}
|
||||
|
||||
for code in udict:
|
||||
[code_org, name, gencat, combine, bidi,
|
||||
decomp, deci, digit, num, mirror,
|
||||
old, iso, upcase, lowcase, titlecase ] = udict[code];
|
||||
|
||||
if bidi not in bidi_class:
|
||||
bidi_class[bidi] = []
|
||||
bidi_class[bidi].append(code)
|
||||
|
||||
# Default Bidi_Class for unassigned codepoints.
|
||||
# http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt
|
||||
default_ranges = [
|
||||
(0x0600, 0x07BF, "AL"), (0x08A0, 0x08FF, "AL"),
|
||||
(0xFB50, 0xFDCF, "AL"), (0xFDF0, 0xFDFF, "AL"),
|
||||
(0xFE70, 0xFEFF, "AL"), (0x1EE00, 0x0001EEFF, "AL"),
|
||||
|
||||
(0x0590, 0x05FF, "R"), (0x07C0, 0x089F, "R"),
|
||||
(0xFB1D, 0xFB4F, "R"), (0x00010800, 0x00010FFF, "R"),
|
||||
(0x0001E800, 0x0001EDFF, "R"), (0x0001EF00, 0x0001EFFF, "R"),
|
||||
|
||||
(0x20A0, 0x20CF, "ET")]
|
||||
|
||||
for (start, end, default) in default_ranges:
|
||||
for code in range(start, end+1):
|
||||
if not code in udict:
|
||||
bidi_class[default].append(code)
|
||||
|
||||
bidi_class = group_cats(bidi_class)
|
||||
return bidi_class
|
||||
|
||||
def group_cats(cats):
|
||||
cats_out = []
|
||||
for cat in cats:
|
||||
cats_out.extend([(x, y, cat) for (x, y) in group_cat(cats[cat])])
|
||||
cats_out.sort(key=lambda w: w[0])
|
||||
return (sorted(cats.keys()), cats_out)
|
||||
|
||||
def group_cat(cat):
|
||||
cat_out = []
|
||||
letters = sorted(set(cat))
|
||||
cur_start = letters.pop(0)
|
||||
cur_end = cur_start
|
||||
for letter in letters:
|
||||
assert letter > cur_end, \
|
||||
"cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
|
||||
if letter == cur_end + 1:
|
||||
cur_end = letter
|
||||
else:
|
||||
cat_out.append((cur_start, cur_end))
|
||||
cur_start = cur_end = letter
|
||||
cat_out.append((cur_start, cur_end))
|
||||
return cat_out
|
||||
|
||||
def format_table_content(f, content, indent):
|
||||
line = " "*indent
|
||||
first = True
|
||||
for chunk in content.split(","):
|
||||
if len(line) + len(chunk) < 98:
|
||||
if first:
|
||||
line += chunk
|
||||
else:
|
||||
line += ", " + chunk
|
||||
first = False
|
||||
else:
|
||||
f.write(line + ",\n")
|
||||
line = " "*indent + chunk
|
||||
f.write(line)
|
||||
|
||||
def escape_char(c):
|
||||
return "'\\u{%x}'" % c
|
||||
|
||||
def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
|
||||
pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))):
|
||||
pub_string = ""
|
||||
if is_pub:
|
||||
pub_string = "pub "
|
||||
f.write(" %sconst %s: %s = &[\n" % (pub_string, name, t_type))
|
||||
data = ""
|
||||
first = True
|
||||
for dat in t_data:
|
||||
if not first:
|
||||
data += ","
|
||||
first = False
|
||||
data += pfun(dat)
|
||||
format_table_content(f, data, 8)
|
||||
f.write("\n ];\n\n")
|
||||
|
||||
def emit_bidi_module(f, bidi_class, cats):
|
||||
f.write("""pub use self::BidiClass::*;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
/// Represents the Unicode character property **Bidi_Class**, also known as
|
||||
/// the *bidirectional character type*.
|
||||
///
|
||||
/// Use the `bidi_class` function to look up the BidiClass of a code point.
|
||||
///
|
||||
/// http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types
|
||||
pub enum BidiClass {
|
||||
""")
|
||||
for cat in cats:
|
||||
f.write(" " + cat + ",\n")
|
||||
f.write(""" }
|
||||
|
||||
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
|
||||
use ::std::cmp::Ordering::{Equal, Less, Greater};
|
||||
match r.binary_search_by(|&(lo, hi, _)| {
|
||||
if lo <= c && c <= hi { Equal }
|
||||
else if hi < c { Less }
|
||||
else { Greater }
|
||||
}) {
|
||||
Ok(idx) => {
|
||||
let (_, _, cat) = r[idx];
|
||||
cat
|
||||
}
|
||||
// UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
|
||||
// for Bidi_Class have the value Left_To_Right (L)."
|
||||
Err(_) => L
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the BidiClass of a single char.
|
||||
pub fn bidi_class(c: char) -> BidiClass {
|
||||
bsearch_range_value_table(c, bidi_class_table)
|
||||
}
|
||||
|
||||
""")
|
||||
|
||||
emit_table(f, "bidi_class_table", bidi_class, "&'static [(char, char, BidiClass)]",
|
||||
pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]),
|
||||
is_pub=False)
|
||||
|
||||
if __name__ == "__main__":
|
||||
os.chdir("../src/") # changing download path to /unicode-bidi/src/
|
||||
r = "tables.rs"
|
||||
# downloading the test case files
|
||||
fetch("BidiTest.txt")
|
||||
fetch("BidiCharacterTest.txt")
|
||||
|
||||
if os.path.exists(r):
|
||||
os.remove(r)
|
||||
with open(r, "w") as rf:
|
||||
# write the file's preamble
|
||||
rf.write(preamble)
|
||||
|
||||
# download and parse all the data
|
||||
fetch("ReadMe.txt")
|
||||
with open("ReadMe.txt") as readme:
|
||||
pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
|
||||
unicode_version = re.search(pattern, readme.read()).groups()
|
||||
rf.write("""
|
||||
/// The version of [Unicode](http://www.unicode.org/)
|
||||
/// that the `bidi_class` function is based on.
|
||||
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
|
||||
""" % unicode_version)
|
||||
(bidi_cats, bidi_class) = load_unicode_data("UnicodeData.txt")
|
||||
emit_bidi_module(rf, bidi_class, bidi_cats)
|
@ -1 +0,0 @@
|
||||
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"2282653731b0b47b955075a5a366b16c4ea092dbe7de4f9bb68a0e36abbb62f3",".travis.yml":"493704d6a0d0f27ad2ad6e950f8bce5f42d9ec4081daeb3c5a48066f1030f467","COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"098ff7b940aedf372f29c54788e1a58173a54846c88fec5c627b3d74d013298c","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"8a937b4305a42bfa5a2faf7baaaae8156d055537934a53eed2d3bc9a3f9da40a","scripts/unicode.py":"64a1b919ab0e251fdb1db8b9c5363d84227fca33ac8375268bb88b74c4462f8f","scripts/unicode_gen_normtests.py":"da891d433fa58068747a1cd121774435b7d486394ce5c85c8079b227d20ea507","src/DerivedNormalizationProps.txt":"6cc76100d402d7dd12bd769ac61158b91e71aad58c8333abb848f1b9c9fdde2d","src/ReadMe.txt":"e4dc6045cb89ddaca938ff7ffffc2c2aafc5d4fc85023fffb9b36d4aee47986b","src/UnicodeData.txt":"38b17e1118206489a7e0ab5d29d7932212d38838df7d3ec025ecb58e8798ec20","src/decompose.rs":"04818a6b0271412ec58508e44535b18c58c80384a5836fe5b2c24e489a5ab4cc","src/lib.rs":"d3cab5f68a71b640da82859d06d348a0c3050d611c17cc9cb66989492437a98c","src/normalize.rs":"6bcc92e3f2ee9922a017a7b7adc8dfd88de4bcf777974e07da3688559d0ec008","src/recompose.rs":"96b8aea91e09f1fa439467378f2d1fa2aa4c81c86e597d3d36a2a35d7750a0d6","src/tables.rs":"bc930c04ed78307e624a225ec27679cf3fad00ba13cb2f283963476cd449e84e","src/test.rs":"e47de49aeef9231f5ff177eeb9946618577fc67ed798e889b99710c877e92e1b","src/testdata.rs":"6a67ae0a072c7d65fca0d101f4f5831592ff24ab137f8e1c4f9524f2bdc8d63e"},"package":"26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"}
|
@ -1,4 +0,0 @@
|
||||
target
|
||||
Cargo.lock
|
||||
scripts/tmp
|
||||
*.pyc
|
@ -1,20 +0,0 @@
|
||||
language: rust
|
||||
sudo: false
|
||||
script:
|
||||
- cargo build --verbose
|
||||
- cargo test --verbose
|
||||
- rustdoc --test README.md -L target/debug -L target/debug/deps
|
||||
- cargo doc
|
||||
after_success: |
|
||||
[ $TRAVIS_BRANCH = master ] &&
|
||||
[ $TRAVIS_PULL_REQUEST = false ] &&
|
||||
echo '<meta http-equiv=refresh content=0;url=unicode_normalization/index.html>' > target/doc/index.html &&
|
||||
pip install ghp-import --user $USER &&
|
||||
$HOME/.local/bin/ghp-import -n target/doc &&
|
||||
git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
|
||||
env:
|
||||
global:
|
||||
secure: Te91dtDN8uv5OBxVuMy+nvQ5GtnLU9r6amS9p6IbblVXyzXgXPQdFfAND+GXXfZNnsjAyS2LnZL4NcNOR4JF63H0KxTTDIGXLSUAmc0C98UhqWWvv5bjz4mY0YKs9MwuTBX9P0LdTZjvRTd+yJ2PYH7ORGen+ZuOmlPNE7lpzrg=
|
||||
notifications:
|
||||
email:
|
||||
on_success: never
|
@ -1,7 +0,0 @@
|
||||
Licensed under the Apache License, Version 2.0
|
||||
<LICENSE-APACHE or
|
||||
http://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
|
||||
at your option. All files in the project carrying such
|
||||
notice may not be copied, modified, or distributed except
|
||||
according to those terms.
|
@ -1,21 +0,0 @@
|
||||
[package]
|
||||
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.2"
|
||||
authors = ["kwantam <kwantam@gmail.com>"]
|
||||
|
||||
homepage = "https://github.com/unicode-rs/unicode-normalization"
|
||||
repository = "https://github.com/unicode-rs/unicode-normalization"
|
||||
documentation = "https://unicode-rs.github.io/unicode-normalization"
|
||||
|
||||
license = "MIT/Apache-2.0"
|
||||
keywords = ["text", "unicode", "normalization", "decomposition", "recomposition"]
|
||||
readme = "README.md"
|
||||
description = """
|
||||
This crate provides functions for normalization of
|
||||
Unicode strings, including Canonical and Compatible
|
||||
Decomposition and Recomposition, as described in
|
||||
Unicode Standard Annex #15.
|
||||
"""
|
||||
|
||||
exclude = [ "target/*", "Cargo.lock", "scripts/tmp" ]
|
@ -1,201 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
@ -1,25 +0,0 @@
|
||||
Copyright (c) 2015 The Rust Project Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
32
third_party/rust/unicode-normalization/README.md
vendored
32
third_party/rust/unicode-normalization/README.md
vendored
@ -1,32 +0,0 @@
|
||||
Unicode character composition and decomposition utilities
|
||||
as described in
|
||||
[Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
|
||||
|
||||
[![Build Status](https://travis-ci.org/unicode-rs/unicode-normalization.svg)](https://travis-ci.org/unicode-rs/unicode-normalization)
|
||||
|
||||
[Documentation](https://unicode-rs.github.io/unicode-normalization/unicode_normalization/index.html)
|
||||
|
||||
```rust
|
||||
extern crate unicode_normalization;
|
||||
|
||||
use unicode_normalization::char::compose;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
fn main() {
|
||||
assert_eq!(compose('A','\u{30a}'), Some('Å'));
|
||||
|
||||
let s = "ÅΩ";
|
||||
let c = s.nfc().collect::<String>();
|
||||
assert_eq!(c, "ÅΩ");
|
||||
}
|
||||
```
|
||||
|
||||
# crates.io
|
||||
|
||||
You can use this package in your project by adding the following
|
||||
to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
unicode-normalization = "0.1.1"
|
||||
```
|
@ -1,372 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
# This script uses the following Unicode tables:
|
||||
# - DerivedNormalizationProps.txt
|
||||
# - ReadMe.txt
|
||||
# - UnicodeData.txt
|
||||
#
|
||||
# Since this should not require frequent updates, we just store this
|
||||
# out-of-line and check the unicode.rs file into git.
|
||||
|
||||
import fileinput, re, os, sys
|
||||
|
||||
preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
|
||||
|
||||
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
|
||||
'''
|
||||
|
||||
# Mapping taken from Table 12 from:
|
||||
# http://www.unicode.org/reports/tr44/#General_Category_Values
|
||||
expanded_categories = {
|
||||
'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
|
||||
'Lm': ['L'], 'Lo': ['L'],
|
||||
'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
|
||||
'Nd': ['N'], 'Nl': ['N'], 'No': ['No'],
|
||||
'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
|
||||
'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
|
||||
'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
|
||||
'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
|
||||
'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
|
||||
}
|
||||
|
||||
# these are the surrogate codepoints, which are not valid rust characters
|
||||
surrogate_codepoints = (0xd800, 0xdfff)
|
||||
|
||||
def fetch(f):
|
||||
if not os.path.exists(os.path.basename(f)):
|
||||
os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
|
||||
% f)
|
||||
|
||||
if not os.path.exists(os.path.basename(f)):
|
||||
sys.stderr.write("cannot load %s" % f)
|
||||
exit(1)
|
||||
|
||||
def is_surrogate(n):
|
||||
return surrogate_codepoints[0] <= n <= surrogate_codepoints[1]
|
||||
|
||||
def load_unicode_data(f):
|
||||
fetch(f)
|
||||
combines = {}
|
||||
canon_decomp = {}
|
||||
compat_decomp = {}
|
||||
general_category_mark = []
|
||||
|
||||
udict = {};
|
||||
range_start = -1;
|
||||
for line in fileinput.input(f):
|
||||
data = line.split(';');
|
||||
if len(data) != 15:
|
||||
continue
|
||||
cp = int(data[0], 16);
|
||||
if is_surrogate(cp):
|
||||
continue
|
||||
if range_start >= 0:
|
||||
for i in xrange(range_start, cp):
|
||||
udict[i] = data;
|
||||
range_start = -1;
|
||||
if data[1].endswith(", First>"):
|
||||
range_start = cp;
|
||||
continue;
|
||||
udict[cp] = data;
|
||||
|
||||
for code in udict:
|
||||
[code_org, name, gencat, combine, bidi,
|
||||
decomp, deci, digit, num, mirror,
|
||||
old, iso, upcase, lowcase, titlecase ] = udict[code];
|
||||
|
||||
# store decomposition, if given
|
||||
if decomp != "":
|
||||
if decomp.startswith('<'):
|
||||
seq = []
|
||||
for i in decomp.split()[1:]:
|
||||
seq.append(int(i, 16))
|
||||
compat_decomp[code] = seq
|
||||
else:
|
||||
seq = []
|
||||
for i in decomp.split():
|
||||
seq.append(int(i, 16))
|
||||
canon_decomp[code] = seq
|
||||
|
||||
# record combining class, if any
|
||||
if combine != "0":
|
||||
if combine not in combines:
|
||||
combines[combine] = []
|
||||
combines[combine].append(code)
|
||||
|
||||
if 'M' in [gencat] + expanded_categories.get(gencat, []):
|
||||
general_category_mark.append(code)
|
||||
general_category_mark = group_cat(general_category_mark)
|
||||
|
||||
combines = to_combines(group_cats(combines))
|
||||
|
||||
return (canon_decomp, compat_decomp, combines, general_category_mark)
|
||||
|
||||
def group_cats(cats):
|
||||
cats_out = {}
|
||||
for cat in cats:
|
||||
cats_out[cat] = group_cat(cats[cat])
|
||||
return cats_out
|
||||
|
||||
def group_cat(cat):
|
||||
cat_out = []
|
||||
letters = sorted(set(cat))
|
||||
cur_start = letters.pop(0)
|
||||
cur_end = cur_start
|
||||
for letter in letters:
|
||||
assert letter > cur_end, \
|
||||
"cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
|
||||
if letter == cur_end + 1:
|
||||
cur_end = letter
|
||||
else:
|
||||
cat_out.append((cur_start, cur_end))
|
||||
cur_start = cur_end = letter
|
||||
cat_out.append((cur_start, cur_end))
|
||||
return cat_out
|
||||
|
||||
def ungroup_cat(cat):
|
||||
cat_out = []
|
||||
for (lo, hi) in cat:
|
||||
while lo <= hi:
|
||||
cat_out.append(lo)
|
||||
lo += 1
|
||||
return cat_out
|
||||
|
||||
def to_combines(combs):
|
||||
combs_out = []
|
||||
for comb in combs:
|
||||
for (lo, hi) in combs[comb]:
|
||||
combs_out.append((lo, hi, comb))
|
||||
combs_out.sort(key=lambda comb: comb[0])
|
||||
return combs_out
|
||||
|
||||
def format_table_content(f, content, indent):
|
||||
line = " "*indent
|
||||
first = True
|
||||
for chunk in content.split(","):
|
||||
if len(line) + len(chunk) < 98:
|
||||
if first:
|
||||
line += chunk
|
||||
else:
|
||||
line += ", " + chunk
|
||||
first = False
|
||||
else:
|
||||
f.write(line + ",\n")
|
||||
line = " "*indent + chunk
|
||||
f.write(line)
|
||||
|
||||
def load_properties(f, interestingprops):
|
||||
fetch(f)
|
||||
props = {}
|
||||
re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
|
||||
re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
|
||||
|
||||
for line in fileinput.input(os.path.basename(f)):
|
||||
prop = None
|
||||
d_lo = 0
|
||||
d_hi = 0
|
||||
m = re1.match(line)
|
||||
if m:
|
||||
d_lo = m.group(1)
|
||||
d_hi = m.group(1)
|
||||
prop = m.group(2)
|
||||
else:
|
||||
m = re2.match(line)
|
||||
if m:
|
||||
d_lo = m.group(1)
|
||||
d_hi = m.group(2)
|
||||
prop = m.group(3)
|
||||
else:
|
||||
continue
|
||||
if interestingprops and prop not in interestingprops:
|
||||
continue
|
||||
d_lo = int(d_lo, 16)
|
||||
d_hi = int(d_hi, 16)
|
||||
if prop not in props:
|
||||
props[prop] = []
|
||||
props[prop].append((d_lo, d_hi))
|
||||
|
||||
# optimize if possible
|
||||
for prop in props:
|
||||
props[prop] = group_cat(ungroup_cat(props[prop]))
|
||||
|
||||
return props
|
||||
|
||||
def escape_char(c):
|
||||
return "'\\u{%x}'" % c
|
||||
|
||||
def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
|
||||
pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))):
|
||||
pub_string = ""
|
||||
if is_pub:
|
||||
pub_string = "pub "
|
||||
f.write(" %sconst %s: %s = &[\n" % (pub_string, name, t_type))
|
||||
data = ""
|
||||
first = True
|
||||
for dat in t_data:
|
||||
if not first:
|
||||
data += ","
|
||||
first = False
|
||||
data += pfun(dat)
|
||||
format_table_content(f, data, 8)
|
||||
f.write("\n ];\n\n")
|
||||
|
||||
def emit_norm_module(f, canon, compat, combine, norm_props, general_category_mark):
|
||||
canon_keys = canon.keys()
|
||||
canon_keys.sort()
|
||||
|
||||
compat_keys = compat.keys()
|
||||
compat_keys.sort()
|
||||
|
||||
canon_comp = {}
|
||||
comp_exclusions = norm_props["Full_Composition_Exclusion"]
|
||||
for char in canon_keys:
|
||||
if True in map(lambda (lo, hi): lo <= char <= hi, comp_exclusions):
|
||||
continue
|
||||
decomp = canon[char]
|
||||
if len(decomp) == 2:
|
||||
if not canon_comp.has_key(decomp[0]):
|
||||
canon_comp[decomp[0]] = []
|
||||
canon_comp[decomp[0]].append( (decomp[1], char) )
|
||||
canon_comp_keys = canon_comp.keys()
|
||||
canon_comp_keys.sort()
|
||||
|
||||
f.write("pub mod normalization {\n")
|
||||
|
||||
def mkdata_fun(table):
|
||||
def f(char):
|
||||
data = "(%s,&[" % escape_char(char)
|
||||
first = True
|
||||
for d in table[char]:
|
||||
if not first:
|
||||
data += ","
|
||||
first = False
|
||||
data += escape_char(d)
|
||||
data += "])"
|
||||
return data
|
||||
return f
|
||||
|
||||
f.write(" // Canonical decompositions\n")
|
||||
emit_table(f, "canonical_table", canon_keys, "&'static [(char, &'static [char])]",
|
||||
pfun=mkdata_fun(canon))
|
||||
|
||||
f.write(" // Compatibility decompositions\n")
|
||||
emit_table(f, "compatibility_table", compat_keys, "&'static [(char, &'static [char])]",
|
||||
pfun=mkdata_fun(compat))
|
||||
|
||||
def comp_pfun(char):
|
||||
data = "(%s,&[" % escape_char(char)
|
||||
canon_comp[char].sort(lambda x, y: x[0] - y[0])
|
||||
first = True
|
||||
for pair in canon_comp[char]:
|
||||
if not first:
|
||||
data += ","
|
||||
first = False
|
||||
data += "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1]))
|
||||
data += "])"
|
||||
return data
|
||||
|
||||
f.write(" // Canonical compositions\n")
|
||||
emit_table(f, "composition_table", canon_comp_keys,
|
||||
"&'static [(char, &'static [(char, char)])]", pfun=comp_pfun)
|
||||
|
||||
f.write("""
|
||||
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
|
||||
use std::cmp::Ordering::{Equal, Less, Greater};
|
||||
match r.binary_search_by(|&(lo, hi, _)| {
|
||||
if lo <= c && c <= hi { Equal }
|
||||
else if hi < c { Less }
|
||||
else { Greater }
|
||||
}) {
|
||||
Ok(idx) => {
|
||||
let (_, _, result) = r[idx];
|
||||
result
|
||||
}
|
||||
Err(_) => 0
|
||||
}
|
||||
}\n
|
||||
""")
|
||||
|
||||
emit_table(f, "combining_class_table", combine, "&'static [(char, char, u8)]", is_pub=False,
|
||||
pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))
|
||||
|
||||
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
|
||||
+ " bsearch_range_value_table(c, combining_class_table)\n"
|
||||
+ " }\n")
|
||||
|
||||
f.write("""
|
||||
fn bsearch_range_table(c: char, r: &'static [(char, char)]) -> bool {
|
||||
use std::cmp::Ordering::{Equal, Less, Greater};
|
||||
r.binary_search_by(|&(lo, hi)| {
|
||||
if lo <= c && c <= hi {
|
||||
Equal
|
||||
} else if hi < c {
|
||||
Less
|
||||
} else {
|
||||
Greater
|
||||
}
|
||||
})
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
/// Return whether the given character is a combining mark (`General_Category=Mark`)
|
||||
pub fn is_combining_mark(c: char) -> bool {
|
||||
bsearch_range_table(c, general_category_mark)
|
||||
}
|
||||
|
||||
""")
|
||||
|
||||
emit_table(f, "general_category_mark", combine, "&'static [(char, char)]", is_pub=False,
|
||||
pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])))
|
||||
|
||||
f.write("""
|
||||
}
|
||||
|
||||
""")
|
||||
|
||||
if __name__ == "__main__":
|
||||
r = "tables.rs"
|
||||
if os.path.exists(r):
|
||||
os.remove(r)
|
||||
with open(r, "w") as rf:
|
||||
# write the file's preamble
|
||||
rf.write(preamble)
|
||||
|
||||
# download and parse all the data
|
||||
fetch("ReadMe.txt")
|
||||
with open("ReadMe.txt") as readme:
|
||||
pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
|
||||
unicode_version = re.search(pattern, readme.read()).groups()
|
||||
rf.write("""
|
||||
/// The version of [Unicode](http://www.unicode.org/)
|
||||
/// that this version of unicode-normalization is based on.
|
||||
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
|
||||
|
||||
""" % unicode_version)
|
||||
(canon_decomp, compat_decomp, combines, general_category_mark) = \
|
||||
load_unicode_data("UnicodeData.txt")
|
||||
norm_props = load_properties("DerivedNormalizationProps.txt",
|
||||
["Full_Composition_Exclusion"])
|
||||
|
||||
# normalizations and conversions module
|
||||
emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props,
|
||||
general_category_mark)
|
@ -1,81 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
# This script uses the following Unicode tables:
|
||||
# - NormalizationTest.txt
|
||||
#
|
||||
# Since this should not require frequent updates, we just store this
|
||||
# out-of-line and check the unicode.rs file into git.
|
||||
|
||||
import unicode, re, os, fileinput
|
||||
|
||||
def load_test_data(f):
|
||||
outls = []
|
||||
testRe = re.compile("^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")
|
||||
|
||||
unicode.fetch(f)
|
||||
for line in fileinput.input(os.path.basename(f)):
|
||||
# comment and header lines start with # and @ respectively
|
||||
if len(line) < 1 or line[0:1] == '#' or line[0:1] == '@':
|
||||
continue
|
||||
|
||||
m = testRe.match(line)
|
||||
groups = []
|
||||
if not m:
|
||||
print "error: no match on line where test was expected: %s" % line
|
||||
continue
|
||||
|
||||
has_surrogates = False
|
||||
for i in range(1, 6):
|
||||
group = []
|
||||
chs = m.group(i).split()
|
||||
for ch in chs:
|
||||
intch = int(ch,16)
|
||||
if unicode.is_surrogate(intch):
|
||||
has_surrogates = True
|
||||
break
|
||||
group.append(intch)
|
||||
|
||||
if has_surrogates:
|
||||
break
|
||||
groups.append(group)
|
||||
|
||||
if has_surrogates:
|
||||
continue
|
||||
outls.append(groups)
|
||||
|
||||
return outls
|
||||
|
||||
def showfun(gs):
|
||||
outstr = '('
|
||||
gfirst = True
|
||||
for g in gs:
|
||||
if not gfirst:
|
||||
outstr += ','
|
||||
gfirst = False
|
||||
|
||||
outstr += '"'
|
||||
for ch in g:
|
||||
outstr += "\\u{%x}" % ch
|
||||
outstr += '"'
|
||||
outstr += ')'
|
||||
return outstr
|
||||
|
||||
if __name__ == "__main__":
|
||||
d = load_test_data("NormalizationTest.txt")
|
||||
ntype = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
|
||||
with open("testdata.rs", "w") as nf:
|
||||
nf.write(unicode.preamble)
|
||||
nf.write("\n")
|
||||
nf.write(" // official Unicode test data\n")
|
||||
nf.write(" // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n")
|
||||
unicode.emit_table(nf, "TEST_NORM", d, ntype, True, showfun)
|
File diff suppressed because it is too large
Load Diff
@ -1,17 +0,0 @@
|
||||
# Date: 2015-06-16, 20:24:00 GMT [KW]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2015 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# For documentation, see the following:
|
||||
# NamesList.html
|
||||
# UAX #38, "Unicode Han Database (Unihan)"
|
||||
# UAX #44, "Unicode Character Database."
|
||||
#
|
||||
# The UAXes can be accessed at http://www.unicode.org/versions/Unicode8.0.0/
|
||||
|
||||
This directory contains the final data files
|
||||
for the Unicode Character Database, for Version 8.0.0 of the Unicode
|
||||
Standard.
|
||||
|
29215
third_party/rust/unicode-normalization/src/UnicodeData.txt
vendored
29215
third_party/rust/unicode-normalization/src/UnicodeData.txt
vendored
File diff suppressed because it is too large
Load Diff
@ -1,135 +0,0 @@
|
||||
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
|
||||
// Helper functions used for Unicode normalization
|
||||
fn canonical_sort(comb: &mut [(char, u8)]) {
|
||||
let len = comb.len();
|
||||
for i in 0..len {
|
||||
let mut swapped = false;
|
||||
for j in 1..len-i {
|
||||
let class_a = comb[j-1].1;
|
||||
let class_b = comb[j].1;
|
||||
if class_a != 0 && class_b != 0 && class_a > class_b {
|
||||
comb.swap(j-1, j);
|
||||
swapped = true;
|
||||
}
|
||||
}
|
||||
if !swapped { break; }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum DecompositionType {
|
||||
Canonical,
|
||||
Compatible
|
||||
}
|
||||
|
||||
/// External iterator for a string decomposition's characters.
|
||||
#[derive(Clone)]
|
||||
pub struct Decompositions<I> {
|
||||
kind: DecompositionType,
|
||||
iter: I,
|
||||
buffer: Vec<(char, u8)>,
|
||||
sorted: bool
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
|
||||
Decompositions {
|
||||
iter: iter,
|
||||
buffer: Vec::new(),
|
||||
sorted: false,
|
||||
kind: self::DecompositionType::Canonical,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
|
||||
Decompositions {
|
||||
iter: iter,
|
||||
buffer: Vec::new(),
|
||||
sorted: false,
|
||||
kind: self::DecompositionType::Compatible,
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
|
||||
type Item = char;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<char> {
|
||||
use self::DecompositionType::*;
|
||||
|
||||
match self.buffer.first() {
|
||||
Some(&(c, 0)) => {
|
||||
self.sorted = false;
|
||||
self.buffer.remove(0);
|
||||
return Some(c);
|
||||
}
|
||||
Some(&(c, _)) if self.sorted => {
|
||||
self.buffer.remove(0);
|
||||
return Some(c);
|
||||
}
|
||||
_ => self.sorted = false
|
||||
}
|
||||
|
||||
if !self.sorted {
|
||||
for ch in self.iter.by_ref() {
|
||||
let buffer = &mut self.buffer;
|
||||
let sorted = &mut self.sorted;
|
||||
{
|
||||
let callback = |d| {
|
||||
let class =
|
||||
super::char::canonical_combining_class(d);
|
||||
if class == 0 && !*sorted {
|
||||
canonical_sort(buffer);
|
||||
*sorted = true;
|
||||
}
|
||||
buffer.push((d, class));
|
||||
};
|
||||
match self.kind {
|
||||
Canonical => {
|
||||
super::char::decompose_canonical(ch, callback)
|
||||
}
|
||||
Compatible => {
|
||||
super::char::decompose_compatible(ch, callback)
|
||||
}
|
||||
}
|
||||
}
|
||||
if *sorted {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !self.sorted {
|
||||
canonical_sort(&mut self.buffer);
|
||||
self.sorted = true;
|
||||
}
|
||||
|
||||
if self.buffer.is_empty() {
|
||||
None
|
||||
} else {
|
||||
match self.buffer.remove(0) {
|
||||
(c, 0) => {
|
||||
self.sorted = false;
|
||||
Some(c)
|
||||
}
|
||||
(c, _) => Some(c),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let (lower, _) = self.iter.size_hint();
|
||||
(lower, None)
|
||||
}
|
||||
}
|
138
third_party/rust/unicode-normalization/src/lib.rs
vendored
138
third_party/rust/unicode-normalization/src/lib.rs
vendored
@ -1,138 +0,0 @@
|
||||
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Unicode character composition and decomposition utilities
|
||||
//! as described in
|
||||
//! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
|
||||
//!
|
||||
//! ```rust
|
||||
//! extern crate unicode_normalization;
|
||||
//!
|
||||
//! use unicode_normalization::char::compose;
|
||||
//! use unicode_normalization::UnicodeNormalization;
|
||||
//!
|
||||
//! fn main() {
|
||||
//! assert_eq!(compose('A','\u{30a}'), Some('Å'));
|
||||
//!
|
||||
//! let s = "ÅΩ";
|
||||
//! let c = s.nfc().collect::<String>();
|
||||
//! assert_eq!(c, "ÅΩ");
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! # crates.io
|
||||
//!
|
||||
//! You can use this package in your project by adding the following
|
||||
//! to your `Cargo.toml`:
|
||||
//!
|
||||
//! ```toml
|
||||
//! [dependencies]
|
||||
//! unicode-normalization = "0.1.0"
|
||||
//! ```
|
||||
|
||||
#![deny(missing_docs, unsafe_code)]
|
||||
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
|
||||
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
|
||||
|
||||
pub use tables::UNICODE_VERSION;
|
||||
pub use decompose::Decompositions;
|
||||
pub use recompose::Recompositions;
|
||||
use std::str::Chars;
|
||||
|
||||
mod decompose;
|
||||
mod normalize;
|
||||
mod recompose;
|
||||
mod tables;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
#[cfg(test)]
|
||||
mod testdata;
|
||||
|
||||
/// Methods for composing and decomposing characters.
|
||||
pub mod char {
|
||||
pub use normalize::{decompose_canonical, decompose_compatible, compose};
|
||||
|
||||
/// Look up the canonical combining class of a character.
|
||||
pub use tables::normalization::canonical_combining_class;
|
||||
|
||||
/// Return whether the given character is a combining mark (`General_Category=Mark`)
|
||||
pub use tables::normalization::is_combining_mark;
|
||||
}
|
||||
|
||||
|
||||
/// Methods for iterating over strings while applying Unicode normalizations
|
||||
/// as described in
|
||||
/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
|
||||
pub trait UnicodeNormalization<I: Iterator<Item=char>> {
|
||||
/// Returns an iterator over the string in Unicode Normalization Form D
|
||||
/// (canonical decomposition).
|
||||
#[inline]
|
||||
fn nfd(self) -> Decompositions<I>;
|
||||
|
||||
/// Returns an iterator over the string in Unicode Normalization Form KD
|
||||
/// (compatibility decomposition).
|
||||
#[inline]
|
||||
fn nfkd(self) -> Decompositions<I>;
|
||||
|
||||
/// An Iterator over the string in Unicode Normalization Form C
|
||||
/// (canonical decomposition followed by canonical composition).
|
||||
#[inline]
|
||||
fn nfc(self) -> Recompositions<I>;
|
||||
|
||||
/// An Iterator over the string in Unicode Normalization Form KC
|
||||
/// (compatibility decomposition followed by canonical composition).
|
||||
#[inline]
|
||||
fn nfkc(self) -> Recompositions<I>;
|
||||
}
|
||||
|
||||
impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
|
||||
#[inline]
|
||||
fn nfd(self) -> Decompositions<Chars<'a>> {
|
||||
decompose::new_canonical(self.chars())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nfkd(self) -> Decompositions<Chars<'a>> {
|
||||
decompose::new_compatible(self.chars())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nfc(self) -> Recompositions<Chars<'a>> {
|
||||
recompose::new_canonical(self.chars())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nfkc(self) -> Recompositions<Chars<'a>> {
|
||||
recompose::new_compatible(self.chars())
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I {
|
||||
#[inline]
|
||||
fn nfd(self) -> Decompositions<I> {
|
||||
decompose::new_canonical(self)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nfkd(self) -> Decompositions<I> {
|
||||
decompose::new_compatible(self)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nfc(self) -> Recompositions<I> {
|
||||
recompose::new_canonical(self)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nfkc(self) -> Recompositions<I> {
|
||||
recompose::new_compatible(self)
|
||||
}
|
||||
}
|
@ -1,158 +0,0 @@
|
||||
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Functions for computing canonical and compatible decompositions for Unicode characters.
|
||||
|
||||
use std::cmp::Ordering::{Equal, Less, Greater};
|
||||
use std::ops::FnMut;
|
||||
use tables::normalization::{canonical_table, compatibility_table, composition_table};
|
||||
|
||||
fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'static [T]> {
|
||||
match r.binary_search_by(|&(val, _)| {
|
||||
if c == val { Equal }
|
||||
else if val < c { Less }
|
||||
else { Greater }
|
||||
}) {
|
||||
Ok(idx) => {
|
||||
let (_, result) = r[idx];
|
||||
Some(result)
|
||||
}
|
||||
Err(_) => None
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute canonical Unicode decomposition for character.
|
||||
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
|
||||
/// for more information.
|
||||
pub fn decompose_canonical<F>(c: char, mut i: F) where F: FnMut(char) { d(c, &mut i, false); }
|
||||
|
||||
/// Compute canonical or compatible Unicode decomposition for character.
|
||||
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
|
||||
/// for more information.
|
||||
pub fn decompose_compatible<F>(c: char, mut i: F) where F: FnMut(char) { d(c, &mut i, true); }
|
||||
|
||||
// FIXME(#19596) This is a workaround, we should use `F` instead of `&mut F`
|
||||
fn d<F>(c: char, i: &mut F, k: bool) where F: FnMut(char) {
|
||||
// 7-bit ASCII never decomposes
|
||||
if c <= '\x7f' { (*i)(c); return; }
|
||||
|
||||
// Perform decomposition for Hangul
|
||||
if (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) {
|
||||
decompose_hangul(c, i);
|
||||
return;
|
||||
}
|
||||
|
||||
// First check the canonical decompositions
|
||||
match bsearch_table(c, canonical_table) {
|
||||
Some(canon) => {
|
||||
for x in canon {
|
||||
d(*x, i, k);
|
||||
}
|
||||
return;
|
||||
}
|
||||
None => ()
|
||||
}
|
||||
|
||||
// Bottom out if we're not doing compat.
|
||||
if !k { (*i)(c); return; }
|
||||
|
||||
// Then check the compatibility decompositions
|
||||
match bsearch_table(c, compatibility_table) {
|
||||
Some(compat) => {
|
||||
for x in compat {
|
||||
d(*x, i, k);
|
||||
}
|
||||
return;
|
||||
}
|
||||
None => ()
|
||||
}
|
||||
|
||||
// Finally bottom out.
|
||||
(*i)(c);
|
||||
}
|
||||
|
||||
/// Compose two characters into a single character, if possible.
|
||||
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
|
||||
/// for more information.
|
||||
pub fn compose(a: char, b: char) -> Option<char> {
|
||||
compose_hangul(a, b).or_else(|| {
|
||||
match bsearch_table(a, composition_table) {
|
||||
None => None,
|
||||
Some(candidates) => {
|
||||
match candidates.binary_search_by(|&(val, _)| {
|
||||
if b == val { Equal }
|
||||
else if val < b { Less }
|
||||
else { Greater }
|
||||
}) {
|
||||
Ok(idx) => {
|
||||
let (_, result) = candidates[idx];
|
||||
Some(result)
|
||||
}
|
||||
Err(_) => None
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Constants from Unicode 7.0.0 Section 3.12 Conjoining Jamo Behavior
|
||||
const S_BASE: u32 = 0xAC00;
|
||||
const L_BASE: u32 = 0x1100;
|
||||
const V_BASE: u32 = 0x1161;
|
||||
const T_BASE: u32 = 0x11A7;
|
||||
const L_COUNT: u32 = 19;
|
||||
const V_COUNT: u32 = 21;
|
||||
const T_COUNT: u32 = 28;
|
||||
const N_COUNT: u32 = (V_COUNT * T_COUNT);
|
||||
const S_COUNT: u32 = (L_COUNT * N_COUNT);
|
||||
|
||||
// FIXME(#19596) This is a workaround, we should use `F` instead of `&mut F`
|
||||
// Decompose a precomposed Hangul syllable
|
||||
#[allow(unsafe_code)]
|
||||
#[inline(always)]
|
||||
fn decompose_hangul<F>(s: char, f: &mut F) where F: FnMut(char) {
|
||||
use std::mem::transmute;
|
||||
|
||||
let si = s as u32 - S_BASE;
|
||||
|
||||
let li = si / N_COUNT;
|
||||
unsafe {
|
||||
(*f)(transmute(L_BASE + li));
|
||||
|
||||
let vi = (si % N_COUNT) / T_COUNT;
|
||||
(*f)(transmute(V_BASE + vi));
|
||||
|
||||
let ti = si % T_COUNT;
|
||||
if ti > 0 {
|
||||
(*f)(transmute(T_BASE + ti));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compose a pair of Hangul Jamo
|
||||
#[allow(unsafe_code)]
|
||||
#[inline(always)]
|
||||
fn compose_hangul(a: char, b: char) -> Option<char> {
|
||||
use std::mem::transmute;
|
||||
|
||||
let l = a as u32;
|
||||
let v = b as u32;
|
||||
// Compose an LPart and a VPart
|
||||
if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
|
||||
let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
|
||||
return unsafe { Some(transmute(r)) };
|
||||
}
|
||||
// Compose an LVPart and a TPart
|
||||
if S_BASE <= l && l <= (S_BASE+S_COUNT-T_COUNT) && T_BASE <= v && v < (T_BASE+T_COUNT) {
|
||||
let r = l + (v - T_BASE);
|
||||
return unsafe { Some(transmute(r)) };
|
||||
}
|
||||
None
|
||||
}
|
@ -1,137 +0,0 @@
|
||||
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use decompose::Decompositions;
|
||||
|
||||
#[derive(Clone)]
|
||||
enum RecompositionState {
|
||||
Composing,
|
||||
Purging,
|
||||
Finished
|
||||
}
|
||||
|
||||
/// External iterator for a string recomposition's characters.
|
||||
#[derive(Clone)]
|
||||
pub struct Recompositions<I> {
|
||||
iter: Decompositions<I>,
|
||||
state: RecompositionState,
|
||||
buffer: VecDeque<char>,
|
||||
composee: Option<char>,
|
||||
last_ccc: Option<u8>
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
|
||||
Recompositions {
|
||||
iter: super::decompose::new_canonical(iter),
|
||||
state: self::RecompositionState::Composing,
|
||||
buffer: VecDeque::new(),
|
||||
composee: None,
|
||||
last_ccc: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
|
||||
Recompositions {
|
||||
iter: super::decompose::new_compatible(iter),
|
||||
state : self::RecompositionState::Composing,
|
||||
buffer: VecDeque::new(),
|
||||
composee: None,
|
||||
last_ccc: None,
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item=char>> Iterator for Recompositions<I> {
|
||||
type Item = char;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<char> {
|
||||
use self::RecompositionState::*;
|
||||
|
||||
loop {
|
||||
match self.state {
|
||||
Composing => {
|
||||
for ch in self.iter.by_ref() {
|
||||
let ch_class = super::char::canonical_combining_class(ch);
|
||||
if self.composee.is_none() {
|
||||
if ch_class != 0 {
|
||||
return Some(ch);
|
||||
}
|
||||
self.composee = Some(ch);
|
||||
continue;
|
||||
}
|
||||
let k = self.composee.clone().unwrap();
|
||||
|
||||
match self.last_ccc {
|
||||
None => {
|
||||
match super::char::compose(k, ch) {
|
||||
Some(r) => {
|
||||
self.composee = Some(r);
|
||||
continue;
|
||||
}
|
||||
None => {
|
||||
if ch_class == 0 {
|
||||
self.composee = Some(ch);
|
||||
return Some(k);
|
||||
}
|
||||
self.buffer.push_back(ch);
|
||||
self.last_ccc = Some(ch_class);
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(l_class) => {
|
||||
if l_class >= ch_class {
|
||||
// `ch` is blocked from `composee`
|
||||
if ch_class == 0 {
|
||||
self.composee = Some(ch);
|
||||
self.last_ccc = None;
|
||||
self.state = Purging;
|
||||
return Some(k);
|
||||
}
|
||||
self.buffer.push_back(ch);
|
||||
self.last_ccc = Some(ch_class);
|
||||
continue;
|
||||
}
|
||||
match super::char::compose(k, ch) {
|
||||
Some(r) => {
|
||||
self.composee = Some(r);
|
||||
continue;
|
||||
}
|
||||
None => {
|
||||
self.buffer.push_back(ch);
|
||||
self.last_ccc = Some(ch_class);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.state = Finished;
|
||||
if self.composee.is_some() {
|
||||
return self.composee.take();
|
||||
}
|
||||
}
|
||||
Purging => {
|
||||
match self.buffer.pop_front() {
|
||||
None => self.state = Composing,
|
||||
s => return s
|
||||
}
|
||||
}
|
||||
Finished => {
|
||||
match self.buffer.pop_front() {
|
||||
None => return self.composee.take(),
|
||||
s => return s
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
2838
third_party/rust/unicode-normalization/src/tables.rs
vendored
2838
third_party/rust/unicode-normalization/src/tables.rs
vendored
File diff suppressed because it is too large
Load Diff
155
third_party/rust/unicode-normalization/src/test.rs
vendored
155
third_party/rust/unicode-normalization/src/test.rs
vendored
@ -1,155 +0,0 @@
|
||||
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use UnicodeNormalization;
|
||||
|
||||
#[test]
|
||||
fn test_nfd() {
|
||||
macro_rules! t {
|
||||
($input: expr, $expected: expr) => {
|
||||
assert_eq!($input.nfd().collect::<String>(), $expected);
|
||||
// A dummy iterator that is not std::str::Chars directly:
|
||||
assert_eq!($input.chars().map(|c| c).nfd().collect::<String>(), $expected);
|
||||
}
|
||||
}
|
||||
t!("abc", "abc");
|
||||
t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
|
||||
t!("\u{2026}", "\u{2026}");
|
||||
t!("\u{2126}", "\u{3a9}");
|
||||
t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
|
||||
t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
|
||||
t!("a\u{301}", "a\u{301}");
|
||||
t!("\u{301}a", "\u{301}a");
|
||||
t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
|
||||
t!("\u{ac1c}", "\u{1100}\u{1162}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nfkd() {
|
||||
macro_rules! t {
|
||||
($input: expr, $expected: expr) => {
|
||||
assert_eq!($input.nfkd().collect::<String>(), $expected);
|
||||
}
|
||||
}
|
||||
t!("abc", "abc");
|
||||
t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
|
||||
t!("\u{2026}", "...");
|
||||
t!("\u{2126}", "\u{3a9}");
|
||||
t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
|
||||
t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
|
||||
t!("a\u{301}", "a\u{301}");
|
||||
t!("\u{301}a", "\u{301}a");
|
||||
t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
|
||||
t!("\u{ac1c}", "\u{1100}\u{1162}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nfc() {
|
||||
macro_rules! t {
|
||||
($input: expr, $expected: expr) => {
|
||||
assert_eq!($input.nfc().collect::<String>(), $expected);
|
||||
}
|
||||
}
|
||||
t!("abc", "abc");
|
||||
t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
|
||||
t!("\u{2026}", "\u{2026}");
|
||||
t!("\u{2126}", "\u{3a9}");
|
||||
t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
|
||||
t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
|
||||
t!("a\u{301}", "\u{e1}");
|
||||
t!("\u{301}a", "\u{301}a");
|
||||
t!("\u{d4db}", "\u{d4db}");
|
||||
t!("\u{ac1c}", "\u{ac1c}");
|
||||
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nfkc() {
|
||||
macro_rules! t {
|
||||
($input: expr, $expected: expr) => {
|
||||
assert_eq!($input.nfkc().collect::<String>(), $expected);
|
||||
}
|
||||
}
|
||||
t!("abc", "abc");
|
||||
t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
|
||||
t!("\u{2026}", "...");
|
||||
t!("\u{2126}", "\u{3a9}");
|
||||
t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
|
||||
t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
|
||||
t!("a\u{301}", "\u{e1}");
|
||||
t!("\u{301}a", "\u{301}a");
|
||||
t!("\u{d4db}", "\u{d4db}");
|
||||
t!("\u{ac1c}", "\u{ac1c}");
|
||||
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_official() {
|
||||
use testdata::TEST_NORM;
|
||||
macro_rules! normString {
|
||||
($method: ident, $input: expr) => { $input.$method().collect::<String>() }
|
||||
}
|
||||
|
||||
for &(s1, s2, s3, s4, s5) in TEST_NORM {
|
||||
// these invariants come from the CONFORMANCE section of
|
||||
// http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
|
||||
{
|
||||
let r1 = normString!(nfc, s1);
|
||||
let r2 = normString!(nfc, s2);
|
||||
let r3 = normString!(nfc, s3);
|
||||
let r4 = normString!(nfc, s4);
|
||||
let r5 = normString!(nfc, s5);
|
||||
assert_eq!(s2, &r1[..]);
|
||||
assert_eq!(s2, &r2[..]);
|
||||
assert_eq!(s2, &r3[..]);
|
||||
assert_eq!(s4, &r4[..]);
|
||||
assert_eq!(s4, &r5[..]);
|
||||
}
|
||||
|
||||
{
|
||||
let r1 = normString!(nfd, s1);
|
||||
let r2 = normString!(nfd, s2);
|
||||
let r3 = normString!(nfd, s3);
|
||||
let r4 = normString!(nfd, s4);
|
||||
let r5 = normString!(nfd, s5);
|
||||
assert_eq!(s3, &r1[..]);
|
||||
assert_eq!(s3, &r2[..]);
|
||||
assert_eq!(s3, &r3[..]);
|
||||
assert_eq!(s5, &r4[..]);
|
||||
assert_eq!(s5, &r5[..]);
|
||||
}
|
||||
|
||||
{
|
||||
let r1 = normString!(nfkc, s1);
|
||||
let r2 = normString!(nfkc, s2);
|
||||
let r3 = normString!(nfkc, s3);
|
||||
let r4 = normString!(nfkc, s4);
|
||||
let r5 = normString!(nfkc, s5);
|
||||
assert_eq!(s4, &r1[..]);
|
||||
assert_eq!(s4, &r2[..]);
|
||||
assert_eq!(s4, &r3[..]);
|
||||
assert_eq!(s4, &r4[..]);
|
||||
assert_eq!(s4, &r5[..]);
|
||||
}
|
||||
|
||||
{
|
||||
let r1 = normString!(nfkd, s1);
|
||||
let r2 = normString!(nfkd, s2);
|
||||
let r3 = normString!(nfkd, s3);
|
||||
let r4 = normString!(nfkd, s4);
|
||||
let r5 = normString!(nfkd, s5);
|
||||
assert_eq!(s5, &r1[..]);
|
||||
assert_eq!(s5, &r2[..]);
|
||||
assert_eq!(s5, &r3[..]);
|
||||
assert_eq!(s5, &r4[..]);
|
||||
assert_eq!(s5, &r5[..]);
|
||||
}
|
||||
}
|
||||
}
|
22832
third_party/rust/unicode-normalization/src/testdata.rs
vendored
22832
third_party/rust/unicode-normalization/src/testdata.rs
vendored
File diff suppressed because it is too large
Load Diff
1
third_party/rust/url/.cargo-checksum.json
vendored
1
third_party/rust/url/.cargo-checksum.json
vendored
@ -1 +0,0 @@
|
||||
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"e20a03cb660e0039506f8828204fd93815ebfe051ef62194f8dcf3fc8b7d0e5a",".travis.yml":"2a3033f9edf86bc829de486fc1e74fe0bbe3166ed99b0139754ea29772c19b06","Cargo.toml":"30055a6d4ff3d1bf29a9fa99401e4ffffeecc4b5bade830843a9a5f1634fefd1","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"20c7855c364d57ea4c97889a5e8d98470a9952dade37bd9248b9a54431670e5e","Makefile":"92b64915724e3b5fec95ec96c5af738bd219b80709e18bf8639d784bc3d9a600","README.md":"eb3f4694003f408cbe3c7f3e9fbbc71241defb940cc55a816981f0f0f144c8eb","UPGRADING.md":"fbcc2d39bdf17db0745793db6626fcd5c909dddd4ce13b27566cfabece22c368","appveyor.yml":"c78486dbfbe6ebbf3d808afb9a19f7ec18c4704ce451c6305f0716999b70a1a6","docs/.nojekyll":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","docs/404.html":"f61e6271c1ea1aa113b64b356e994595fa548f0433f89948d747503ad22195cd","docs/index.html":"f61e6271c1ea1aa113b64b356e994595fa548f0433f89948d747503ad22195cd","github.png":"b432fd855efe7c430fe6a57ccf83935c1996f03a7cdc8d6e1b34154b8c43f6ec","rust-url-todo":"1192cee7b6cedf2133d97dc6074b593a1d19b0ee13fff6f28d6329855044e575","src/encoding.rs":"7fb43e1c109bf9f2a80a05525082f90e79dba8e8056547571c49fba074406d39","src/form_urlencoded.rs":"172922f2c51eb8dae0182d70e5e0c2969f1b5b7aac026720ced9f84059465999","src/host.rs":"a654dc3c9ce9024b98698e000bc97abaac7f10c49b1635be8125cf72376fd5d1","src/lib.rs":"c3542aabc733f76a3b9c7d24b5c41e60e9eb84d2712660611300d1de0e7c2072","src/origin.rs":"d52010a280d363aed6832f7de5e52c1a62815302e59dcbc9cdc2574e2ac884b9","src/parser.rs":"8ca331ada66f309e91fcc375d8467c929b453979fe0ac5abe190eb8a32074db8","src/path_segments.rs":"0414985c441d0c0292ccc6f56a144b84728ae03382476e6cae1a766f8c333ef8","src/percent_encoding.rs":"44d3321eaa1c77715e9ea1421519289ca73612a31c3d6fce04ff489dfa7db3c5","src/quirks.rs":"3249d1a1f73dd29ec06d626ea2ea4d61e7b2a782543742a5bee422b2f3864b19"
,"src/slicing.rs":"4e539886b23945a92094625f3e531a4bff40daa44240b5d19ee8577478c4f7fe","tests/data.rs":"d36f0ee509fb00524635a7968e336bb89674a82a4fcb06be189155e4b9d43db5","tests/setters_tests.json":"ebb439306ea748be6d0f93132cb59220c5805afd56357d6017f1eb1e4f0f80b3","tests/unit.rs":"9a05f55c7b381b58ee67ef8ef145220e5df88a403225a1a324a36afc9bb6a7d7","tests/urltestdata.json":"11abe4a459566c3929e3326828f8576cb59ec8fab1d191714c34eac28d5f15f9"},"package":"8527c62d9869a08325c38272b3f85668df22a65890c61a639d233dc0ed0b23a2"}
|
0
third_party/rust/url/.cargo-ok
vendored
0
third_party/rust/url/.cargo-ok
vendored
3
third_party/rust/url/.gitignore
vendored
3
third_party/rust/url/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
target
|
||||
Cargo.lock
|
||||
/.cargo/config
|
8
third_party/rust/url/.travis.yml
vendored
8
third_party/rust/url/.travis.yml
vendored
@ -1,8 +0,0 @@
|
||||
language: rust
|
||||
rust:
|
||||
- nightly
|
||||
- beta
|
||||
- stable
|
||||
script: make test
|
||||
notifications:
|
||||
webhooks: http://build.servo.org:54856/travis
|
38
third_party/rust/url/Cargo.toml
vendored
38
third_party/rust/url/Cargo.toml
vendored
@ -1,38 +0,0 @@
|
||||
[package]
|
||||
|
||||
name = "url"
|
||||
version = "1.2.1"
|
||||
authors = ["The rust-url developers"]
|
||||
|
||||
description = "URL library for Rust, based on the WHATWG URL Standard"
|
||||
documentation = "http://servo.github.io/rust-url/url/index.html"
|
||||
repository = "https://github.com/servo/rust-url"
|
||||
readme = "README.md"
|
||||
keywords = ["url", "parser"]
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[[test]]
|
||||
name = "unit"
|
||||
|
||||
[[test]]
|
||||
name = "data"
|
||||
harness = false
|
||||
|
||||
[lib]
|
||||
test = false
|
||||
|
||||
[dev-dependencies]
|
||||
rustc-test = "0.1"
|
||||
rustc-serialize = "0.3"
|
||||
|
||||
[features]
|
||||
query_encoding = ["encoding"]
|
||||
heap_size = ["heapsize"]
|
||||
|
||||
[dependencies]
|
||||
encoding = {version = "0.2", optional = true}
|
||||
heapsize = {version = ">=0.1.1, <0.4", optional = true}
|
||||
idna = { version = "0.1.0", path = "./idna" }
|
||||
matches = "0.1"
|
||||
rustc-serialize = {version = "0.3", optional = true}
|
||||
serde = {version = ">=0.6.1, <0.9", optional = true}
|
201
third_party/rust/url/LICENSE-APACHE
vendored
201
third_party/rust/url/LICENSE-APACHE
vendored
@ -1,201 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
25
third_party/rust/url/LICENSE-MIT
vendored
25
third_party/rust/url/LICENSE-MIT
vendored
@ -1,25 +0,0 @@
|
||||
Copyright (c) 2013-2016 The rust-url developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
5
third_party/rust/url/Makefile
vendored
5
third_party/rust/url/Makefile
vendored
@ -1,5 +0,0 @@
|
||||
test:
|
||||
cargo test --features "query_encoding serde rustc-serialize"
|
||||
[ x$$TRAVIS_RUST_VERSION != xnightly ] || cargo test --features heapsize
|
||||
|
||||
.PHONY: test
|
10
third_party/rust/url/README.md
vendored
10
third_party/rust/url/README.md
vendored
@ -1,10 +0,0 @@
|
||||
rust-url
|
||||
========
|
||||
|
||||
[![Travis build Status](https://travis-ci.org/servo/rust-url.svg?branch=master)](https://travis-ci.org/servo/rust-url) [![Appveyor build status](https://ci.appveyor.com/api/projects/status/ulkqx2xcemyod6xa?svg=true)](https://ci.appveyor.com/project/Manishearth/rust-url)
|
||||
|
||||
URL library for Rust, based on the [URL Standard](https://url.spec.whatwg.org/).
|
||||
|
||||
[Documentation](https://docs.rs/url/)
|
||||
|
||||
Please see [UPGRADING.md](https://github.com/servo/rust-url/blob/master/UPGRADING.md) if you are upgrading from 0.x to 1.x.
|
263
third_party/rust/url/UPGRADING.md
vendored
263
third_party/rust/url/UPGRADING.md
vendored
@ -1,263 +0,0 @@
|
||||
# Guide to upgrading from url 0.x to 1.x
|
||||
|
||||
* The fields of `Url` are now private because the `Url` constructor, parser,
|
||||
and setters maintain invariants that could be violated if you were to set the fields directly.
|
||||
Instead of accessing, for example, `url.scheme`, use the getter method, such as `url.scheme()`.
|
||||
Instead of assigning directly to a field, for example `url.scheme = "https".to_string()`,
|
||||
use the setter method, such as `url.set_scheme("https").unwrap()`.
|
||||
(Some setters validate the new value and return a `Result` that must be used).
|
||||
|
||||
* The methods of `Url` now return `&str` instead of `String`,
|
||||
thus reducing allocations and making serialization cheap.
|
||||
|
||||
* The `path()` method on `url::Url` instances used to return `Option<&[String]>`;
|
||||
now it returns `&str`.
|
||||
If you would like functionality more similar to the old behavior of `path()`,
|
||||
use `path_segments()` that returns `Option<str::Split<char>>`.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let issue_list_url = Url::parse(
|
||||
"https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
|
||||
).unwrap();
|
||||
assert_eq!(issue_list_url.path(), Some(&["rust-lang".to_string(),
|
||||
"rust".to_string(),
|
||||
"issues".to_string()][..]));
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let issue_list_url = Url::parse(
|
||||
"https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
|
||||
).unwrap();
|
||||
assert_eq!(issue_list_url.path(), "/rust-lang/rust/issues");
|
||||
assert_eq!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()),
|
||||
Some(vec!["rust-lang", "rust", "issues"]));
|
||||
```
|
||||
|
||||
* The `path_mut()` method on `url::Url` instances that allowed modification of a URL's path
|
||||
has been replaced by `path_segments_mut()`.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
|
||||
url.path_mut().unwrap().push("issues");
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
|
||||
url.path_segments_mut().unwrap().push("issues");
|
||||
```
|
||||
|
||||
* The `domain_mut()` method on `url::Url` instances that allowed modification of a URL's domain
|
||||
has been replaced by `set_host()` and `set_ip_host()`.
|
||||
|
||||
* The `host()` method on `url::Url` instances used to return `Option<&Host>`;
|
||||
now it returns `Option<Host<&str>>`.
|
||||
The `serialize_host()` method that returned `Option<String>`
|
||||
has been replaced by the `host_str()` method that returns `Option<&str>`.
|
||||
|
||||
* The `serialize()` method on `url::Url` instances that returned `String`
|
||||
has been replaced by an `as_str()` method that returns `&str`.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
|
||||
assert_eq!(this_document.serialize(), "http://servo.github.io/rust-url/url/index.html".to_string());
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
|
||||
assert_eq!(this_document.as_str(), "http://servo.github.io/rust-url/url/index.html");
|
||||
```
|
||||
|
||||
* `url::UrlParser` has been replaced by `url::Url::parse()` and `url::Url::join()`.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
|
||||
let css_url = UrlParser::new().base_url(&this_document).parse("../main.css").unwrap();
|
||||
assert_eq!(css_url.serialize(), "http://servo.github.io/rust-url/main.css".to_string());
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
|
||||
let css_url = this_document.join("../main.css").unwrap();
|
||||
assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
|
||||
```
|
||||
|
||||
* `url::parse_path()` and `url::UrlParser::parse_path()` have been removed without replacement.
|
||||
As a workaround, you can give a base URL that you then ignore too `url::Url::parse()`.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let (path, query, fragment) = url::parse_path("/foo/bar/../baz?q=42").unwrap();
|
||||
assert_eq!(path, vec!["foo".to_string(), "baz".to_string()]);
|
||||
assert_eq!(query, Some("q=42".to_string()));
|
||||
assert_eq!(fragment, None);
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let base = Url::parse("http://example.com").unwrap();
|
||||
let with_path = base.join("/foo/bar/../baz?q=42").unwrap();
|
||||
assert_eq!(with_path.path(), "/foo/baz");
|
||||
assert_eq!(with_path.query(), Some("q=42"));
|
||||
assert_eq!(with_path.fragment(), None);
|
||||
```
|
||||
|
||||
* The `url::form_urlencoded::serialize()` method
|
||||
has been replaced with the `url::form_urlencoded::Serializer` struct.
|
||||
Instead of calling `serialize()` with key/value pairs,
|
||||
create a new `Serializer` with a new string,
|
||||
call the `extend_pairs()` method on the `Serializer` instance with the key/value pairs as the argument,
|
||||
then call `finish()`.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let form = url::form_urlencoded::serialize(form.iter().map(|(k, v)| {
|
||||
(&k[..], &v[..])
|
||||
}));
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let form = url::form_urlencoded::Serializer::new(String::new()).extend_pairs(
|
||||
form.iter().map(|(k, v)| { (&k[..], &v[..]) })
|
||||
).finish();
|
||||
```
|
||||
|
||||
* The `set_query_from_pairs()` method on `url::Url` instances that took key/value pairs
|
||||
has been replaced with `query_pairs_mut()`, which allows you to modify the `url::Url`'s query pairs.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let mut url = Url::parse("https://duckduckgo.com/").unwrap();
|
||||
let pairs = vec![
|
||||
("q", "test"),
|
||||
("ia", "images"),
|
||||
];
|
||||
url.set_query_from_pairs(pairs.iter().map(|&(k, v)| {
|
||||
(&k[..], &v[..])
|
||||
}));
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let mut url = Url::parse("https://duckduckgo.com/").unwrap();
|
||||
let pairs = vec![
|
||||
("q", "test"),
|
||||
("ia", "images"),
|
||||
];
|
||||
url.query_pairs_mut().clear().extend_pairs(
|
||||
pairs.iter().map(|&(k, v)| { (&k[..], &v[..]) })
|
||||
);
|
||||
```
|
||||
|
||||
* `url::SchemeData`, its variants `Relative` and `NonRelative`,
|
||||
and the struct `url::RelativeSchemeData` have been removed.
|
||||
Instead of matching on these variants
|
||||
to determine if you have a URL in a relative scheme such as HTTP
|
||||
versus a URL in a non-relative scheme as data,
|
||||
use the `cannot_be_a_base()` method to determine which kind you have.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
match url.scheme_data {
|
||||
url::SchemeData::Relative(..) => {}
|
||||
url::SchemeData::NonRelative(..) => {
|
||||
return Err(human(format!("`{}` must have relative scheme \
|
||||
data: {}", field, url)))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
if url.cannot_be_a_base() {
|
||||
return Err(human(format!("`{}` must have relative scheme \
|
||||
data: {}", field, url)))
|
||||
}
|
||||
```
|
||||
|
||||
* The functions `url::whatwg_scheme_type_mapper()`, the `SchemeType` enum,
|
||||
and the `scheme_type_mapper()` method on `url::UrlParser` instances have been removed.
|
||||
`SchemeType` had a method for getting the `default_port()`;
|
||||
to replicate this functionality, use the method `port_or_known_default()` on `url::Url` instances.
|
||||
The `port_or_default()` method on `url::Url` instances has been removed;
|
||||
use `port_or_known_default()` instead.
|
||||
|
||||
Before upgrading:
|
||||
|
||||
```rust
|
||||
let port = match whatwg_scheme_type_mapper(&url.scheme) {
|
||||
SchemeType::Relative(port) => port,
|
||||
_ => return Err(format!("Invalid special scheme: `{}`",
|
||||
raw_url.scheme)),
|
||||
};
|
||||
```
|
||||
|
||||
After upgrading:
|
||||
|
||||
```rust
|
||||
let port = match url.port_or_known_default() {
|
||||
Some(port) => port,
|
||||
_ => return Err(format!("Invalid special scheme: `{}`",
|
||||
url.scheme())),
|
||||
};
|
||||
```
|
||||
|
||||
* The following formatting utilities have been removed without replacement;
|
||||
look at their linked previous implementations
|
||||
if you would like to replicate the functionality in your code:
|
||||
* [`url::format::PathFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL24)
|
||||
* [`url::format::UserInfoFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL50)
|
||||
* [`url::format::UrlNoFragmentFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL70)
|
||||
|
||||
* `url::percent_encoding::percent_decode()` used to have a return type of `Vec<u8>`;
|
||||
now it returns an iterator of decoded `u8` bytes that also implements `Into<Cow<u8>>`.
|
||||
Use `.into().to_owned()` to obtain a `Vec<u8>`.
|
||||
(`.collect()` also works but might not be as efficient.)
|
||||
|
||||
* The `url::percent_encoding::EncodeSet` struct and constant instances
|
||||
used with `url::percent_encoding::percent_encode()`
|
||||
have been changed to structs that implement the trait `url::percent_encoding::EncodeSet`.
|
||||
* `SIMPLE_ENCODE_SET`, `QUERY_ENCODE_SET`, `DEFAULT_ENCODE_SET`,
|
||||
and `USERINFO_ENCODE_SET` have the same behavior.
|
||||
* `USERNAME_ENCODE_SET` and `PASSWORD_ENCODE_SET` have been removed;
|
||||
use `USERINFO_ENCODE_SET` instead.
|
||||
* `HTTP_VALUE_ENCODE_SET` has been removed;
|
||||
an implementation of it in the new types can be found [in hyper's source](
|
||||
https://github.com/hyperium/hyper/blob/67436c5bf615cf5a55a71e32b788afef5985570e/src/header/parsing.rs#L131-L138)
|
||||
if you need to replicate this functionality in your code.
|
||||
* `FORM_URLENCODED_ENCODE_SET` has been removed;
|
||||
instead, use the functionality in `url::form_urlencoded`.
|
||||
* `PATH_SEGMENT_ENCODE_SET` has been added for use on '/'-separated path segments.
|
||||
|
||||
* `url::percent_encoding::percent_decode_to()` has been removed.
|
||||
Use `url::percent_encoding::percent_decode()` which returns an iterator.
|
||||
You can then use the iterator’s `collect()` method
|
||||
or give it to some data structure’s `extend()` method.
|
||||
* A number of `ParseError` variants have changed.
|
||||
[See the documentation for the current set](http://servo.github.io/rust-url/url/enum.ParseError.html).
|
||||
* `url::OpaqueOrigin::new()` and `url::Origin::UID(OpaqueOrigin)`
|
||||
have been replaced by `url::Origin::new_opaque()` and `url::Origin::Opaque(OpaqueOrigin)`, respectively.
|
13
third_party/rust/url/appveyor.yml
vendored
13
third_party/rust/url/appveyor.yml
vendored
@ -1,13 +0,0 @@
|
||||
install:
|
||||
- ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-nightly-i686-pc-windows-gnu.exe'
|
||||
- rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
|
||||
- SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
|
||||
- rustc -V
|
||||
- cargo -V
|
||||
- git submodule update --init --recursive
|
||||
|
||||
build: false
|
||||
|
||||
test_script:
|
||||
- cargo build
|
||||
- cargo test --verbose
|
0
third_party/rust/url/docs/.nojekyll
vendored
0
third_party/rust/url/docs/.nojekyll
vendored
3
third_party/rust/url/docs/404.html
vendored
3
third_party/rust/url/docs/404.html
vendored
@ -1,3 +0,0 @@
|
||||
<meta http-equiv="refresh" content="0; url=https://docs.rs/url/">
|
||||
<link rel="canonical" href="https://docs.rs/url/">
|
||||
<a href="https://docs.rs/url/">Moved to docs.rs</a>
|
3
third_party/rust/url/docs/index.html
vendored
3
third_party/rust/url/docs/index.html
vendored
@ -1,3 +0,0 @@
|
||||
<meta http-equiv="refresh" content="0; url=https://docs.rs/url/">
|
||||
<link rel="canonical" href="https://docs.rs/url/">
|
||||
<a href="https://docs.rs/url/">Moved to docs.rs</a>
|
BIN
third_party/rust/url/github.png
vendored
BIN
third_party/rust/url/github.png
vendored
Binary file not shown.
Before Width: | Height: | Size: 7.6 KiB |
14
third_party/rust/url/rust-url-todo
vendored
14
third_party/rust/url/rust-url-todo
vendored
@ -1,14 +0,0 @@
|
||||
* standalone path parsing?
|
||||
* Test setters
|
||||
* Test trim C0/space
|
||||
* Test remove tab & newline
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_path_segments() {
|
||||
let mut url = Url::parse("http://example.net").unwrap();
|
||||
url.push_path_segment("foo").unwrap();
|
||||
url.extend_path_segments(&["bar", "b/az"]).unwrap();
|
||||
assert_eq!(url.as_str(), "http://example.net/foo");
|
||||
}
|
135
third_party/rust/url/src/encoding.rs
vendored
135
third_party/rust/url/src/encoding.rs
vendored
@ -1,135 +0,0 @@
|
||||
// Copyright 2013-2014 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
|
||||
//! Abstraction that conditionally compiles either to rust-encoding,
|
||||
//! or to only support UTF-8.
|
||||
|
||||
#[cfg(feature = "query_encoding")] extern crate encoding;
|
||||
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
|
||||
#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
|
||||
#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;
|
||||
|
||||
#[cfg(feature = "query_encoding")]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct EncodingOverride {
|
||||
/// `None` means UTF-8.
|
||||
encoding: Option<EncodingRef>
|
||||
}
|
||||
|
||||
#[cfg(feature = "query_encoding")]
|
||||
impl EncodingOverride {
|
||||
pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self {
|
||||
encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8)
|
||||
}
|
||||
|
||||
pub fn from_encoding(encoding: EncodingRef) -> Self {
|
||||
EncodingOverride {
|
||||
encoding: if encoding.name() == "utf-8" { None } else { Some(encoding) }
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn utf8() -> Self {
|
||||
EncodingOverride { encoding: None }
|
||||
}
|
||||
|
||||
pub fn lookup(label: &[u8]) -> Option<Self> {
|
||||
// Don't use String::from_utf8_lossy since no encoding label contains U+FFFD
|
||||
// https://encoding.spec.whatwg.org/#names-and-labels
|
||||
::std::str::from_utf8(label)
|
||||
.ok()
|
||||
.and_then(encoding_from_whatwg_label)
|
||||
.map(Self::from_encoding)
|
||||
}
|
||||
|
||||
/// https://encoding.spec.whatwg.org/#get-an-output-encoding
|
||||
pub fn to_output_encoding(self) -> Self {
|
||||
if let Some(encoding) = self.encoding {
|
||||
if matches!(encoding.name(), "utf-16le" | "utf-16be") {
|
||||
return Self::utf8()
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn is_utf8(&self) -> bool {
|
||||
self.encoding.is_none()
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &'static str {
|
||||
match self.encoding {
|
||||
Some(encoding) => encoding.name(),
|
||||
None => "utf-8",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
|
||||
match self.encoding {
|
||||
// `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace`
|
||||
Some(encoding) => encoding.decode(&input, DecoderTrap::Replace).unwrap().into(),
|
||||
None => decode_utf8_lossy(input),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
|
||||
match self.encoding {
|
||||
// `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape`
|
||||
Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()),
|
||||
None => encode_utf8(input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(not(feature = "query_encoding"))]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct EncodingOverride;
|
||||
|
||||
#[cfg(not(feature = "query_encoding"))]
|
||||
impl EncodingOverride {
|
||||
#[inline]
|
||||
pub fn utf8() -> Self {
|
||||
EncodingOverride
|
||||
}
|
||||
|
||||
pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
|
||||
decode_utf8_lossy(input)
|
||||
}
|
||||
|
||||
pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
|
||||
encode_utf8(input)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
|
||||
match input {
|
||||
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
|
||||
Cow::Owned(bytes) => {
|
||||
let raw_utf8: *const [u8];
|
||||
match String::from_utf8_lossy(&bytes) {
|
||||
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
|
||||
Cow::Owned(s) => return s.into(),
|
||||
}
|
||||
// from_utf8_lossy returned a borrow of `bytes` unchanged.
|
||||
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
|
||||
// Reuse the existing `Vec` allocation.
|
||||
unsafe { String::from_utf8_unchecked(bytes) }.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
|
||||
match input {
|
||||
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
|
||||
Cow::Owned(s) => Cow::Owned(s.into_bytes())
|
||||
}
|
||||
}
|
364
third_party/rust/url/src/form_urlencoded.rs
vendored
364
third_party/rust/url/src/form_urlencoded.rs
vendored
@ -1,364 +0,0 @@
|
||||
// Copyright 2013-2016 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
|
||||
//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
|
||||
//! as used by HTML forms.
|
||||
//!
|
||||
//! Converts between a string (such as an URL’s query string)
|
||||
//! and a sequence of (name, value) pairs.
|
||||
|
||||
use encoding::EncodingOverride;
|
||||
use percent_encoding::{percent_encode_byte, percent_decode};
|
||||
use std::borrow::{Borrow, Cow};
|
||||
use std::str;
|
||||
|
||||
|
||||
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
|
||||
/// into a iterator of (name, value) pairs.
|
||||
///
|
||||
/// Use `parse(input.as_bytes())` to parse a `&str` string.
|
||||
///
|
||||
/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
|
||||
/// converted to `[("#first", "%try%")]`.
|
||||
#[inline]
|
||||
pub fn parse(input: &[u8]) -> Parse {
|
||||
Parse {
|
||||
input: input,
|
||||
encoding: EncodingOverride::utf8(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
|
||||
/// into a iterator of (name, value) pairs.
|
||||
///
|
||||
/// Use `parse(input.as_bytes())` to parse a `&str` string.
|
||||
///
|
||||
/// This function is only available if the `query_encoding` Cargo feature is enabled.
|
||||
///
|
||||
/// Arguments:
|
||||
///
|
||||
/// * `encoding_override`: The character encoding each name and values is decoded as
|
||||
/// after percent-decoding. Defaults to UTF-8.
|
||||
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
|
||||
#[cfg(feature = "query_encoding")]
|
||||
pub fn parse_with_encoding<'a>(input: &'a [u8],
|
||||
encoding_override: Option<::encoding::EncodingRef>,
|
||||
use_charset: bool)
|
||||
-> Result<Parse<'a>, ()> {
|
||||
use std::ascii::AsciiExt;
|
||||
|
||||
let mut encoding = EncodingOverride::from_opt_encoding(encoding_override);
|
||||
if !(encoding.is_utf8() || input.is_ascii()) {
|
||||
return Err(())
|
||||
}
|
||||
if use_charset {
|
||||
for sequence in input.split(|&b| b == b'&') {
|
||||
// No '+' in "_charset_" to replace with ' '.
|
||||
if sequence.starts_with(b"_charset_=") {
|
||||
let value = &sequence[b"_charset_=".len()..];
|
||||
// Skip replacing '+' with ' ' in value since no encoding label contains either:
|
||||
// https://encoding.spec.whatwg.org/#names-and-labels
|
||||
if let Some(e) = EncodingOverride::lookup(value) {
|
||||
encoding = e;
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Parse {
|
||||
input: input,
|
||||
encoding: encoding,
|
||||
})
|
||||
}
|
||||
|
||||
/// The return type of `parse()`.
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Parse<'a> {
|
||||
input: &'a [u8],
|
||||
encoding: EncodingOverride,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Parse<'a> {
|
||||
type Item = (Cow<'a, str>, Cow<'a, str>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if self.input.is_empty() {
|
||||
return None
|
||||
}
|
||||
let mut split2 = self.input.splitn(2, |&b| b == b'&');
|
||||
let sequence = split2.next().unwrap();
|
||||
self.input = split2.next().unwrap_or(&[][..]);
|
||||
if sequence.is_empty() {
|
||||
continue
|
||||
}
|
||||
let mut split2 = sequence.splitn(2, |&b| b == b'=');
|
||||
let name = split2.next().unwrap();
|
||||
let value = split2.next().unwrap_or(&[][..]);
|
||||
return Some((
|
||||
decode(name, self.encoding),
|
||||
decode(value, self.encoding),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn decode(input: &[u8], encoding: EncodingOverride) -> Cow<str> {
|
||||
let replaced = replace_plus(input);
|
||||
encoding.decode(match percent_decode(&replaced).if_any() {
|
||||
Some(vec) => Cow::Owned(vec),
|
||||
None => replaced,
|
||||
})
|
||||
}
|
||||
|
||||
/// Replace b'+' with b' '
|
||||
fn replace_plus<'a>(input: &'a [u8]) -> Cow<'a, [u8]> {
|
||||
match input.iter().position(|&b| b == b'+') {
|
||||
None => Cow::Borrowed(input),
|
||||
Some(first_position) => {
|
||||
let mut replaced = input.to_owned();
|
||||
replaced[first_position] = b' ';
|
||||
for byte in &mut replaced[first_position + 1..] {
|
||||
if *byte == b'+' {
|
||||
*byte = b' ';
|
||||
}
|
||||
}
|
||||
Cow::Owned(replaced)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Parse<'a> {
|
||||
/// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
|
||||
pub fn into_owned(self) -> ParseIntoOwned<'a> {
|
||||
ParseIntoOwned { inner: self }
|
||||
}
|
||||
}
|
||||
|
||||
/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
|
||||
pub struct ParseIntoOwned<'a> {
|
||||
inner: Parse<'a>
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ParseIntoOwned<'a> {
|
||||
type Item = (String, String);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.inner.next().map(|(k, v)| (k.into_owned(), v.into_owned()))
|
||||
}
|
||||
}
|
||||
|
||||
/// The [`application/x-www-form-urlencoded` byte serializer](
|
||||
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
|
||||
///
|
||||
/// Return an iterator of `&str` slices.
|
||||
pub fn byte_serialize(input: &[u8]) -> ByteSerialize {
|
||||
ByteSerialize {
|
||||
bytes: input,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return value of `byte_serialize()`.
|
||||
pub struct ByteSerialize<'a> {
|
||||
bytes: &'a [u8],
|
||||
}
|
||||
|
||||
fn byte_serialized_unchanged(byte: u8) -> bool {
|
||||
matches!(byte, b'*' | b'-' | b'.' | b'0' ... b'9' | b'A' ... b'Z' | b'_' | b'a' ... b'z')
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ByteSerialize<'a> {
|
||||
type Item = &'a str;
|
||||
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
if let Some((&first, tail)) = self.bytes.split_first() {
|
||||
if !byte_serialized_unchanged(first) {
|
||||
self.bytes = tail;
|
||||
return Some(if first == b' ' { "+" } else { percent_encode_byte(first) })
|
||||
}
|
||||
let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
|
||||
let (unchanged_slice, remaining) = match position {
|
||||
// 1 for first_byte + i unchanged in tail
|
||||
Some(i) => self.bytes.split_at(1 + i),
|
||||
None => (self.bytes, &[][..]),
|
||||
};
|
||||
self.bytes = remaining;
|
||||
Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
if self.bytes.is_empty() {
|
||||
(0, Some(0))
|
||||
} else {
|
||||
(1, Some(self.bytes.len()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The [`application/x-www-form-urlencoded` serializer](
|
||||
/// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
|
||||
pub struct Serializer<T: Target> {
|
||||
target: Option<T>,
|
||||
start_position: usize,
|
||||
encoding: EncodingOverride,
|
||||
}
|
||||
|
||||
pub trait Target {
|
||||
fn as_mut_string(&mut self) -> &mut String;
|
||||
fn finish(self) -> Self::Finished;
|
||||
type Finished;
|
||||
}
|
||||
|
||||
impl Target for String {
|
||||
fn as_mut_string(&mut self) -> &mut String { self }
|
||||
fn finish(self) -> Self { self }
|
||||
type Finished = Self;
|
||||
}
|
||||
|
||||
impl<'a> Target for &'a mut String {
|
||||
fn as_mut_string(&mut self) -> &mut String { &mut **self }
|
||||
fn finish(self) -> Self { self }
|
||||
type Finished = Self;
|
||||
}
|
||||
|
||||
// `as_mut_string` string here exposes the internal serialization of an `Url`,
|
||||
// which should not be exposed to users.
|
||||
// We achieve that by not giving users direct access to `UrlQuery`:
|
||||
// * Its fields are private
|
||||
// (and so can not be constructed with struct literal syntax outside of this crate),
|
||||
// * It has no constructor
|
||||
// * It is only visible (on the type level) to users in the return type of
|
||||
// `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
|
||||
// * `Serializer` keeps its target in a private field
|
||||
// * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
|
||||
impl<'a> Target for ::UrlQuery<'a> {
|
||||
fn as_mut_string(&mut self) -> &mut String { &mut self.url.serialization }
|
||||
fn finish(self) -> &'a mut ::Url { self.url }
|
||||
type Finished = &'a mut ::Url;
|
||||
}
|
||||
|
||||
impl<T: Target> Serializer<T> {
|
||||
/// Create a new `application/x-www-form-urlencoded` serializer for the given target.
|
||||
///
|
||||
/// If the target is non-empty,
|
||||
/// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
|
||||
pub fn new(target: T) -> Self {
|
||||
Self::for_suffix(target, 0)
|
||||
}
|
||||
|
||||
/// Create a new `application/x-www-form-urlencoded` serializer
|
||||
/// for a suffix of the given target.
|
||||
///
|
||||
/// If that suffix is non-empty,
|
||||
/// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
|
||||
pub fn for_suffix(mut target: T, start_position: usize) -> Self {
|
||||
&target.as_mut_string()[start_position..]; // Panic if out of bounds
|
||||
Serializer {
|
||||
target: Some(target),
|
||||
start_position: start_position,
|
||||
encoding: EncodingOverride::utf8(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove any existing name/value pair.
|
||||
///
|
||||
/// Panics if called after `.finish()`.
|
||||
pub fn clear(&mut self) -> &mut Self {
|
||||
string(&mut self.target).truncate(self.start_position);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the character encoding to be used for names and values before percent-encoding.
|
||||
#[cfg(feature = "query_encoding")]
|
||||
pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self {
|
||||
self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding();
|
||||
self
|
||||
}
|
||||
|
||||
/// Serialize and append a name/value pair.
|
||||
///
|
||||
/// Panics if called after `.finish()`.
|
||||
pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
|
||||
append_pair(string(&mut self.target), self.start_position, self.encoding, name, value);
|
||||
self
|
||||
}
|
||||
|
||||
/// Serialize and append a number of name/value pairs.
|
||||
///
|
||||
/// This simply calls `append_pair` repeatedly.
|
||||
/// This can be more convenient, so the user doesn’t need to introduce a block
|
||||
/// to limit the scope of `Serializer`’s borrow of its string.
|
||||
///
|
||||
/// Panics if called after `.finish()`.
|
||||
pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
|
||||
where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str> {
|
||||
{
|
||||
let string = string(&mut self.target);
|
||||
for pair in iter {
|
||||
let &(ref k, ref v) = pair.borrow();
|
||||
append_pair(string, self.start_position, self.encoding, k.as_ref(), v.as_ref());
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a name/value pair whose name is `_charset_`
|
||||
/// and whose value is the character encoding’s name.
|
||||
/// (See the `encoding_override()` method.)
|
||||
///
|
||||
/// Panics if called after `.finish()`.
|
||||
#[cfg(feature = "query_encoding")]
|
||||
pub fn append_charset(&mut self) -> &mut Self {
|
||||
{
|
||||
let string = string(&mut self.target);
|
||||
append_separator_if_needed(string, self.start_position);
|
||||
string.push_str("_charset_=");
|
||||
string.push_str(self.encoding.name());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// If this serializer was constructed with a string, take and return that string.
|
||||
///
|
||||
/// ```rust
|
||||
/// use url::form_urlencoded;
|
||||
/// let encoded: String = form_urlencoded::Serializer::new(String::new())
|
||||
/// .append_pair("foo", "bar & baz")
|
||||
/// .append_pair("saison", "Été+hiver")
|
||||
/// .finish();
|
||||
/// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
|
||||
/// ```
|
||||
///
|
||||
/// Panics if called more than once.
|
||||
pub fn finish(&mut self) -> T::Finished {
|
||||
self.target.take().expect("url::form_urlencoded::Serializer double finish").finish()
|
||||
}
|
||||
}
|
||||
|
||||
fn append_separator_if_needed(string: &mut String, start_position: usize) {
|
||||
if string.len() > start_position {
|
||||
string.push('&')
|
||||
}
|
||||
}
|
||||
|
||||
fn string<T: Target>(target: &mut Option<T>) -> &mut String {
|
||||
target.as_mut().expect("url::form_urlencoded::Serializer finished").as_mut_string()
|
||||
}
|
||||
|
||||
fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride,
|
||||
name: &str, value: &str) {
|
||||
append_separator_if_needed(string, start_position);
|
||||
string.extend(byte_serialize(&encoding.encode(name.into())));
|
||||
string.push('=');
|
||||
string.extend(byte_serialize(&encoding.encode(value.into())));
|
||||
}
|
418
third_party/rust/url/src/host.rs
vendored
418
third_party/rust/url/src/host.rs
vendored
@ -1,418 +0,0 @@
|
||||
// Copyright 2013-2016 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
|
||||
use std::cmp;
|
||||
use std::fmt::{self, Formatter};
|
||||
use std::io;
|
||||
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs};
|
||||
use std::vec;
|
||||
use parser::{ParseResult, ParseError};
|
||||
use percent_encoding::percent_decode;
|
||||
use idna;
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub enum HostInternal {
|
||||
None,
|
||||
Domain,
|
||||
Ipv4(Ipv4Addr),
|
||||
Ipv6(Ipv6Addr),
|
||||
}
|
||||
|
||||
#[cfg(feature = "heapsize")]
|
||||
known_heap_size!(0, HostInternal);
|
||||
|
||||
impl<S> From<Host<S>> for HostInternal {
|
||||
fn from(host: Host<S>) -> HostInternal {
|
||||
match host {
|
||||
Host::Domain(_) => HostInternal::Domain,
|
||||
Host::Ipv4(address) => HostInternal::Ipv4(address),
|
||||
Host::Ipv6(address) => HostInternal::Ipv6(address),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The host name of an URL.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub enum Host<S=String> {
|
||||
/// A DNS domain name, as '.' dot-separated labels.
|
||||
/// Non-ASCII labels are encoded in punycode per IDNA.
|
||||
Domain(S),
|
||||
|
||||
/// An IPv4 address.
|
||||
/// `Url::host_str` returns the serialization of this address,
|
||||
/// as four decimal integers separated by `.` dots.
|
||||
Ipv4(Ipv4Addr),
|
||||
|
||||
/// An IPv6 address.
|
||||
/// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
|
||||
/// in the format per [RFC 5952 *A Recommendation
|
||||
/// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
|
||||
/// lowercase hexadecimal with maximal `::` compression.
|
||||
Ipv6(Ipv6Addr),
|
||||
}
|
||||
|
||||
#[cfg(feature = "heapsize")]
|
||||
impl<S: HeapSizeOf> HeapSizeOf for Host<S> {
|
||||
fn heap_size_of_children(&self) -> usize {
|
||||
match *self {
|
||||
Host::Domain(ref s) => s.heap_size_of_children(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Host<&'a str> {
|
||||
/// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
|
||||
pub fn to_owned(&self) -> Host<String> {
|
||||
match *self {
|
||||
Host::Domain(domain) => Host::Domain(domain.to_owned()),
|
||||
Host::Ipv4(address) => Host::Ipv4(address),
|
||||
Host::Ipv6(address) => Host::Ipv6(address),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Host<String> {
|
||||
/// Parse a host: either an IPv6 address in [] square brackets, or a domain.
|
||||
///
|
||||
/// https://url.spec.whatwg.org/#host-parsing
|
||||
pub fn parse(input: &str) -> Result<Self, ParseError> {
|
||||
if input.starts_with("[") {
|
||||
if !input.ends_with("]") {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
|
||||
}
|
||||
let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
|
||||
let domain = try!(idna::domain_to_ascii(&domain));
|
||||
if domain.find(|c| matches!(c,
|
||||
'\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
|
||||
)).is_some() {
|
||||
return Err(ParseError::InvalidDomainCharacter)
|
||||
}
|
||||
if let Some(address) = try!(parse_ipv4addr(&domain)) {
|
||||
Ok(Host::Ipv4(address))
|
||||
} else {
|
||||
Ok(Host::Domain(domain.into()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsRef<str>> fmt::Display for Host<S> {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Host::Domain(ref domain) => domain.as_ref().fmt(f),
|
||||
Host::Ipv4(ref addr) => addr.fmt(f),
|
||||
Host::Ipv6(ref addr) => {
|
||||
try!(f.write_str("["));
|
||||
try!(write_ipv6(addr, f));
|
||||
f.write_str("]")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This mostly exists because coherence rules don’t allow us to implement
|
||||
/// `ToSocketAddrs for (Host<S>, u16)`.
|
||||
pub struct HostAndPort<S=String> {
|
||||
pub host: Host<S>,
|
||||
pub port: u16,
|
||||
}
|
||||
|
||||
impl<'a> HostAndPort<&'a str> {
|
||||
/// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
|
||||
pub fn to_owned(&self) -> HostAndPort<String> {
|
||||
HostAndPort {
|
||||
host: self.host.to_owned(),
|
||||
port: self.port
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsRef<str>> ToSocketAddrs for HostAndPort<S> {
|
||||
type Iter = SocketAddrs;
|
||||
|
||||
fn to_socket_addrs(&self) -> io::Result<Self::Iter> {
|
||||
let port = self.port;
|
||||
match self.host {
|
||||
Host::Domain(ref domain) => Ok(SocketAddrs {
|
||||
// FIXME: use std::net::lookup_host when it’s stable.
|
||||
state: SocketAddrsState::Domain(try!((domain.as_ref(), port).to_socket_addrs()))
|
||||
}),
|
||||
Host::Ipv4(address) => Ok(SocketAddrs {
|
||||
state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port)))
|
||||
}),
|
||||
Host::Ipv6(address) => Ok(SocketAddrs {
|
||||
state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new(address, port, 0, 0)))
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Socket addresses for an URL.
|
||||
pub struct SocketAddrs {
|
||||
state: SocketAddrsState
|
||||
}
|
||||
|
||||
enum SocketAddrsState {
|
||||
Domain(vec::IntoIter<SocketAddr>),
|
||||
One(SocketAddr),
|
||||
Done,
|
||||
}
|
||||
|
||||
impl Iterator for SocketAddrs {
|
||||
type Item = SocketAddr;
|
||||
fn next(&mut self) -> Option<SocketAddr> {
|
||||
match self.state {
|
||||
SocketAddrsState::Domain(ref mut iter) => iter.next(),
|
||||
SocketAddrsState::One(s) => {
|
||||
self.state = SocketAddrsState::Done;
|
||||
Some(s)
|
||||
}
|
||||
SocketAddrsState::Done => None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result {
|
||||
let segments = addr.segments();
|
||||
let (compress_start, compress_end) = longest_zero_sequence(&segments);
|
||||
let mut i = 0;
|
||||
while i < 8 {
|
||||
if i == compress_start {
|
||||
try!(f.write_str(":"));
|
||||
if i == 0 {
|
||||
try!(f.write_str(":"));
|
||||
}
|
||||
if compress_end < 8 {
|
||||
i = compress_end;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
try!(write!(f, "{:x}", segments[i as usize]));
|
||||
if i < 7 {
|
||||
try!(f.write_str(":"));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find the longest run of zero pieces in an IPv6 address.
///
/// Returns `(start, end)` piece indices (end exclusive) of the longest run;
/// ties keep the earlier run. When there is no zero piece at all the result
/// is `(-1, -2)`, which callers treat as "no compression".
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;
    let mut run_start: isize = -1;
    for i in 0..8isize {
        if pieces[i as usize] == 0 {
            if run_start < 0 {
                run_start = i;
            }
        } else {
            // A run just ended; keep it only if strictly longer than the best.
            if run_start >= 0 {
                let len = i - run_start;
                if len > best_len {
                    best_start = run_start;
                    best_len = len;
                }
            }
            run_start = -1;
        }
    }
    // A run may extend to the very end of the address.
    if run_start >= 0 {
        let len = 8 - run_start;
        if len > best_len {
            best_start = run_start;
            best_len = len;
        }
    }
    (best_start, best_start + best_len)
}
|
||||
|
||||
|
||||
/// Parse one component of an IPv4 address.
///
/// `0x`/`0X` prefixes select hexadecimal, a leading `0` (with more digits
/// following) selects octal, anything else is decimal. An input that is
/// empty after stripping the radix prefix parses as 0. An explicit `+`
/// sign is rejected (the std parser would otherwise accept it).
fn parse_ipv4number(mut input: &str) -> Result<u32, ()> {
    let radix = if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        16
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        8
    } else {
        10
    };
    if input.is_empty() {
        return Ok(0);
    }
    if input.starts_with('+') {
        return Err(());
    }
    u32::from_str_radix(input, radix).map_err(|_| ())
}
|
||||
|
||||
fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
|
||||
if input.is_empty() {
|
||||
return Ok(None)
|
||||
}
|
||||
let mut parts: Vec<&str> = input.split('.').collect();
|
||||
if parts.last() == Some(&"") {
|
||||
parts.pop();
|
||||
}
|
||||
if parts.len() > 4 {
|
||||
return Ok(None);
|
||||
}
|
||||
let mut numbers: Vec<u32> = Vec::new();
|
||||
for part in parts {
|
||||
if part == "" {
|
||||
return Ok(None);
|
||||
}
|
||||
if let Ok(n) = parse_ipv4number(part) {
|
||||
numbers.push(n);
|
||||
} else {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
|
||||
// Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
|
||||
if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
|
||||
return Err(ParseError::InvalidIpv4Address);
|
||||
}
|
||||
if numbers.iter().any(|x| *x > 255) {
|
||||
return Err(ParseError::InvalidIpv4Address);
|
||||
}
|
||||
for (counter, n) in numbers.iter().enumerate() {
|
||||
ipv4 += n << (8 * (3 - counter as u32))
|
||||
}
|
||||
Ok(Some(Ipv4Addr::from(ipv4)))
|
||||
}
|
||||
|
||||
|
||||
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
|
||||
let input = input.as_bytes();
|
||||
let len = input.len();
|
||||
let mut is_ip_v4 = false;
|
||||
let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let mut piece_pointer = 0;
|
||||
let mut compress_pointer = None;
|
||||
let mut i = 0;
|
||||
|
||||
if len < 2 {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
|
||||
if input[0] == b':' {
|
||||
if input[1] != b':' {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
i = 2;
|
||||
piece_pointer = 1;
|
||||
compress_pointer = Some(1);
|
||||
}
|
||||
|
||||
while i < len {
|
||||
if piece_pointer == 8 {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
if input[i] == b':' {
|
||||
if compress_pointer.is_some() {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
i += 1;
|
||||
piece_pointer += 1;
|
||||
compress_pointer = Some(piece_pointer);
|
||||
continue
|
||||
}
|
||||
let start = i;
|
||||
let end = cmp::min(len, start + 4);
|
||||
let mut value = 0u16;
|
||||
while i < end {
|
||||
match (input[i] as char).to_digit(16) {
|
||||
Some(digit) => {
|
||||
value = value * 0x10 + digit as u16;
|
||||
i += 1;
|
||||
},
|
||||
None => break
|
||||
}
|
||||
}
|
||||
if i < len {
|
||||
match input[i] {
|
||||
b'.' => {
|
||||
if i == start {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
i = start;
|
||||
is_ip_v4 = true;
|
||||
},
|
||||
b':' => {
|
||||
i += 1;
|
||||
if i == len {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
},
|
||||
_ => return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
}
|
||||
if is_ip_v4 {
|
||||
break
|
||||
}
|
||||
pieces[piece_pointer] = value;
|
||||
piece_pointer += 1;
|
||||
}
|
||||
|
||||
if is_ip_v4 {
|
||||
if piece_pointer > 6 {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
let mut dots_seen = 0;
|
||||
while i < len {
|
||||
// FIXME: https://github.com/whatwg/url/commit/1c22aa119c354e0020117e02571cec53f7c01064
|
||||
let mut value = 0u16;
|
||||
while i < len {
|
||||
let digit = match input[i] {
|
||||
c @ b'0' ... b'9' => c - b'0',
|
||||
_ => break
|
||||
};
|
||||
value = value * 10 + digit as u16;
|
||||
if value == 0 || value > 255 {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
}
|
||||
if dots_seen < 3 && !(i < len && input[i] == b'.') {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value;
|
||||
if dots_seen == 0 || dots_seen == 2 {
|
||||
piece_pointer += 1;
|
||||
}
|
||||
i += 1;
|
||||
if dots_seen == 3 && i < len {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
dots_seen += 1;
|
||||
}
|
||||
}
|
||||
|
||||
match compress_pointer {
|
||||
Some(compress_pointer) => {
|
||||
let mut swaps = piece_pointer - compress_pointer;
|
||||
piece_pointer = 7;
|
||||
while swaps > 0 {
|
||||
pieces[piece_pointer] = pieces[compress_pointer + swaps - 1];
|
||||
pieces[compress_pointer + swaps - 1] = 0;
|
||||
swaps -= 1;
|
||||
piece_pointer -= 1;
|
||||
}
|
||||
}
|
||||
_ => if piece_pointer != 8 {
|
||||
return Err(ParseError::InvalidIpv6Address)
|
||||
}
|
||||
}
|
||||
Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3],
|
||||
pieces[4], pieces[5], pieces[6], pieces[7]))
|
||||
}
|
1456
third_party/rust/url/src/lib.rs
vendored
1456
third_party/rust/url/src/lib.rs
vendored
File diff suppressed because it is too large
Load Diff
114
third_party/rust/url/src/origin.rs
vendored
114
third_party/rust/url/src/origin.rs
vendored
@ -1,114 +0,0 @@
|
||||
// Copyright 2016 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
|
||||
use host::Host;
|
||||
use idna::domain_to_unicode;
|
||||
use parser::default_port;
|
||||
use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering};
|
||||
use Url;
|
||||
|
||||
pub fn url_origin(url: &Url) -> Origin {
|
||||
let scheme = url.scheme();
|
||||
match scheme {
|
||||
"blob" => {
|
||||
let result = Url::parse(url.path());
|
||||
match result {
|
||||
Ok(ref url) => url_origin(url),
|
||||
Err(_) => Origin::new_opaque()
|
||||
}
|
||||
},
|
||||
"ftp" | "gopher" | "http" | "https" | "ws" | "wss" => {
|
||||
Origin::Tuple(scheme.to_owned(), url.host().unwrap().to_owned(),
|
||||
url.port_or_known_default().unwrap())
|
||||
},
|
||||
// TODO: Figure out what to do if the scheme is a file
|
||||
"file" => Origin::new_opaque(),
|
||||
_ => Origin::new_opaque()
|
||||
}
|
||||
}
|
||||
|
||||
/// The origin of an URL
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum Origin {
    /// A globally unique identifier
    Opaque(OpaqueOrigin),

    /// Consists of the URL's scheme, host and port
    Tuple(String, Host<String>, u16)
}
|
||||
|
||||
#[cfg(feature = "heapsize")]
impl HeapSizeOf for Origin {
    fn heap_size_of_children(&self) -> usize {
        match *self {
            // Only the tuple variant owns heap data (the scheme string
            // and the host); opaque origins hold a plain counter.
            Origin::Tuple(ref scheme, ref host, _) => {
                scheme.heap_size_of_children() + host.heap_size_of_children()
            }
            _ => 0,
        }
    }
}
|
||||
|
||||
|
||||
impl Origin {
|
||||
/// Creates a new opaque origin that is only equal to itself.
|
||||
pub fn new_opaque() -> Origin {
|
||||
static COUNTER: AtomicUsize = ATOMIC_USIZE_INIT;
|
||||
Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst)))
|
||||
}
|
||||
|
||||
/// Return whether this origin is a (scheme, host, port) tuple
|
||||
/// (as opposed to an opaque origin).
|
||||
pub fn is_tuple(&self) -> bool {
|
||||
matches!(*self, Origin::Tuple(..))
|
||||
}
|
||||
|
||||
/// https://html.spec.whatwg.org/multipage/#ascii-serialisation-of-an-origin
|
||||
pub fn ascii_serialization(&self) -> String {
|
||||
match *self {
|
||||
Origin::Opaque(_) => "null".to_owned(),
|
||||
Origin::Tuple(ref scheme, ref host, port) => {
|
||||
if default_port(scheme) == Some(port) {
|
||||
format!("{}://{}", scheme, host)
|
||||
} else {
|
||||
format!("{}://{}:{}", scheme, host, port)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// https://html.spec.whatwg.org/multipage/#unicode-serialisation-of-an-origin
|
||||
pub fn unicode_serialization(&self) -> String {
|
||||
match *self {
|
||||
Origin::Opaque(_) => "null".to_owned(),
|
||||
Origin::Tuple(ref scheme, ref host, port) => {
|
||||
let host = match *host {
|
||||
Host::Domain(ref domain) => {
|
||||
let (domain, _errors) = domain_to_unicode(domain);
|
||||
Host::Domain(domain)
|
||||
}
|
||||
_ => host.clone()
|
||||
};
|
||||
if default_port(scheme) == Some(port) {
|
||||
format!("{}://{}", scheme, host)
|
||||
} else {
|
||||
format!("{}://{}:{}", scheme, host, port)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Opaque identifier for URLs that have file or other schemes
// The wrapped integer comes from the global counter in `Origin::new_opaque`,
// so two opaque origins compare equal only if they are the same origin.
#[derive(Eq, PartialEq, Clone, Debug)]
pub struct OpaqueOrigin(usize);

#[cfg(feature = "heapsize")]
known_heap_size!(0, OpaqueOrigin);
|
1179
third_party/rust/url/src/parser.rs
vendored
1179
third_party/rust/url/src/parser.rs
vendored
File diff suppressed because it is too large
Load Diff
187
third_party/rust/url/src/path_segments.rs
vendored
187
third_party/rust/url/src/path_segments.rs
vendored
@ -1,187 +0,0 @@
|
||||
// Copyright 2016 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use parser::{self, SchemeType, to_u32};
|
||||
use std::str;
|
||||
use Url;
|
||||
|
||||
/// Exposes methods to manipulate the path of an URL that is not cannot-be-base.
///
/// The path always starts with a `/` slash, and is made of slash-separated segments.
/// There is always at least one segment (which may be the empty string).
///
/// Examples:
///
/// ```rust
/// # use url::Url;
/// let mut url = Url::parse("mailto:me@example.com").unwrap();
/// assert!(url.path_segments_mut().is_err());
///
/// let mut url = Url::parse("http://example.net/foo/index.html").unwrap();
/// url.path_segments_mut().unwrap().pop().push("img").push("2/100%.png");
/// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png");
/// ```
pub struct PathSegmentsMut<'a> {
    url: &'a mut Url,
    // Byte index just past the initial '/' of the path.
    after_first_slash: usize,
    // Query/fragment text detached from the URL while the path is being
    // edited; restored by `Drop`.
    after_path: String,
    // Length of the serialization before `after_path` was detached.
    old_after_path_position: u32,
}
|
||||
|
||||
// Not re-exported outside the crate
// Construct a `PathSegmentsMut` for `url`: detaches everything after the
// path (query/fragment) so the path can be edited in place; the `Drop`
// impl re-attaches it when the editor goes out of scope.
pub fn new(url: &mut Url) -> PathSegmentsMut {
    let after_path = url.take_after_path();
    let old_after_path_position = to_u32(url.serialization.len()).unwrap();
    // The caller guarantees a non-cannot-be-a-base URL, whose path
    // always begins with '/'.
    debug_assert!(url.byte_at(url.path_start) == b'/');
    PathSegmentsMut {
        after_first_slash: url.path_start as usize + "/".len(),
        url: url,
        old_after_path_position: old_after_path_position,
        after_path: after_path,
    }
}
|
||||
|
||||
impl<'a> Drop for PathSegmentsMut<'a> {
    fn drop(&mut self) {
        // Re-attach the query/fragment that `new()` detached, so the URL
        // is complete again once editing finishes.
        self.url.restore_after_path(self.old_after_path_position, &self.after_path)
    }
}
|
||||
|
||||
impl<'a> PathSegmentsMut<'a> {
    /// Remove all segments in the path, leaving the minimal `url.path() == "/"`.
    ///
    /// Returns `&mut Self` so that method calls can be chained.
    ///
    /// Example:
    ///
    /// ```rust
    /// # use url::Url;
    /// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap();
    /// url.path_segments_mut().unwrap().clear().push("logout");
    /// assert_eq!(url.as_str(), "https://github.com/logout");
    /// ```
    pub fn clear(&mut self) -> &mut Self {
        // Truncating to just past the initial '/' leaves the minimal path.
        self.url.serialization.truncate(self.after_first_slash);
        self
    }

    /// Remove the last segment of this URL’s path if it is empty,
    /// except if there was only one segment to begin with.
    ///
    /// In other words, remove one path trailing slash, if any,
    /// unless it is also the initial slash (so this does nothing if `url.path() == "/")`.
    ///
    /// Returns `&mut Self` so that method calls can be chained.
    ///
    /// Example:
    ///
    /// ```rust
    /// # use url::Url;
    /// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap();
    /// url.path_segments_mut().unwrap().push("pulls");
    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls");
    ///
    /// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap();
    /// url.path_segments_mut().unwrap().pop_if_empty().push("pulls");
    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls");
    /// ```
    pub fn pop_if_empty(&mut self) -> &mut Self {
        // Only slashes *after* the initial one count as a trailing slash.
        if self.url.serialization[self.after_first_slash..].ends_with('/') {
            self.url.serialization.pop();
        }
        self
    }

    /// Remove the last segment of this URL’s path.
    ///
    /// If the path only has one segment, make it empty such that `url.path() == "/"`.
    ///
    /// Returns `&mut Self` so that method calls can be chained.
    pub fn pop(&mut self) -> &mut Self {
        // `unwrap_or(0)` means a single-segment path truncates to just "/".
        let last_slash = self.url.serialization[self.after_first_slash..].rfind('/').unwrap_or(0);
        self.url.serialization.truncate(self.after_first_slash + last_slash);
        self
    }

    /// Append the given segment at the end of this URL’s path.
    ///
    /// See the documentation for `.extend()`.
    ///
    /// Returns `&mut Self` so that method calls can be chained.
    pub fn push(&mut self, segment: &str) -> &mut Self {
        self.extend(Some(segment))
    }

    /// Append each segment from the given iterator at the end of this URL’s path.
    ///
    /// Each segment is percent-encoded like in `Url::parse` or `Url::join`,
    /// except that `%` and `/` characters are also encoded (to `%25` and `%2F`).
    /// This is unlike `Url::parse` where `%` is left as-is in case some of the input
    /// is already percent-encoded, and `/` denotes a path segment separator.)
    ///
    /// Note that, in addition to slashes between new segments,
    /// this always adds a slash between the existing path and the new segments
    /// *except* if the existing path is `"/"`.
    /// If the previous last segment was empty (if the path had a trailing slash)
    /// the path after `.extend()` will contain two consecutive slashes.
    /// If that is undesired, call `.pop_if_empty()` first.
    ///
    /// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first.
    ///
    /// Returns `&mut Self` so that method calls can be chained.
    ///
    /// Example:
    ///
    /// ```rust
    /// # use url::Url;
    /// let mut url = Url::parse("https://github.com/").unwrap();
    /// let org = "servo";
    /// let repo = "rust-url";
    /// let issue_number = "188";
    /// url.path_segments_mut().unwrap().extend(&[org, repo, "issues", issue_number]);
    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188");
    /// ```
    ///
    /// In order to make sure that parsing the serialization of an URL gives the same URL,
    /// a segment is ignored if it is `"."` or `".."`:
    ///
    /// ```rust
    /// # use url::Url;
    /// let mut url = Url::parse("https://github.com/servo").unwrap();
    /// url.path_segments_mut().unwrap().extend(&["..", "rust-url", ".", "pulls"]);
    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls");
    /// ```
    pub fn extend<I>(&mut self, segments: I) -> &mut Self
    where I: IntoIterator, I::Item: AsRef<str> {
        let scheme_type = SchemeType::from(self.url.scheme());
        let path_start = self.url.path_start as usize;
        self.url.mutate(|parser| {
            // The path-segment-setter context also encodes '%' and '/'.
            parser.context = parser::Context::PathSegmentSetter;
            for segment in segments {
                let segment = segment.as_ref();
                // Skip "." and ".." so reparsing yields the same URL.
                if matches!(segment, "." | "..") {
                    continue
                }
                // Separate from the existing path unless it is just "/".
                if parser.serialization.len() > path_start + 1 {
                    parser.serialization.push('/');
                }
                let mut has_host = true; // FIXME account for this?
                parser.parse_path(scheme_type, &mut has_host, path_start,
                                  parser::Input::new(segment));
            }
        });
        self
    }

    /// For internal testing, not part of the public API.
    #[doc(hidden)]
    pub fn assert_url_invariants(&mut self) -> &mut Self {
        self.url.assert_invariants();
        self
    }
}
|
344
third_party/rust/url/src/percent_encoding.rs
vendored
344
third_party/rust/url/src/percent_encoding.rs
vendored
@ -1,344 +0,0 @@
|
||||
// Copyright 2013-2016 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use encoding;
|
||||
use std::ascii::AsciiExt;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use std::slice;
|
||||
use std::str;
|
||||
|
||||
/// Represents a set of characters / bytes that should be percent-encoded.
///
/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
///
/// Different characters need to be encoded in different parts of an URL.
/// For example, a literal `?` question mark in an URL’s path would indicate
/// the start of the query string.
/// A question mark meant to be part of the path therefore needs to be percent-encoded.
/// In the query string however, a question mark does not have any special meaning
/// and does not need to be percent-encoded.
///
/// A few sets are defined in this module.
/// Use the [`define_encode_set!`](../macro.define_encode_set!.html) macro to define different ones.
pub trait EncodeSet: Clone {
    /// Called with UTF-8 bytes rather than code points.
    /// Should return true for all non-ASCII bytes.
    fn contains(&self, byte: u8) -> bool;
}
|
||||
|
||||
/// Define a new struct
/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait,
/// for use in [`percent_decode()`](percent_encoding/fn.percent_encode.html)
/// and related functions.
///
/// Parameters are characters to include in the set in addition to those of the base set.
/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
///
/// Example
/// =======
///
/// ```rust
/// #[macro_use] extern crate url;
/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
/// define_encode_set! {
///     /// This encode set is used in the URL parser for query strings.
///     pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
/// }
/// # fn main() {
/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar");
/// # }
/// ```
#[macro_export]
macro_rules! define_encode_set {
    ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => {
        $(#[$attr])*
        #[derive(Copy, Clone)]
        #[allow(non_camel_case_types)]
        pub struct $name;

        impl $crate::percent_encoding::EncodeSet for $name {
            #[inline]
            fn contains(&self, byte: u8) -> bool {
                // A byte is in the set if it matches one of the extra
                // characters, or is in the base set.
                match byte as char {
                    $(
                        $ch => true,
                    )*
                    _ => $base_set.contains(byte)
                }
            }
        }
    }
}
|
||||
|
||||
/// This encode set is used for the path of cannot-be-a-base URLs.
#[derive(Copy, Clone)]
#[allow(non_camel_case_types)]
pub struct SIMPLE_ENCODE_SET;

impl EncodeSet for SIMPLE_ENCODE_SET {
    #[inline]
    fn contains(&self, byte: u8) -> bool {
        // C0 controls and everything above ASCII '~' (0x7E) are encoded.
        byte < 0x20 || byte > 0x7E
    }
}
|
||||
|
||||
define_encode_set! {
    /// This encode set is used in the URL parser for query strings.
    pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
}

define_encode_set! {
    /// This encode set is used for path components.
    pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'}
}

define_encode_set! {
    /// This encode set is used on '/'-separated path segments.
    pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'}
}

define_encode_set! {
    /// This encode set is used for username and password.
    pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | {
        '/', ':', ';', '=', '@', '[', '\\', ']', '^', '|'
    }
}
|
||||
|
||||
/// Return the percent-encoding of the given bytes.
///
/// This is unconditional, unlike `percent_encode()` which uses an encode set.
pub fn percent_encode_byte(byte: u8) -> &'static str {
    // 256 three-character entries, "%00" through "%FF", packed into a
    // single static string; slice out the entry for `byte`.
    static ENCODED: &'static str = "\
        %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\
        %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\
        %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\
        %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\
        %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\
        %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\
        %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\
        %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\
        %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\
        %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\
        %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\
        %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\
        %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\
        %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\
        %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\
        %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF";
    let start = usize::from(byte) * 3;
    &ENCODED[start..start + 3]
}
|
||||
|
||||
/// Percent-encode the given bytes with the given encode set.
|
||||
///
|
||||
/// The encode set define which bytes (in addition to non-ASCII and controls)
|
||||
/// need to be percent-encoded.
|
||||
/// The choice of this set depends on context.
|
||||
/// For example, `?` needs to be encoded in an URL path but not in a query string.
|
||||
///
|
||||
/// The return value is an iterator of `&str` slices (so it has a `.collect::<String>()` method)
|
||||
/// that also implements `Display` and `Into<Cow<str>>`.
|
||||
/// The latter returns `Cow::Borrowed` when none of the bytes in `input`
|
||||
/// are in the given encode set.
|
||||
#[inline]
|
||||
pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncode<E> {
|
||||
PercentEncode {
|
||||
bytes: input,
|
||||
encode_set: encode_set,
|
||||
}
|
||||
}
|
||||
|
||||
/// Percent-encode the UTF-8 encoding of the given string.
|
||||
///
|
||||
/// See `percent_encode()` for how to use the return value.
|
||||
#[inline]
|
||||
pub fn utf8_percent_encode<E: EncodeSet>(input: &str, encode_set: E) -> PercentEncode<E> {
|
||||
percent_encode(input.as_bytes(), encode_set)
|
||||
}
|
||||
|
||||
/// The return type of `percent_encode()` and `utf8_percent_encode()`.
#[derive(Clone)]
pub struct PercentEncode<'a, E: EncodeSet> {
    // Bytes not yet consumed by the iterator.
    bytes: &'a [u8],
    encode_set: E,
}
|
||||
|
||||
impl<'a, E: EncodeSet> Iterator for PercentEncode<'a, E> {
|
||||
type Item = &'a str;
|
||||
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
if let Some((&first_byte, remaining)) = self.bytes.split_first() {
|
||||
if self.encode_set.contains(first_byte) {
|
||||
self.bytes = remaining;
|
||||
Some(percent_encode_byte(first_byte))
|
||||
} else {
|
||||
assert!(first_byte.is_ascii());
|
||||
for (i, &byte) in remaining.iter().enumerate() {
|
||||
if self.encode_set.contains(byte) {
|
||||
// 1 for first_byte + i for previous iterations of this loop
|
||||
let (unchanged_slice, remaining) = self.bytes.split_at(1 + i);
|
||||
self.bytes = remaining;
|
||||
return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
|
||||
} else {
|
||||
assert!(byte.is_ascii());
|
||||
}
|
||||
}
|
||||
let unchanged_slice = self.bytes;
|
||||
self.bytes = &[][..];
|
||||
Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
if self.bytes.is_empty() {
|
||||
(0, Some(0))
|
||||
} else {
|
||||
(1, Some(self.bytes.len()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, E: EncodeSet> fmt::Display for PercentEncode<'a, E> {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        // Iterate over a clone so displaying does not consume `self`.
        for piece in (*self).clone() {
            formatter.write_str(piece)?
        }
        Ok(())
    }
}
|
||||
|
||||
impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
    fn from(mut iter: PercentEncode<'a, E>) -> Self {
        // Zero pieces -> empty; exactly one piece -> borrow it unchanged;
        // otherwise concatenate everything into an owned String.
        let first = match iter.next() {
            None => return "".into(),
            Some(piece) => piece,
        };
        match iter.next() {
            None => first.into(),
            Some(second) => {
                let mut string = first.to_owned();
                string.push_str(second);
                string.extend(iter);
                string.into()
            }
        }
    }
}
|
||||
|
||||
/// Percent-decode the given bytes.
|
||||
///
|
||||
/// The return value is an iterator of decoded `u8` bytes
|
||||
/// that also implements `Into<Cow<u8>>`
|
||||
/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence)
|
||||
/// and has `decode_utf8()` and `decode_utf8_lossy()` methods.
|
||||
#[inline]
|
||||
pub fn percent_decode<'a>(input: &'a [u8]) -> PercentDecode<'a> {
|
||||
PercentDecode {
|
||||
bytes: input.iter()
|
||||
}
|
||||
}
|
||||
|
||||
/// The return type of `percent_decode()`.
#[derive(Clone)]
pub struct PercentDecode<'a> {
    // Iterator over the bytes not yet decoded.
    bytes: slice::Iter<'a, u8>,
}
|
||||
|
||||
/// Try to read the two hex digits following a `%` and return the decoded
/// byte. On failure the iterator is rewound so the caller can emit the
/// `%` (and whatever follows it) literally.
fn after_percent_sign(iter: &mut slice::Iter<u8>) -> Option<u8> {
    let rollback = iter.clone();
    let hi = iter.next().and_then(|&b| (b as char).to_digit(16));
    let lo = iter.next().and_then(|&b| (b as char).to_digit(16));
    match (hi, lo) {
        (Some(hi), Some(lo)) => Some((hi * 0x10 + lo) as u8),
        _ => {
            *iter = rollback;
            None
        }
    }
}
|
||||
|
||||
impl<'a> Iterator for PercentDecode<'a> {
|
||||
type Item = u8;
|
||||
|
||||
fn next(&mut self) -> Option<u8> {
|
||||
self.bytes.next().map(|&byte| {
|
||||
if byte == b'%' {
|
||||
after_percent_sign(&mut self.bytes).unwrap_or(byte)
|
||||
} else {
|
||||
byte
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let bytes = self.bytes.len();
|
||||
(bytes / 3, Some(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<PercentDecode<'a>> for Cow<'a, [u8]> {
    fn from(iter: PercentDecode<'a>) -> Self {
        // Borrow the input untouched unless decoding actually changes it.
        match iter.if_any() {
            Some(decoded) => Cow::Owned(decoded),
            None => Cow::Borrowed(iter.bytes.as_slice()),
        }
    }
}
|
||||
|
||||
impl<'a> PercentDecode<'a> {
|
||||
/// If the percent-decoding is different from the input, return it as a new bytes vector.
|
||||
pub fn if_any(&self) -> Option<Vec<u8>> {
|
||||
let mut bytes_iter = self.bytes.clone();
|
||||
while bytes_iter.find(|&&b| b == b'%').is_some() {
|
||||
if let Some(decoded_byte) = after_percent_sign(&mut bytes_iter) {
|
||||
let initial_bytes = self.bytes.as_slice();
|
||||
let unchanged_bytes_len = initial_bytes.len() - bytes_iter.len() - 3;
|
||||
let mut decoded = initial_bytes[..unchanged_bytes_len].to_owned();
|
||||
decoded.push(decoded_byte);
|
||||
decoded.extend(PercentDecode {
|
||||
bytes: bytes_iter
|
||||
});
|
||||
return Some(decoded)
|
||||
}
|
||||
}
|
||||
// Nothing to decode
|
||||
None
|
||||
}
|
||||
|
||||
/// Decode the result of percent-decoding as UTF-8.
|
||||
///
|
||||
/// This is return `Err` when the percent-decoded bytes are not well-formed in UTF-8.
|
||||
pub fn decode_utf8(self) -> Result<Cow<'a, str>, str::Utf8Error> {
|
||||
match self.clone().into() {
|
||||
Cow::Borrowed(bytes) => {
|
||||
match str::from_utf8(bytes) {
|
||||
Ok(s) => Ok(s.into()),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
Cow::Owned(bytes) => {
|
||||
match String::from_utf8(bytes) {
|
||||
Ok(s) => Ok(s.into()),
|
||||
Err(e) => Err(e.utf8_error()),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode the result of percent-decoding as UTF-8, lossily.
|
||||
///
|
||||
/// Invalid UTF-8 percent-encoded byte sequences will be replaced <20> U+FFFD,
|
||||
/// the replacement character.
|
||||
pub fn decode_utf8_lossy(self) -> Cow<'a, str> {
|
||||
encoding::decode_utf8_lossy(self.clone().into())
|
||||
}
|
||||
}
|
217
third_party/rust/url/src/quirks.rs
vendored
217
third_party/rust/url/src/quirks.rs
vendored
@ -1,217 +0,0 @@
|
||||
// Copyright 2016 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api
|
||||
//!
|
||||
//! Unless you need to be interoperable with web browsers,
|
||||
//! you probably want to use `Url` method instead.
|
||||
|
||||
use {Url, Position, Host, ParseError, idna};
|
||||
use parser::{Parser, SchemeType, default_port, Context, Input};
|
||||
|
||||
/// https://url.spec.whatwg.org/#dom-url-domaintoascii
|
||||
pub fn domain_to_ascii(domain: &str) -> String {
|
||||
match Host::parse(domain) {
|
||||
Ok(Host::Domain(domain)) => domain,
|
||||
_ => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// https://url.spec.whatwg.org/#dom-url-domaintounicode
|
||||
pub fn domain_to_unicode(domain: &str) -> String {
|
||||
match Host::parse(domain) {
|
||||
Ok(Host::Domain(ref domain)) => {
|
||||
let (unicode, _errors) = idna::domain_to_unicode(domain);
|
||||
unicode
|
||||
}
|
||||
_ => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-href
// `href` is simply the URL's full serialization.
pub fn href(url: &Url) -> &str {
    url.as_str()
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-href
|
||||
pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> {
|
||||
*url = try!(Url::parse(value));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-origin
|
||||
pub fn origin(url: &Url) -> String {
|
||||
url.origin().unicode_serialization()
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-protocol
|
||||
#[inline]
|
||||
pub fn protocol(url: &Url) -> &str {
|
||||
&url.as_str()[..url.scheme().len() + ":".len()]
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-protocol
|
||||
pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> {
|
||||
// The scheme state in the spec ignores everything after the first `:`,
|
||||
// but `set_scheme` errors if there is more.
|
||||
if let Some(position) = new_protocol.find(':') {
|
||||
new_protocol = &new_protocol[..position];
|
||||
}
|
||||
url.set_scheme(new_protocol)
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-username
|
||||
#[inline]
|
||||
pub fn username(url: &Url) -> &str {
|
||||
url.username()
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-username
|
||||
pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> {
|
||||
url.set_username(new_username)
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-password
|
||||
#[inline]
|
||||
pub fn password(url: &Url) -> &str {
|
||||
url.password().unwrap_or("")
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-password
|
||||
pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> {
|
||||
url.set_password(if new_password.is_empty() { None } else { Some(new_password) })
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-host
|
||||
#[inline]
|
||||
pub fn host(url: &Url) -> &str {
|
||||
&url[Position::BeforeHost..Position::AfterPort]
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-host
|
||||
pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
|
||||
if url.cannot_be_a_base() {
|
||||
return Err(())
|
||||
}
|
||||
let host;
|
||||
let opt_port;
|
||||
{
|
||||
let scheme = url.scheme();
|
||||
let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme));
|
||||
match result {
|
||||
Ok((h, remaining)) => {
|
||||
host = h;
|
||||
opt_port = if let Some(remaining) = remaining.split_prefix(':') {
|
||||
Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
|
||||
.ok().map(|(port, _remaining)| port)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
Err(_) => return Err(())
|
||||
}
|
||||
}
|
||||
url.set_host_internal(host, opt_port);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-hostname
|
||||
#[inline]
|
||||
pub fn hostname(url: &Url) -> &str {
|
||||
url.host_str().unwrap_or("")
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-hostname
|
||||
pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
|
||||
if url.cannot_be_a_base() {
|
||||
return Err(())
|
||||
}
|
||||
let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme()));
|
||||
if let Ok((host, _remaining)) = result {
|
||||
url.set_host_internal(host, None);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-port
|
||||
#[inline]
|
||||
pub fn port(url: &Url) -> &str {
|
||||
&url[Position::BeforePort..Position::AfterPort]
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-port
|
||||
pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> {
|
||||
let result;
|
||||
{
|
||||
// has_host implies !cannot_be_a_base
|
||||
let scheme = url.scheme();
|
||||
if !url.has_host() || scheme == "file" {
|
||||
return Err(())
|
||||
}
|
||||
result = Parser::parse_port(Input::new(new_port), || default_port(scheme), Context::Setter)
|
||||
}
|
||||
if let Ok((new_port, _remaining)) = result {
|
||||
url.set_port_internal(new_port);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-pathname
|
||||
#[inline]
|
||||
pub fn pathname(url: &Url) -> &str {
|
||||
url.path()
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-pathname
|
||||
pub fn set_pathname(url: &mut Url, new_pathname: &str) {
|
||||
if !url.cannot_be_a_base() {
|
||||
url.set_path(new_pathname)
|
||||
}
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-search
|
||||
pub fn search(url: &Url) -> &str {
|
||||
trim(&url[Position::AfterPath..Position::AfterQuery])
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-search
|
||||
pub fn set_search(url: &mut Url, new_search: &str) {
|
||||
url.set_query(match new_search {
|
||||
"" => None,
|
||||
_ if new_search.starts_with('?') => Some(&new_search[1..]),
|
||||
_ => Some(new_search),
|
||||
})
|
||||
}
|
||||
|
||||
/// Getter for https://url.spec.whatwg.org/#dom-url-hash
|
||||
pub fn hash(url: &Url) -> &str {
|
||||
trim(&url[Position::AfterQuery..])
|
||||
}
|
||||
|
||||
/// Setter for https://url.spec.whatwg.org/#dom-url-hash
|
||||
pub fn set_hash(url: &mut Url, new_hash: &str) {
|
||||
if url.scheme() != "javascript" {
|
||||
url.set_fragment(match new_hash {
|
||||
"" => None,
|
||||
_ if new_hash.starts_with('#') => Some(&new_hash[1..]),
|
||||
_ => Some(new_hash),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn trim(s: &str) -> &str {
|
||||
if s.len() == 1 {
|
||||
""
|
||||
} else {
|
||||
s
|
||||
}
|
||||
}
|
182
third_party/rust/url/src/slicing.rs
vendored
182
third_party/rust/url/src/slicing.rs
vendored
@ -1,182 +0,0 @@
|
||||
// Copyright 2016 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::ops::{Range, RangeFrom, RangeTo, RangeFull, Index};
|
||||
use Url;
|
||||
|
||||
impl Index<RangeFull> for Url {
|
||||
type Output = str;
|
||||
fn index(&self, _: RangeFull) -> &str {
|
||||
&self.serialization
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<RangeFrom<Position>> for Url {
|
||||
type Output = str;
|
||||
fn index(&self, range: RangeFrom<Position>) -> &str {
|
||||
&self.serialization[self.index(range.start)..]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<RangeTo<Position>> for Url {
|
||||
type Output = str;
|
||||
fn index(&self, range: RangeTo<Position>) -> &str {
|
||||
&self.serialization[..self.index(range.end)]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<Range<Position>> for Url {
|
||||
type Output = str;
|
||||
fn index(&self, range: Range<Position>) -> &str {
|
||||
&self.serialization[self.index(range.start)..self.index(range.end)]
|
||||
}
|
||||
}
|
||||
|
||||
/// Indicates a position within a URL based on its components.
|
||||
///
|
||||
/// A range of positions can be used for slicing `Url`:
|
||||
///
|
||||
/// ```rust
|
||||
/// # use url::{Url, Position};
|
||||
/// # fn something(some_url: Url) {
|
||||
/// let serialization: &str = &some_url[..];
|
||||
/// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery];
|
||||
/// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort];
|
||||
/// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery];
|
||||
/// let scheme_relative: &str = &some_url[Position::BeforeUsername..];
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional),
|
||||
/// URL components and delimiters that separate them are:
|
||||
///
|
||||
/// ```notrust
|
||||
/// url =
|
||||
/// scheme ":"
|
||||
/// [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]?
|
||||
/// path [ "?" query ]? [ "#" fragment ]?
|
||||
/// ```
|
||||
///
|
||||
/// When a given component is not present,
|
||||
/// its "before" and "after" position are the same
|
||||
/// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string)
|
||||
/// and component ordering is preserved
|
||||
/// (so that a missing query "is between" a path and a fragment).
|
||||
///
|
||||
/// The end of a component and the start of the next are either the same or separate
|
||||
/// by a delimiter.
|
||||
/// (Not that the initial `/` of a path is considered part of the path here, not a delimiter.)
|
||||
/// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`),
|
||||
/// so `&url[..AfterQuery]` might be desired instead.
|
||||
///
|
||||
/// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL,
|
||||
/// so `&url[BeforeScheme..X]` is the same as `&url[..X]`
|
||||
/// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum Position {
|
||||
BeforeScheme,
|
||||
AfterScheme,
|
||||
BeforeUsername,
|
||||
AfterUsername,
|
||||
BeforePassword,
|
||||
AfterPassword,
|
||||
BeforeHost,
|
||||
AfterHost,
|
||||
BeforePort,
|
||||
AfterPort,
|
||||
BeforePath,
|
||||
AfterPath,
|
||||
BeforeQuery,
|
||||
AfterQuery,
|
||||
BeforeFragment,
|
||||
AfterFragment
|
||||
}
|
||||
|
||||
impl Url {
|
||||
#[inline]
|
||||
fn index(&self, position: Position) -> usize {
|
||||
match position {
|
||||
Position::BeforeScheme => 0,
|
||||
|
||||
Position::AfterScheme => self.scheme_end as usize,
|
||||
|
||||
Position::BeforeUsername => if self.has_authority() {
|
||||
self.scheme_end as usize + "://".len()
|
||||
} else {
|
||||
debug_assert!(self.byte_at(self.scheme_end) == b':');
|
||||
debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end);
|
||||
self.scheme_end as usize + ":".len()
|
||||
},
|
||||
|
||||
Position::AfterUsername => self.username_end as usize,
|
||||
|
||||
Position::BeforePassword => if self.has_authority() &&
|
||||
self.byte_at(self.username_end) == b':' {
|
||||
self.username_end as usize + ":".len()
|
||||
} else {
|
||||
debug_assert!(self.username_end == self.host_start);
|
||||
self.username_end as usize
|
||||
},
|
||||
|
||||
Position::AfterPassword => if self.has_authority() &&
|
||||
self.byte_at(self.username_end) == b':' {
|
||||
debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@');
|
||||
self.host_start as usize - "@".len()
|
||||
} else {
|
||||
debug_assert!(self.username_end == self.host_start);
|
||||
self.host_start as usize
|
||||
},
|
||||
|
||||
Position::BeforeHost => self.host_start as usize,
|
||||
|
||||
Position::AfterHost => self.host_end as usize,
|
||||
|
||||
Position::BeforePort => if self.port.is_some() {
|
||||
debug_assert!(self.byte_at(self.host_end) == b':');
|
||||
self.host_end as usize + ":".len()
|
||||
} else {
|
||||
self.host_end as usize
|
||||
},
|
||||
|
||||
Position::AfterPort => self.path_start as usize,
|
||||
|
||||
Position::BeforePath => self.path_start as usize,
|
||||
|
||||
Position::AfterPath => match (self.query_start, self.fragment_start) {
|
||||
(Some(q), _) => q as usize,
|
||||
(None, Some(f)) => f as usize,
|
||||
(None, None) => self.serialization.len(),
|
||||
},
|
||||
|
||||
Position::BeforeQuery => match (self.query_start, self.fragment_start) {
|
||||
(Some(q), _) => {
|
||||
debug_assert!(self.byte_at(q) == b'?');
|
||||
q as usize + "?".len()
|
||||
}
|
||||
(None, Some(f)) => f as usize,
|
||||
(None, None) => self.serialization.len(),
|
||||
},
|
||||
|
||||
Position::AfterQuery => match self.fragment_start {
|
||||
None => self.serialization.len(),
|
||||
Some(f) => f as usize,
|
||||
},
|
||||
|
||||
Position::BeforeFragment => match self.fragment_start {
|
||||
Some(f) => {
|
||||
debug_assert!(self.byte_at(f) == b'#');
|
||||
f as usize + "#".len()
|
||||
}
|
||||
None => self.serialization.len(),
|
||||
},
|
||||
|
||||
Position::AfterFragment => self.serialization.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
193
third_party/rust/url/tests/data.rs
vendored
193
third_party/rust/url/tests/data.rs
vendored
@ -1,193 +0,0 @@
|
||||
// Copyright 2013-2014 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Data-driven tests
|
||||
|
||||
extern crate rustc_serialize;
|
||||
extern crate test;
|
||||
extern crate url;
|
||||
|
||||
use rustc_serialize::json::{self, Json};
|
||||
use url::{Url, quirks};
|
||||
|
||||
|
||||
fn run_parsing(input: String, base: String, expected: Result<ExpectedAttributes, ()>) {
|
||||
let base = match Url::parse(&base) {
|
||||
Ok(base) => base,
|
||||
Err(message) => panic!("Error parsing base {:?}: {}", base, message)
|
||||
};
|
||||
let (url, expected) = match (base.join(&input), expected) {
|
||||
(Ok(url), Ok(expected)) => (url, expected),
|
||||
(Err(_), Err(())) => return,
|
||||
(Err(message), Ok(_)) => panic!("Error parsing URL {:?}: {}", input, message),
|
||||
(Ok(_), Err(())) => panic!("Expected a parse error for URL {:?}", input),
|
||||
};
|
||||
|
||||
url.assert_invariants();
|
||||
|
||||
macro_rules! assert_eq {
|
||||
($expected: expr, $got: expr) => {
|
||||
{
|
||||
let expected = $expected;
|
||||
let got = $got;
|
||||
assert!(expected == got, "{:?} != {} {:?} for URL {:?}",
|
||||
got, stringify!($expected), expected, url);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! assert_attributes {
|
||||
($($attr: ident)+) => {
|
||||
{
|
||||
$(
|
||||
assert_eq!(expected.$attr, quirks::$attr(&url));
|
||||
)+;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert_attributes!(href protocol username password host hostname port pathname search hash);
|
||||
|
||||
if let Some(expected_origin) = expected.origin {
|
||||
assert_eq!(expected_origin, quirks::origin(&url));
|
||||
}
|
||||
}
|
||||
|
||||
struct ExpectedAttributes {
|
||||
href: String,
|
||||
origin: Option<String>,
|
||||
protocol: String,
|
||||
username: String,
|
||||
password: String,
|
||||
host: String,
|
||||
hostname: String,
|
||||
port: String,
|
||||
pathname: String,
|
||||
search: String,
|
||||
hash: String,
|
||||
}
|
||||
|
||||
trait JsonExt {
|
||||
fn take(&mut self, key: &str) -> Option<Json>;
|
||||
fn object(self) -> json::Object;
|
||||
fn string(self) -> String;
|
||||
fn take_string(&mut self, key: &str) -> String;
|
||||
}
|
||||
|
||||
impl JsonExt for Json {
|
||||
fn take(&mut self, key: &str) -> Option<Json> {
|
||||
self.as_object_mut().unwrap().remove(key)
|
||||
}
|
||||
|
||||
fn object(self) -> json::Object {
|
||||
if let Json::Object(o) = self { o } else { panic!("Not a Json::Object") }
|
||||
}
|
||||
|
||||
fn string(self) -> String {
|
||||
if let Json::String(s) = self { s } else { panic!("Not a Json::String") }
|
||||
}
|
||||
|
||||
fn take_string(&mut self, key: &str) -> String {
|
||||
self.take(key).unwrap().string()
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_parsing<F: FnMut(String, test::TestFn)>(add_test: &mut F) {
|
||||
// Copied form https://github.com/w3c/web-platform-tests/blob/master/url/
|
||||
let mut json = Json::from_str(include_str!("urltestdata.json"))
|
||||
.expect("JSON parse error in urltestdata.json");
|
||||
for entry in json.as_array_mut().unwrap() {
|
||||
if entry.is_string() {
|
||||
continue // ignore comments
|
||||
}
|
||||
let base = entry.take_string("base");
|
||||
let input = entry.take_string("input");
|
||||
let expected = if entry.find("failure").is_some() {
|
||||
Err(())
|
||||
} else {
|
||||
Ok(ExpectedAttributes {
|
||||
href: entry.take_string("href"),
|
||||
origin: entry.take("origin").map(Json::string),
|
||||
protocol: entry.take_string("protocol"),
|
||||
username: entry.take_string("username"),
|
||||
password: entry.take_string("password"),
|
||||
host: entry.take_string("host"),
|
||||
hostname: entry.take_string("hostname"),
|
||||
port: entry.take_string("port"),
|
||||
pathname: entry.take_string("pathname"),
|
||||
search: entry.take_string("search"),
|
||||
hash: entry.take_string("hash"),
|
||||
})
|
||||
};
|
||||
add_test(format!("{:?} @ base {:?}", input, base),
|
||||
test::TestFn::dyn_test_fn(move || run_parsing(input, base, expected)));
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_setters<F>(add_test: &mut F) where F: FnMut(String, test::TestFn) {
|
||||
let mut json = Json::from_str(include_str!("setters_tests.json"))
|
||||
.expect("JSON parse error in setters_tests.json");
|
||||
|
||||
macro_rules! setter {
|
||||
($attr: expr, $setter: ident) => {{
|
||||
let mut tests = json.take($attr).unwrap();
|
||||
for mut test in tests.as_array_mut().unwrap().drain(..) {
|
||||
let comment = test.take("comment").map(Json::string).unwrap_or(String::new());
|
||||
let href = test.take_string("href");
|
||||
let new_value = test.take_string("new_value");
|
||||
let name = format!("{:?}.{} = {:?} {}", href, $attr, new_value, comment);
|
||||
let mut expected = test.take("expected").unwrap();
|
||||
add_test(name, test::TestFn::dyn_test_fn(move || {
|
||||
let mut url = Url::parse(&href).unwrap();
|
||||
url.assert_invariants();
|
||||
let _ = quirks::$setter(&mut url, &new_value);
|
||||
assert_attributes!(url, expected,
|
||||
href protocol username password host hostname port pathname search hash);
|
||||
url.assert_invariants();
|
||||
}))
|
||||
}
|
||||
}}
|
||||
}
|
||||
macro_rules! assert_attributes {
|
||||
($url: expr, $expected: expr, $($attr: ident)+) => {
|
||||
$(
|
||||
if let Some(value) = $expected.take(stringify!($attr)) {
|
||||
assert_eq!(quirks::$attr(&$url), value.string())
|
||||
}
|
||||
)+
|
||||
}
|
||||
}
|
||||
setter!("protocol", set_protocol);
|
||||
setter!("username", set_username);
|
||||
setter!("password", set_password);
|
||||
setter!("hostname", set_hostname);
|
||||
setter!("host", set_host);
|
||||
setter!("port", set_port);
|
||||
setter!("pathname", set_pathname);
|
||||
setter!("search", set_search);
|
||||
setter!("hash", set_hash);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let mut tests = Vec::new();
|
||||
{
|
||||
let mut add_one = |name: String, run: test::TestFn| {
|
||||
tests.push(test::TestDescAndFn {
|
||||
desc: test::TestDesc {
|
||||
name: test::DynTestName(name),
|
||||
ignore: false,
|
||||
should_panic: test::ShouldPanic::No,
|
||||
},
|
||||
testfn: run,
|
||||
})
|
||||
};
|
||||
collect_parsing(&mut add_one);
|
||||
collect_setters(&mut add_one);
|
||||
}
|
||||
test::test_main(&std::env::args().collect::<Vec<_>>(), tests)
|
||||
}
|
1148
third_party/rust/url/tests/setters_tests.json
vendored
1148
third_party/rust/url/tests/setters_tests.json
vendored
File diff suppressed because it is too large
Load Diff
303
third_party/rust/url/tests/unit.rs
vendored
303
third_party/rust/url/tests/unit.rs
vendored
@ -1,303 +0,0 @@
|
||||
// Copyright 2013-2014 The rust-url developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Unit tests
|
||||
|
||||
extern crate url;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
use std::path::{Path, PathBuf};
|
||||
use url::{Host, Url, form_urlencoded};
|
||||
|
||||
macro_rules! assert_from_file_path {
|
||||
($path: expr) => { assert_from_file_path!($path, $path) };
|
||||
($path: expr, $url_path: expr) => {{
|
||||
let url = Url::from_file_path(Path::new($path)).unwrap();
|
||||
assert_eq!(url.host(), None);
|
||||
assert_eq!(url.path(), $url_path);
|
||||
assert_eq!(url.to_file_path(), Ok(PathBuf::from($path)));
|
||||
}};
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn new_file_paths() {
|
||||
if cfg!(unix) {
|
||||
assert_eq!(Url::from_file_path(Path::new("relative")), Err(()));
|
||||
assert_eq!(Url::from_file_path(Path::new("../relative")), Err(()));
|
||||
}
|
||||
if cfg!(windows) {
|
||||
assert_eq!(Url::from_file_path(Path::new("relative")), Err(()));
|
||||
assert_eq!(Url::from_file_path(Path::new(r"..\relative")), Err(()));
|
||||
assert_eq!(Url::from_file_path(Path::new(r"\drive-relative")), Err(()));
|
||||
assert_eq!(Url::from_file_path(Path::new(r"\\ucn\")), Err(()));
|
||||
}
|
||||
|
||||
if cfg!(unix) {
|
||||
assert_from_file_path!("/foo/bar");
|
||||
assert_from_file_path!("/foo/ba\0r", "/foo/ba%00r");
|
||||
assert_from_file_path!("/foo/ba%00r", "/foo/ba%2500r");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn new_path_bad_utf8() {
|
||||
use std::ffi::OsStr;
|
||||
use std::os::unix::prelude::*;
|
||||
|
||||
let url = Url::from_file_path(Path::new(OsStr::from_bytes(b"/foo/ba\x80r"))).unwrap();
|
||||
let os_str = OsStr::from_bytes(b"/foo/ba\x80r");
|
||||
assert_eq!(url.to_file_path(), Ok(PathBuf::from(os_str)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn new_path_windows_fun() {
|
||||
if cfg!(windows) {
|
||||
assert_from_file_path!(r"C:\foo\bar", "/C:/foo/bar");
|
||||
assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r");
|
||||
|
||||
// Invalid UTF-8
|
||||
assert!(Url::parse("file:///C:/foo/ba%80r").unwrap().to_file_path().is_err());
|
||||
|
||||
// test windows canonicalized path
|
||||
let path = PathBuf::from(r"\\?\C:\foo\bar");
|
||||
assert!(Url::from_file_path(path).is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn new_directory_paths() {
|
||||
if cfg!(unix) {
|
||||
assert_eq!(Url::from_directory_path(Path::new("relative")), Err(()));
|
||||
assert_eq!(Url::from_directory_path(Path::new("../relative")), Err(()));
|
||||
|
||||
let url = Url::from_directory_path(Path::new("/foo/bar")).unwrap();
|
||||
assert_eq!(url.host(), None);
|
||||
assert_eq!(url.path(), "/foo/bar/");
|
||||
}
|
||||
if cfg!(windows) {
|
||||
assert_eq!(Url::from_directory_path(Path::new("relative")), Err(()));
|
||||
assert_eq!(Url::from_directory_path(Path::new(r"..\relative")), Err(()));
|
||||
assert_eq!(Url::from_directory_path(Path::new(r"\drive-relative")), Err(()));
|
||||
assert_eq!(Url::from_directory_path(Path::new(r"\\ucn\")), Err(()));
|
||||
|
||||
let url = Url::from_directory_path(Path::new(r"C:\foo\bar")).unwrap();
|
||||
assert_eq!(url.host(), None);
|
||||
assert_eq!(url.path(), "/C:/foo/bar/");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_str() {
|
||||
assert!("http://testing.com/this".parse::<Url>().is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn issue_124() {
|
||||
let url: Url = "file:a".parse().unwrap();
|
||||
assert_eq!(url.path(), "/a");
|
||||
let url: Url = "file:...".parse().unwrap();
|
||||
assert_eq!(url.path(), "/...");
|
||||
let url: Url = "file:..".parse().unwrap();
|
||||
assert_eq!(url.path(), "/");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_equality() {
|
||||
use std::hash::{Hash, Hasher, SipHasher};
|
||||
|
||||
fn check_eq(a: &Url, b: &Url) {
|
||||
assert_eq!(a, b);
|
||||
|
||||
let mut h1 = SipHasher::new();
|
||||
a.hash(&mut h1);
|
||||
let mut h2 = SipHasher::new();
|
||||
b.hash(&mut h2);
|
||||
assert_eq!(h1.finish(), h2.finish());
|
||||
}
|
||||
|
||||
fn url(s: &str) -> Url {
|
||||
let rv = s.parse().unwrap();
|
||||
check_eq(&rv, &rv);
|
||||
rv
|
||||
}
|
||||
|
||||
// Doesn't care if default port is given.
|
||||
let a: Url = url("https://example.com/");
|
||||
let b: Url = url("https://example.com:443/");
|
||||
check_eq(&a, &b);
|
||||
|
||||
// Different ports
|
||||
let a: Url = url("http://example.com/");
|
||||
let b: Url = url("http://example.com:8080/");
|
||||
assert!(a != b, "{:?} != {:?}", a, b);
|
||||
|
||||
// Different scheme
|
||||
let a: Url = url("http://example.com/");
|
||||
let b: Url = url("https://example.com/");
|
||||
assert!(a != b);
|
||||
|
||||
// Different host
|
||||
let a: Url = url("http://foo.com/");
|
||||
let b: Url = url("http://bar.com/");
|
||||
assert!(a != b);
|
||||
|
||||
// Missing path, automatically substituted. Semantically the same.
|
||||
let a: Url = url("http://foo.com");
|
||||
let b: Url = url("http://foo.com/");
|
||||
check_eq(&a, &b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn host() {
|
||||
fn assert_host(input: &str, host: Host<&str>) {
|
||||
assert_eq!(Url::parse(input).unwrap().host(), Some(host));
|
||||
}
|
||||
assert_host("http://www.mozilla.org", Host::Domain("www.mozilla.org"));
|
||||
assert_host("http://1.35.33.49", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
|
||||
assert_host("http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", Host::Ipv6(Ipv6Addr::new(
|
||||
0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344)));
|
||||
assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49"));
|
||||
assert_host("http://[::]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)));
|
||||
assert_host("http://[::1]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)));
|
||||
assert_host("http://0x1.0X23.0x21.061", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
|
||||
assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
|
||||
assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111)));
|
||||
assert_host("http://2..2.3", Host::Domain("2..2.3"));
|
||||
assert!(Url::parse("http://42.0x1232131").is_err());
|
||||
assert!(Url::parse("http://192.168.0.257").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn host_serialization() {
|
||||
// libstd’s `Display for Ipv6Addr` serializes 0:0:0:0:0:0:_:_ and 0:0:0:0:0:ffff:_:_
|
||||
// using IPv4-like syntax, as suggested in https://tools.ietf.org/html/rfc5952#section-4
|
||||
// but https://url.spec.whatwg.org/#concept-ipv6-serializer specifies not to.
|
||||
|
||||
// Not [::0.0.0.2] / [::ffff:0.0.0.2]
|
||||
assert_eq!(Url::parse("http://[0::2]").unwrap().host_str(), Some("[::2]"));
|
||||
assert_eq!(Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), Some("[::ffff:0:2]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_idna() {
|
||||
assert!("http://goșu.ro".parse::<Url>().is_ok());
|
||||
assert_eq!(Url::parse("http://☃.net/").unwrap().host(), Some(Host::Domain("xn--n3h.net")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialization() {
|
||||
let data = [
|
||||
("http://example.com/", "http://example.com/"),
|
||||
("http://addslash.com", "http://addslash.com/"),
|
||||
("http://@emptyuser.com/", "http://emptyuser.com/"),
|
||||
("http://:@emptypass.com/", "http://:@emptypass.com/"),
|
||||
("http://user@user.com/", "http://user@user.com/"),
|
||||
("http://user:pass@userpass.com/", "http://user:pass@userpass.com/"),
|
||||
("http://slashquery.com/path/?q=something", "http://slashquery.com/path/?q=something"),
|
||||
("http://noslashquery.com/path?q=something", "http://noslashquery.com/path?q=something")
|
||||
];
|
||||
for &(input, result) in &data {
|
||||
let url = Url::parse(input).unwrap();
|
||||
assert_eq!(url.as_str(), result);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_form_urlencoded() {
|
||||
let pairs: &[(Cow<str>, Cow<str>)] = &[
|
||||
("foo".into(), "é&".into()),
|
||||
("bar".into(), "".into()),
|
||||
("foo".into(), "#".into())
|
||||
];
|
||||
let encoded = form_urlencoded::Serializer::new(String::new()).extend_pairs(pairs).finish();
|
||||
assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23");
|
||||
assert_eq!(form_urlencoded::parse(encoded.as_bytes()).collect::<Vec<_>>(), pairs.to_vec());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_form_serialize() {
|
||||
let encoded = form_urlencoded::Serializer::new(String::new())
|
||||
.append_pair("foo", "é&")
|
||||
.append_pair("bar", "")
|
||||
.append_pair("foo", "#")
|
||||
.finish();
|
||||
assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23");
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// https://github.com/servo/rust-url/issues/25
|
||||
fn issue_25() {
|
||||
let filename = if cfg!(windows) { r"C:\run\pg.sock" } else { "/run/pg.sock" };
|
||||
let mut url = Url::from_file_path(filename).unwrap();
|
||||
url.assert_invariants();
|
||||
url.set_scheme("postgres").unwrap();
|
||||
url.assert_invariants();
|
||||
url.set_host(Some("")).unwrap();
|
||||
url.assert_invariants();
|
||||
url.set_username("me").unwrap();
|
||||
url.assert_invariants();
|
||||
let expected = format!("postgres://me@/{}run/pg.sock", if cfg!(windows) { "C:/" } else { "" });
|
||||
assert_eq!(url.as_str(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// https://github.com/servo/rust-url/issues/61
|
||||
fn issue_61() {
|
||||
let mut url = Url::parse("http://mozilla.org").unwrap();
|
||||
url.set_scheme("https").unwrap();
|
||||
assert_eq!(url.port(), None);
|
||||
assert_eq!(url.port_or_known_default(), Some(443));
|
||||
url.assert_invariants();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(not(windows))]
|
||||
/// https://github.com/servo/rust-url/issues/197
|
||||
fn issue_197() {
|
||||
let mut url = Url::from_file_path("/").expect("Failed to parse path");
|
||||
url.assert_invariants();
|
||||
assert_eq!(url, Url::parse("file:///").expect("Failed to parse path + protocol"));
|
||||
url.path_segments_mut().expect("path_segments_mut").pop_if_empty();
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// https://github.com/servo/rust-url/issues/222
|
||||
fn append_trailing_slash() {
|
||||
let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap();
|
||||
url.assert_invariants();
|
||||
url.path_segments_mut().unwrap().push("");
|
||||
url.assert_invariants();
|
||||
assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/?a=b");
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// https://github.com/servo/rust-url/issues/227
|
||||
fn extend_query_pairs_then_mutate() {
|
||||
let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap();
|
||||
url.query_pairs_mut().extend_pairs(vec![ ("auth", "my-token") ].into_iter());
|
||||
url.assert_invariants();
|
||||
assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?auth=my-token");
|
||||
url.path_segments_mut().unwrap().push("some_other_path");
|
||||
url.assert_invariants();
|
||||
assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/some_other_path?auth=my-token");
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// https://github.com/servo/rust-url/issues/222
|
||||
fn append_empty_segment_then_mutate() {
|
||||
let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap();
|
||||
url.assert_invariants();
|
||||
url.path_segments_mut().unwrap().push("").pop();
|
||||
url.assert_invariants();
|
||||
assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?a=b");
|
||||
}
|
4277
third_party/rust/url/tests/urltestdata.json
vendored
4277
third_party/rust/url/tests/urltestdata.json
vendored
File diff suppressed because it is too large
Load Diff
57
toolkit/library/gtest/rust/Cargo.lock
generated
57
toolkit/library/gtest/rust/Cargo.lock
generated
@ -18,29 +18,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"mp4parse_capi 0.5.1",
|
||||
"nsstring 0.1.0",
|
||||
"rust_url_capi 0.0.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "mp4parse"
|
||||
version = "0.5.1"
|
||||
@ -70,41 +49,5 @@ dependencies = [
|
||||
"nsstring 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust_url_capi"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[metadata]
|
||||
"checksum byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855"
|
||||
"checksum idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"
|
||||
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
|
||||
"checksum matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bcc3ad8109fa4b522f9b0cd81440422781f564aaf8c195de6b9d6642177ad0dd"
|
||||
"checksum unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"
|
||||
"checksum unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"
|
||||
"checksum url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8527c62d9869a08325c38272b3f85668df22a65890c61a639d233dc0ed0b23a2"
|
||||
|
57
toolkit/library/rust/Cargo.lock
generated
57
toolkit/library/rust/Cargo.lock
generated
@ -16,29 +16,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"mp4parse_capi 0.5.1",
|
||||
"nsstring 0.1.0",
|
||||
"rust_url_capi 0.0.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "mp4parse"
|
||||
version = "0.5.1"
|
||||
@ -57,41 +36,5 @@ dependencies = [
|
||||
name = "nsstring"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "rust_url_capi"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[metadata]
|
||||
"checksum byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855"
|
||||
"checksum idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"
|
||||
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
|
||||
"checksum matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bcc3ad8109fa4b522f9b0cd81440422781f564aaf8c195de6b9d6642177ad0dd"
|
||||
"checksum unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"
|
||||
"checksum unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"
|
||||
"checksum url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8527c62d9869a08325c38272b3f85668df22a65890c61a639d233dc0ed0b23a2"
|
||||
|
@ -8,7 +8,6 @@ description = "Shared Rust code for libxul"
|
||||
[dependencies]
|
||||
mp4parse_capi = { path = "../../../../media/libstagefright/binding/mp4parse_capi" }
|
||||
nsstring = { path = "../../../../xpcom/rust/nsstring" }
|
||||
rust_url_capi = { path = "../../../../netwerk/base/rust-url-capi" }
|
||||
|
||||
[lib]
|
||||
path = "lib.rs"
|
||||
|
@ -4,4 +4,3 @@
|
||||
|
||||
extern crate mp4parse_capi;
|
||||
extern crate nsstring;
|
||||
extern crate rust_url_capi;
|
||||
|
Loading…
Reference in New Issue
Block a user